libva-1.6.2

git-svn-id: svn://kolibrios.org@6146 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Sergey Semyonov (Serge)
2016-02-05 22:00:38 +00:00
parent 84cfd5cf57
commit a08f61ddb9
1084 changed files with 430297 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
# Subdirectories that the generated Makefile recurses into, in this order.
SUBDIRS = h264 mpeg2 render post_processing vme utils
# Extra file shipped in the distribution tarball (the shader preprocessor script).
EXTRA_DIST = gpp.py
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in

View File

@@ -0,0 +1,634 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
# Shell test that succeeds only when both MAKEFILE_LIST and MAKELEVEL expand
# to non-empty strings, which is used here as the "running under GNU make" check.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
# Shell snippet that succeeds when make was invoked with the one-character
# option held in the shell variable target_option (any other length is an
# internal error).  It scans MFLAGS under GNU make, otherwise MAKEFLAGS with
# backslash-escaped whitespace removed.  Variable assignments and long options
# are ignored, and for options that take an argument (-I, -O, -l, -d, -E, -D,
# -m, -J, -T) the argument is dropped so it cannot be mistaken for a flag.
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
# Succeeds when make was invoked with the "n" option.
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
# Succeeds when make was invoked with the "k" option.
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
# Package-specific directories: the standard directory with the package name appended.
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
# "cd" preceded by a CDPATH assignment, so the directory change does not
# depend on whatever CDPATH the caller had set.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
# Variants of the install script for data (mode 644), programs and scripts.
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
# Install/uninstall phase markers; every one is the shell no-op ":" here.
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
# Location of this directory relative to the top of the tree; it is the path
# handed to config.status and automake by the regeneration rules.
subdir = src/shaders
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
# Inputs whose change triggers regeneration of the build system.
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
# Verbosity helpers.  Each AM_V_* variable expands to the am__v_*_<level>
# variant chosen by the AM_V substitution, falling back to AM_DEFAULT_V when
# that is empty.  Level 0 is the quiet form, level 1 the verbose form.
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
# This directory builds nothing itself; it only recurses into SUBDIRS.
SOURCES =
DIST_SOURCES =
# Targets that are propagated to every subdirectory.  The rule for
# $(am__recursive_targets) strips the "-recursive" suffix, runs the resulting
# target in each subdirectory and then runs "<target>-am" in this directory.
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
# Shell test: fails when AM_UPDATE_INFO_DIR is n/no/NO, otherwise succeeds
# only if "install-info --version" can be run.
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
# Recursive cleaning targets; distclean and maintainer-clean walk
# DIST_SUBDIRS instead of SUBDIRS in the recursive rule.
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
# Every target handled by the shared recursive rule.
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
# The same list with the "-recursive" suffix removed, plus TAGS, CTAGS and distdir.
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
# Files handed to the ID, tags, ctags and cscope rules.
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
# The snippet leaves the result in the shell variable "unique"; a file that
# does not exist in the current directory is prefixed with $(srcdir)/.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
# Subdirectories visited by distdir, distclean and maintainer-clean.
DIST_SUBDIRS = $(SUBDIRS)
# Files of this directory that the distdir rule copies into the distribution.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
# Shell snippet used by the distdir rule.  On entry the shell variables dir1
# and dir2 hold two paths relative to the current directory; on exit reldir
# holds dir2 rewritten so that it is valid from inside dir1.  dir1 is walked
# one component at a time: ".." prepends the last component of the tracked
# directory (dir0) to dir2, a component equal to dir2's first component is
# stripped from dir2, and any other component adds a "../" prefix to dir2.
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DRM_CFLAGS = @DRM_CFLAGS@
DRM_LIBS = @DRM_LIBS@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_LIBS = @EGL_LIBS@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GEN4ASM = @GEN4ASM@
GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
GEN4ASM_LIBS = @GEN4ASM_LIBS@
GIT = @GIT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBDRM_VERSION = @LIBDRM_VERSION@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
PYTHON2 = @PYTHON2@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
wayland_protocoldir = @wayland_protocoldir@
wayland_scanner = @wayland_scanner@
# Settings copied from this directory's Makefile.am.
# Subdirectories the recursive targets descend into, in this order.
SUBDIRS = h264 mpeg2 render post_processing vme utils
# Extra file added to DISTFILES for the distribution.
EXTRA_DIST = gpp.py
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
# Default goal: build every subdirectory, then this directory (all-am).
all: all-recursive
# Empty suffix list: no suffix rules are active in this directory.
.SUFFIXES:
# Regenerate Makefile.in when Makefile.am or a configure input is newer.
# If a changed prerequisite is one of the configure dependencies, the work is
# delegated to the top-level "am--refresh" target; otherwise automake is run
# from the top source directory for this one Makefile.
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu src/shaders/Makefile
.PRECIOUS: Makefile
# Regenerate this Makefile.  When config.status is among the newer
# prerequisites the top-level "am--refresh" is run; otherwise config.status
# is asked to recreate just this file.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
# The top-level build-system files are all refreshed through the top-level
# Makefile's "am--refresh" target.
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
# Rule with no prerequisites and no recipe for the aclocal.m4 inputs.
$(am__aclocal_m4_deps):
# Remove libtool objects and libtool's working directories.
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
#
# Shared recipe for every "<target>-recursive" target.  The suffix is
# stripped to get the real target, which is then run in each subdirectory
# (DIST_SUBDIRS for distclean/maintainer-clean, SUBDIRS otherwise).  A failing
# subdirectory aborts immediately unless make runs with "k", in which case the
# failure is remembered and reported at the end.  If "." was not listed among
# the subdirectories, "<target>-am" is finally run in this directory.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
# Build an ID database by running mkid over the de-duplicated tagged files.
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
# Run etags for this directory.  Each subdirectory's TAGS file, when it
# exists, is pulled in through an include option whose spelling depends on
# whether this etags accepts --etags-include.  Nothing is run when there are
# no arguments, no includes and no files.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
# Run ctags over this directory's tagged files, unless there is nothing to tag.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
# Run gtags from the top source directory, passing the absolute path of the
# top build directory as its argument.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
# Append this directory's tagged files to $(top_builddir)/cscope.files, using
# the build-directory path when the file exists there and the source path otherwise.
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
# Remove every tags database the rules above can create.
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
# Populate $(distdir) with this directory's part of the distribution.
# First recipe: strip the srcdir/top_srcdir prefixes from the DISTFILES
# names, create any needed subdirectories under $(distdir), then copy each
# entry (directories recursively, taking both the build-tree and source-tree
# copies when they differ; plain files only if not already present).
# Second recipe: for every entry of DIST_SUBDIRS other than ".", create the
# matching directory under $(distdir) (skipped when make runs with "n") and
# run "distdir" there, with distdir and top_distdir rewritten by
# am__relativize so they are valid from inside that subdirectory.
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
# Standard user-facing targets.  Each public target forwards to its
# "-recursive" form; the matching "-am" target is the part done in this
# directory itself, which is empty for most of them here.
check-am: all-am
check: check-recursive
all-am: Makefile
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
# Re-run "install" with INSTALL_PROGRAM replaced by the stripping installer.
# When STRIP is non-empty it is also passed along as STRIPPROG through
# INSTALL_PROGRAM_ENV.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
# Remove the files listed in CONFIG_CLEAN_FILES, plus CONFIG_CLEAN_VPATH_FILES
# when building outside the source directory; both lists are empty here.
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
# Remove the files listed in MAINTAINERCLEANFILES (Makefile.in here).
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
# After recursing, also delete the generated Makefile itself.
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
# After recursing, also delete the generated Makefile itself.
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
# Targets whose recipes invoke $(MAKE) themselves.
.MAKE: $(am__recursive_targets) install-am install-strip
# Targets that are commands rather than files.
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
ps ps-am tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
#coding=UTF-8
# Copyright © 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Authors:
# Chen, Yangyang <yangyang.chen@intel.com>
# Han, Haofu <haofu.han@intel.com>
#
import sys
class Block:
    """One node of the parsed template tree.

    A node is either a plain text line (``text`` is a string) or a ``$for``
    loop header (``text`` is None).  A loop node gets its ``param_*``
    attributes from checkfor() and keeps its body in ``subblocks``.
    """

    def __init__(self, ln=0, s=None):
        """Create a node for source line number ``ln`` with optional text ``s``."""
        assert type(ln) == int
        assert type(s) == str or s is None
        self.lineno = ln
        self.text = s
        self.subblocks = []

    def append(self, block):
        """Add ``block`` as the last child of this node."""
        self.subblocks.append(block)

    def checkfor(self, line):
        """Parse a ``$for(init; cond; step) {`` header into the param_* fields.

        ``init`` and ``step`` are comma-separated expressions of equal count;
        ``cond`` is one of ``<``, ``>``, ``<=``, ``>=`` followed by the limit
        expression.  Sets param_init, param_num, param_op, param_limit and
        param_step.

        Raises Exception with a ``<lineno>: <message>`` text for every
        malformed header, including ones without parentheses or with an empty
        condition (which previously escaped as a bare IndexError).
        """
        import re
        if re.match(r'\$\s*for\s*', line) is None:
            raise Exception(self.__errmsg('syntax error'))
        head = line.split('(', 1)
        if len(head) != 2:
            # no opening parenthesis at all
            raise Exception(self.__errmsg('syntax error'))
        tail = head[1].rsplit(')', 1)
        if len(tail) != 2:
            # no closing parenthesis
            raise Exception(self.__errmsg('syntax error'))
        conds = tail[0].split(';')
        if tail[1].strip() != '{':
            raise Exception(self.__errmsg('missing "{"'))
        if len(conds) != 3:
            raise Exception(self.__errmsg('syntax error(miss ";"?)'))
        init, cond, step = conds
        self.__parse_init(init)
        self.__parse_cond(cond)
        self.__parse_step(step)

    def __eval_exp(self, exp):
        """Evaluate one header expression, reporting failures with the line number."""
        # NOTE(review): eval() runs arbitrary Python taken from the input file;
        # only feed this tool trusted template sources.
        try:
            return eval(exp)
        except Exception:
            raise Exception(self.__errmsg('non an exp: %s'%exp))

    def __parse_init(self, init):
        """Evaluate the comma-separated start values of the loop parameters."""
        inits = init.split(',')
        self.param_init = [self.__eval_exp(ini) for ini in inits]
        self.param_num = len(inits)

    def __parse_cond(self, cond):
        """Split the condition into its comparison operator and limit value."""
        cond = cond.strip()
        # Slices instead of indexing so an empty or one-character condition is
        # reported as an error instead of raising IndexError.
        if cond[:1] not in ('<', '>'):
            raise Exception(self.__errmsg('syntax error'))
        if cond[1:2] == '=':
            self.param_op = cond[:2]
            limit = cond[2:]
        else:
            self.param_op = cond[0]
            limit = cond[1:]
        self.param_limit = self.__eval_exp(limit)

    def __parse_step(self, step):
        """Evaluate the per-parameter increments; the count must match the inits."""
        steps = step.split(',')
        if len(steps) != self.param_num:
            raise Exception(self.__errmsg('params number no match'))
        self.param_step = [self.__eval_exp(st) for st in steps]

    def __errmsg(self, msg=''):
        """Prefix ``msg`` with this node's line number."""
        return '%d: %s' % (self.lineno, msg)
def readlines(f):
    """Read every line of ``f``, further splitting on the literal text ``\\n``.

    A physical line containing the two characters backslash and "n" is cut at
    each occurrence and the pieces are returned as separate entries; other
    lines are returned unchanged (including their trailing newline).
    """
    pieces = []
    for raw in f.readlines():
        # str.split returns [raw] when the separator is absent, so a single
        # extend covers both the split and the pass-through case.
        pieces.extend(raw.split('\\n'))
    return pieces
def parselines(lines):
    """Build the block tree for the given template lines.

    Each line is stripped, then classified by its first character:
      * ``$``  opens a loop: a Block is created, its header is parsed with
               checkfor() and following lines become its children;
      * ``}``  closes the innermost open loop;
      * ``#``  and blank lines are ignored;
      * anything else becomes a text Block in the innermost open loop.

    Returns the root Block (line number 0).

    Raises Exception with a ``<lineno>: <message>`` text when a ``}`` has no
    matching ``$for`` or when a ``$for`` is still open at the end of input.
    Previously a stray ``}`` removed the root from the stack, so the next
    line failed with an unexplained IndexError, and an unclosed loop was
    silently accepted.
    """
    root = Block(0)
    stack = [root]
    for lineno, line in enumerate(lines, 1):
        line = line.strip()
        if line.startswith('$'):
            block = Block(lineno)
            block.checkfor(line)
            stack[-1].append(block)
            stack.append(block)
        elif line.startswith('}'):
            if len(stack) == 1:
                # only the root is left: nothing to close
                raise Exception('%d: unmatched "}"' % lineno)
            stack.pop()
        elif line and not line.startswith('#'):
            stack[-1].append(Block(lineno, line))
    if len(stack) > 1:
        # report the line of the innermost loop that was never closed
        raise Exception('%d: missing "}"' % stack[-1].lineno)
    return root
def writeblocks(outfile, blocks):
    """Expand the block tree ``blocks`` and write the result to ``outfile``.

    Text blocks are emitted once per iteration of their enclosing loop, with
    every ``%N`` replaced by the current value of the loop's N-th parameter
    (1-based).  A ``;`` is appended to a line unless it starts with ``.`` or
    already ends with ``:`` or ``;``.  Lines are joined with newlines and
    followed by one final newline.

    Raises Exception (``<lineno>: too many param(%N)``) when a line refers to
    a parameter its enclosing loop does not define.
    """
    import re
    param_re = re.compile(r'%(\d+)')
    buf = []

    def check_cond(op, cur, lim):
        """Return whether ``cur <op> lim`` holds for the loop condition."""
        assert op in ['<', '>', '<=', '>=']
        assert type(cur) == int
        assert type(lim) == int
        if op == '<':
            return cur < lim
        if op == '>':
            return cur > lim
        if op == '<=':
            return cur <= lim
        return cur >= lim

    def do_writeblock(block, curs):
        """Emit ``block`` given the current parameter values ``curs``."""
        if block.text is not None:
            def substitute(match):
                index = int(match.group(1)) - 1
                if index >= len(curs):
                    raise Exception('%d: too many param(%%%d)'%(block.lineno, index+1))
                return str(curs[index])
            # One regex pass replaces each %N as a whole token, so expanding
            # %1 can no longer corrupt %10, %11, ... as chained str.replace did.
            newline = param_re.sub(substitute, block.text)
            if newline and \
               not newline.startswith('.') and \
               not newline.endswith(':') and \
               not newline.endswith(';'):
                newline += ';'
            buf.append(newline)
        else:
            # Work on a copy: incrementing param_init in place left a nested
            # loop exhausted after the first iteration of its outer loop.
            for_curs = list(block.param_init)
            while check_cond(block.param_op, for_curs[0], block.param_limit):
                for sblock in block.subblocks:
                    do_writeblock(sblock, for_curs)
                for i in range(block.param_num):
                    for_curs[i] += block.param_step[i]

    for block in blocks.subblocks:
        do_writeblock(block, [])
    outfile.write('\n'.join(buf))
    outfile.write('\n')
# Command-line driver: gpp.py INPUT [OUTPUT]
# Reads the template INPUT, expands its $for loops and writes the result to
# OUTPUT, or to standard output when OUTPUT is omitted.
if __name__ == '__main__':
    argc = len(sys.argv)
    if argc == 1:
        # sys.stderr.write works under both Python 2 and 3, unlike the
        # Python-2-only "print >>sys.stderr" statement used before.
        sys.stderr.write('no input file\n')
        # NOTE(review): exit status 0 is kept from the original even though
        # this is an error path - confirm no build rule relies on it before changing.
        sys.exit(0)
    try:
        infile = open(sys.argv[1], 'r')
    except IOError:
        sys.stderr.write('can not open %s\n' % sys.argv[1])
        sys.exit(1)
    if argc == 2:
        outfile = sys.stdout
    else:
        try:
            outfile = open(sys.argv[2], 'w')
        except IOError:
            sys.stderr.write('can not write to %s\n' % sys.argv[2])
            sys.exit(1)
    try:
        try:
            lines = readlines(infile)
        finally:
            # best effort: a failing close must not hide a read error
            try:
                infile.close()
            except IOError:
                pass
        blocks = parselines(lines)
        writeblocks(outfile, blocks)
    finally:
        # Close (and thereby flush) a real output file; leave stdout alone.
        if outfile is not sys.stdout:
            outfile.close()

View File

@@ -0,0 +1,4 @@
# Subdirectories that the generated Makefile recurses into, in this order.
SUBDIRS = ildb mc
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in

View File

@@ -0,0 +1,633 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
# Shell test that succeeds only when both MAKEFILE_LIST and MAKELEVEL expand
# to non-empty strings, which is used here as the "running under GNU make" check.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
# Shell snippet that succeeds when make was invoked with the one-character
# option held in the shell variable target_option (any other length is an
# internal error).  It scans MFLAGS under GNU make, otherwise MAKEFLAGS with
# backslash-escaped whitespace removed.  Variable assignments and long options
# are ignored, and for options that take an argument (-I, -O, -l, -d, -E, -D,
# -m, -J, -T) the argument is dropped so it cannot be mistaken for a flag.
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
# Succeeds when make was invoked with the "n" option.
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
# Succeeds when make was invoked with the "k" option.
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
# Package-specific directories: the standard directory with the package name appended.
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
# "cd" preceded by a CDPATH assignment, so the directory change does not
# depend on whatever CDPATH the caller had set.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
# Variants of the install script for data (mode 644), programs and scripts.
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
# Install/uninstall phase markers; every one is the shell no-op ":" here.
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
# Location of this directory relative to the top of the tree; it is the path
# handed to config.status and automake by the regeneration rules.
subdir = src/shaders/h264
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
# Inputs whose change triggers regeneration of the build system.
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
# Verbosity helpers.  Each AM_V_* variable expands to the am__v_*_<level>
# variant chosen by the AM_V substitution, falling back to AM_DEFAULT_V when
# that is empty.  Level 0 is the quiet form, level 1 the verbose form.
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
# This directory builds nothing itself; it only recurses into SUBDIRS.
SOURCES =
DIST_SOURCES =
# Targets that are propagated to every subdirectory through the shared
# $(am__recursive_targets) rule.
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
# Shell test: fails when AM_UPDATE_INFO_DIR is n/no/NO, otherwise succeeds
# only if "install-info --version" can be run.
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
# Recursive cleaning targets.
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
# Every target handled by the shared recursive rule.
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
# The same list with the "-recursive" suffix removed, plus TAGS, CTAGS and distdir.
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
# Files that make up the tagged-file list used by am__define_uniq_tagged_files.
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
# The snippet leaves the result in the shell variable "unique"; a file that
# does not exist in the current directory is prefixed with $(srcdir)/.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
# Distribution subdirectories default to the build subdirectories.
DIST_SUBDIRS = $(SUBDIRS)
# Files of this directory that belong in the distribution.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
# Shell snippet.  On entry the shell variables dir1 and dir2 hold two paths
# relative to the current directory; on exit reldir holds dir2 rewritten so
# that it is valid from inside dir1.  dir1 is walked one component at a time:
# ".." prepends the last component of the tracked directory (dir0) to dir2, a
# component equal to dir2's first component is stripped from dir2, and any
# other component adds a "../" prefix to dir2.
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DRM_CFLAGS = @DRM_CFLAGS@
DRM_LIBS = @DRM_LIBS@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_LIBS = @EGL_LIBS@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GEN4ASM = @GEN4ASM@
GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
GEN4ASM_LIBS = @GEN4ASM_LIBS@
GIT = @GIT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBDRM_VERSION = @LIBDRM_VERSION@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
PYTHON2 = @PYTHON2@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
wayland_protocoldir = @wayland_protocoldir@
wayland_scanner = @wayland_scanner@
# Settings copied from this directory's Makefile.am.
# Subdirectories the recursive targets descend into, in this order.
SUBDIRS = ildb mc
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
# Default goal: forwards to the recursive form of "all".
all: all-recursive
# Empty suffix list: no suffix rules are active in this directory.
.SUFFIXES:
# Regenerate Makefile.in when Makefile.am or a configure dependency changed.
# For each newer prerequisite ($?) that also appears in $(am__configure_deps):
#   - run 'am--refresh' in $(top_builddir);
#   - if that succeeds and Makefile.in now exists, stop successfully;
#   - if that succeeds but Makefile.in is still missing, fall through to automake;
#   - if the refresh fails, exit with status 1.
# Otherwise (or after falling through) rerun automake for this directory only,
# echoing the command first because the recipe itself is silenced with '@'.
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/h264/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu src/shaders/h264/Makefile
# Makefile is listed as precious, so make does not delete it if this recipe is interrupted.
.PRECIOUS: Makefile
# Regenerate Makefile from Makefile.in.
# If config.status is among the newer prerequisites ($?), delegate to the top-level
# 'am--refresh' target. Otherwise rerun config.status for just this one file
# ($(subdir)/Makefile), echoing the command first because the recipe is silenced.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
# The three rules below rebuild top-level configuration artifacts. None of them
# does the work here; each hands over to the 'am--refresh' target in $(top_builddir).
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
# Rule with neither prerequisites nor recipe: gives each of these files a target,
# so a missing one does not abort with "no rule to make target".
$(am__aclocal_m4_deps):
# Remove libtool object files (*.lo) from this directory; errors are ignored ('-').
mostlyclean-libtool:
-rm -f *.lo
# Remove the .libs and _libs directories; errors are ignored ('-').
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
#
# How the recipe works:
#   * the real target name is $@ with "-recursive" stripped;
#   * distclean-* and maintainer-clean-* iterate over $(DIST_SUBDIRS), everything
#     else over $(SUBDIRS);
#   * each subdirectory is entered and the target is made there; for the entry "."
#     the "<target>-am" rule is used instead;
#   * when $(am__make_keepgoing) is true a failure only sets fail=yes and the loop
#     continues, otherwise the first failure exits with status 1;
#   * if "." was not in the list, "<target>-am" is made in this directory afterwards;
#   * the recipe succeeds only if $$fail is still empty at the end.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
# Build an ID database by running mkid on $$unique.
# NOTE(review): $$unique is presumably set by $(am__define_uniq_tagged_files),
# which is defined outside this part of the file - confirm.
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
# 'tags' recurses into the subdirectories; 'TAGS' is an alias for 'tags'.
tags: tags-recursive
TAGS: tags
# Build the etags TAGS file for this directory.
#   * 'set x' seeds the positional parameters; the dummy "x" is dropped by 'shift' later.
#   * The include option is --etags-include if $(ETAGS) accepts it, else --include;
#     empty_fix is "." in the first case and empty in the second.
#   * For every entry of $(SUBDIRS) other than "." that already has a TAGS file,
#     an include option pointing at that file is appended to the parameters.
#   * Nothing is run when $(ETAGS_ARGS), the collected options and $$unique are all empty.
#   * If $$unique is empty it is replaced by $$empty_fix before $(ETAGS) is invoked.
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
# 'ctags' recurses into the subdirectories; 'CTAGS' is an alias for 'ctags'.
ctags: ctags-recursive
CTAGS: ctags
# Run $(CTAGS) on $$unique for this directory, unless both $(CTAGS_ARGS) and
# $$unique are empty, in which case nothing is run.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
# Run 'gtags -i' from $(top_srcdir), passing the absolute path of $(top_builddir)
# (captured in $$here before changing directory) as its last argument.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
# Append one path per tagged file to $(top_builddir)/cscope.files.
# A file that exists in the current directory is listed as $(subdir)/file.
# Any other file is listed under $$sdir, which is $(srcdir) itself when that is an
# absolute path (POSIX or drive-letter form) and $(subdir)/$(srcdir) otherwise.
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
# Delete the tag/index files that the tag rules can leave in this directory;
# errors are ignored ('-').
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
# Populate $(distdir) with this directory's distributable files, then recurse.
#
# First recipe (files of this directory):
#   * regex-escape $(srcdir) and $(top_srcdir), then strip those prefixes from the
#     names in $(DISTFILES) (a top_srcdir prefix becomes $(top_builddir)/);
#   * create under $(distdir) every directory that a listed path needs;
#   * for each entry use the copy in the build directory if one exists, else the
#     one in $(srcdir);
#   * directories are copied recursively; if the directory exists both in the
#     build tree and in $(srcdir), the $(srcdir) copy goes in first and the build
#     tree copy is laid over it; already-present directories lacking owner rwx
#     permissions are fixed with chmod u+rwx;
#   * regular files are copied only if not already present in $(distdir).
#
# Second recipe (subdirectories):
#   * for every entry of $(DIST_SUBDIRS) other than ".", make sure
#     $(distdir)/<subdir> exists (creation is skipped when $(am__make_dryrun) is true);
#   * $(am__relativize) turns distdir and top_distdir into paths relative to the
#     subdirectory;
#   * 'distdir' is then made inside the subdirectory with those values, with
#     am__remove_distdir, am__skip_length_check and am__skip_mode_fix set to ':'.
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
# Entry points for check/install. The user-facing targets forward to their
# "-recursive" form; the "-am" targets are the part done in this directory.
check-am: all-am
check: check-recursive
# This directory builds nothing itself: all-am only needs an up-to-date Makefile.
all-am: Makefile
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
# Run both install halves for this directory in one sub-make.
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
# Re-run 'install' with $(INSTALL_STRIP_PROGRAM) substituted for the install
# program and INSTALL_STRIP_FLAG=-s.
# When $(STRIP) is non-empty it is also passed to the installer through
# INSTALL_PROGRAM_ENV as STRIPPROG; when it is empty that variable is left alone.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
# Generic clean hooks. The first two have nothing to remove in this directory.
mostlyclean-generic:
clean-generic:
# Remove $(CONFIG_CLEAN_FILES) if the list is non-empty. $(CONFIG_CLEAN_VPATH_FILES)
# is removed only when building outside the source tree ($(srcdir) is not ".")
# and the list is non-empty. Errors are ignored ('-').
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
# Print a warning, then remove $(MAINTAINERCLEANFILES) if the list is non-empty.
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
# Standard targets for this directory. Each user-facing name forwards to its
# "-recursive" form. The matching "-am" target is the per-directory part; most of
# them are empty here because this directory has no products of its own.
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
# After the recursive distclean, the generated Makefile itself is removed.
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
# As for distclean: the generated Makefile is removed last.
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
# Lists the targets whose recipes invoke $(MAKE).
# NOTE(review): .MAKE is presumably interpreted by non-GNU make implementations
# only - confirm against the automake documentation.
.MAKE: $(am__recursive_targets) install-am install-strip
# Targets that are commands rather than files.
# NOTE(review): distclean-libtool is listed here, but no rule with that name
# appears in the visible part of this file.
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
ps ps-am tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,748 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#if !defined(__AVC_ILDB_HEADER__) // Make sure this file is only included once
#define __AVC_ILDB_HEADER__
// Module name: AVC_ILDB.inc
// ORIX/ORIY are undefined here and defined again further down as child thread inputs.
#undef ORIX
#undef ORIY
//========== Root thread input parameters ==================================================
// Each name maps to a sub-register of r1. The ":w" / ":ub" / ":b" tag in a comment is the
// element type the name is meant to be used with. The sub-register index appears to count
// in units of that type (compare Temp1_W r3.12 with Temp1_B r3.24, documented as the same
// storage), so the same index under different types names different bytes.
#define RootParam r1 // :w
#define MBsCntX r1.0 // :w, MB count per row
#define MBsCntY r1.1 // :w, MB count per col
//#define PicType r1.2 // :w, Picture type
#define MaxThreads r1.3 // :w, Max Thread limit
#define EntrySignature r1.4 // :w, Debug flag
#define BitFields r1.5 // :uw
#define MbaffFlag BIT0 // :w, mbaff flag, bit 0 in BitFields
#define BotFieldFlag BIT1 // :w, bottom field flag, bit 1 in BitFields
#define CntlDataExpFlag BIT2 // :w, Control Data Expansion Flag, bit 2 in BitFields
#define RampConst r1.12 // 8 :ub, Ramp constant, r1.12 - r1.19:ub
#define StepToNextMB r1.20 // :b, 2 bytes
#define Minus2Minus1 r1.22 // :b, 2 bytes
// next one starts at r1.11:w
// NOTE(review): bytes 22-23 (Minus2Minus1) are word 11, so the next free word looks like
// r1.12:w rather than r1.11:w - confirm.
#define TopFieldFlag 0xFFFD // :w, top field flag, used to set bit1 to 0 (all 16 bits set except bit 1).
//========== Root Locals =============================================================
// Variables in root kernel for launching child thread
#define ChildParam r2.0 // :w
//Not used #define URBOffset r2.3 // :w, Each row occupies 4 URB entries. All children in the same row use the same set of URB entries
// CurCol/CurColB and CurRow/CurRowB are word and byte views of the same storage
// (word 10 = byte 20, word 11 = byte 22).
#define CurCol r2.10 // :w, current col
#define CurColB r2.20 // :b, current col
#define CurRow r2.11 // :w, current row
#define CurRowB r2.22 // :b, current row
#define LastCol r2.12 // :w, last col
#define LastRow r2.13 // :w, last row
// Root local constants during spawning process
#define Col_Boundary r3.0 // :w,
#define Row_Boundary r3.1 // :w,
//#define TotalBlocks r3.2 // :w, Total blocks in the frame
#define URB_EntriesPerMB_2 r3.3 // :w, = URB entries per MB, but in different form
#define URBOffsetUVBase r3.4 // :w, UV Base offset in URB
// Temp1_D, Temp1_W and Temp1_B name the same storage (dword 6 = word 12 = byte 24).
#define Temp1_D r3.6 // :d:
#define Temp1_W r3.12 // :w, Temp1
#define Temp1_B r3.24 // :b, = Temp1_W
#define Temp2_W r3.13 // :w, Temp2
#define Temp2_B r3.26 // :b, = Temp2_W
// Root local variables
#define JumpTable r4 // :d, jump table
#define JUMPTABLE_BASE 4*32 // byte offset of r4 (JumpTable) from r0.0, at 32 bytes per GRF
#define JumpAddr a0.7
#define TopRowForScan r5.0 // :w, track the top row for scan. All rows above this row is deblocked already.
// Child Thread R0 Header Field
// NOTE(review): m0/m1 are presumably message registers used when spawning a child - confirm.
#define MRF0 m0
#define CT_R0Hdr m1
/*
.declare GatewayAperture Base=r50.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureB 1600 // r50 byte offset from r0.0
// Chroma root thread updates luma root's ThreadLimit at r10.0:w via gateway
#define ThreadLimit r62.0 // :w, thread limit //r56.0
#define THREAD_LIMIT_OFFSET 0x01800000 // Offset from r50 to r62 = 12*32 = 384 = 0x0180. 0x180 << 16 = 0x01800000
//#define THREAD_LIMIT_OFFSET 0x00C00000 // Offset from r50 to r56 = 6*32 = 192 = 0x00C0. 0xC0 << 16 = 0x00C00000
*/
// Gateway size is 16 GRF. 68 rows of MBs takes 9 GRFs (r6 - r14)
// For CTG: Expanded to support 1280 rows of pixel (80 rows of MBs). It requires 10 GRFs (r6 - r15)
.declare GatewayAperture Base=r6.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureB 192 // r6 byte offset from r0.0 (6*32 = 192)
// Chroma root thread updates luma root's ThreadLimit (r18.0:w) via gateway
#define ThreadLimit r18.0 // :w, thread limit
#define THREAD_LIMIT_OFFSET 0x01800000 // Offset from r6 to r18 = 12*32 = 384 = 0x0180. 0x180 << 16 = 0x01800000
#define TotalBlocks r18.1 // :w, Total blocks in the frame
// Root local variables
#define ChildThreadsID r19.0 // :w, Child thread ID, unique to each child
#define OutstandingThreads r20.0 // :w, Outstanding threads
#define ProcessedMBs r20.1 // :w, # of MBs processed
#define URBOffset r21.0 // :w, Each row occupies 4 URB entries. All children in the same row use the same set of URB entries
//=================================================================================
// Scoreboard bookkeeping used by the root thread.
#define ScoreBd_Size 128 //96 // size of Status[] or ProcCol[]
#define ScoreBd_Idx 2
//#define Saved_Col 0
#define StatusAddr a0.4 // :w, point to r50
//=================================================================================
// Gateway payload
// All payload fields live in r48; the response is read from r49.
#define GatewayPayload r48.0 // :ud
#define GatewayPayloadKey r48.8 // :uw
#define DispatchID r48.20 // :ub
// RegBase_GatewaySize and Offset_Length are two names for the same dword (r48.5),
// one per message kind as noted in their comments.
#define RegBase_GatewaySize r48.5 // :ud, used in open a gateway
#define Offset_Length r48.5 // :ud, used in forwardmsg back to root
#define EUID_TID r48.9 // :uw, used in forwardmsg back to root
// Gateway response
#define GatewayResponse r49.0 // :ud, one GRF
// URBWriteMsgDesc is the full dword at a0.0; the Low/High names are its two :uw halves.
#define URBWriteMsgDesc a0.0 // Used in URB write, :ud
#define URBWriteMsgDescLow a0.0 // Used in URB write, :uw
#define URBWriteMsgDescHigh a0.1 // Used in URB write, :uw
.declare WritebackResponse Base=r50 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 1 GRF for write backs
/////////////////////////////////////////////////////////////////////////////////////////////
// IDesc Order Offset
//
// 0) luma root 0 from luma root
// 1) luma child 16 from luma root
// 2) chroma root 32 from luma root
// 3) chroma child 16 from chroma root
// 4) luma field root 0 from luma field root
// 5) luma field child 16 from luma field root
// 6) chroma field root 32 from luma field root
// 7) chroma field child 16 from chroma field root
// 8) luma Mbaff root 0 from luma Mbaff root
// 9) luma Mbaff child 16 from luma Mbaff root
// 10) chroma Mbaff root 32 from luma Mbaff root
// 11) chroma Mbaff child 16 from chroma Mbaff root
// IDesc offset within non-mbaff or mbaff mode
// (the same 32 / 16 values that appear in the Offset column of the IDesc table above)
#define CHROMA_ROOT_OFFSET 32 // Offset from luma root to chroma root
#define CHILD_OFFSET 16 // Offset from luma root to luma child,
// and from chroma root to chroma child
/////////////////////////////////////////////////////////////////////////////////////////////
//========== End of Root Variables ======================================================
//========== Child thread input parameters ==============================================
// Child threads also receive their inputs in r1. Fields marked "(same as root)" keep the
// root thread's name and position; the commented-out defines below are reminders of those.
//#define MBsCntX r1.0 // :w, MB count per row (same as root)
//#define MBsCntY r1.1 // :w, MB count per col (same as root)
//#define PicTypeC r1.2 // :w, Picture type same as root thread (same as root)
#define URBOffsetC r1.3 // :w,
#define EntrySignatureC r1.4 // :w, Debug field (same as root)
//#define BitFields r1.5 // :w (same as root)
//#define MbaffFlag BIT0 // :w, mbaff flag, bit 0 in BitFields
//#define BotFieldFlag BIT1 // :w, bottom field flag, bit 1 in BitFields
//#define CntlDataExpFlag BIT2 // :w, Control Data Expansion Flag, bit 2 in BitFields
// RampConstC (r1.12 as :ub, bytes 12-19) and LastColC (r1.12 as :w, i.e. word 12) carry the
// same index but different element types, so they are distinct locations.
#define RampConstC r1.12 // 8 :ub, Ramp constant, r1.12 - r1.19:ub.
#define ORIX r1.10 // :w, carry over from root r1 in MB count
#define ORIY r1.11 // :w, carry over from root r1 in MB count
#define LastColC r1.12 // :w, last col
#define LastRowC r1.13 // :w, last row
.declare GatewayApertureC Base=r1.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureCBase 32 // r1 byte offset from r0.0
//========== Child Variables ============================================================
// Mbaff Alpha, Beta, Tc0 vectors for an edge
.declare Mbaff_ALPHA Base=r14.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r14
.declare Mbaff_BETA Base=r15.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r15
.declare Mbaff_TC0 Base=r16.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r16
.declare RRampW Base=r17.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w // r17
.declare Mbaff_ALPHA2 Base=r45.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // alpha2 = (alpha >> 2) + 2
// Block origins and scalar filter parameters, all in r46.
#define ORIX_CUR r46.0 // :w, current block origin X in bytes
#define ORIY_CUR r46.1 // :w, current block origin Y in bytes
#define ORIX_LEFT r46.2 // :w, left block origin X in bytes
#define ORIY_LEFT r46.3 // :w, left block origin Y in bytes
#define ORIX_TOP r46.4 // :w, top block origin X in bytes
#define ORIY_TOP r46.5 // :w, top block origin Y in bytes
//#define FilterSampleFlag r46.6 // :uw,
#define CTemp0_W r46.7 // :w, child Temp0
#define alpha r46.8 // :w, Scalar version for non Mbaff
#define beta r46.9 // :w, Scalar version for non Mbaff
#define tc0 r46.20 // 4 :ub, r46.20 ~ r46.23, Scalar version for non Mbaff
#define MaskA r46.12 // :uw
#define MaskB r46.13 // :uw
// Child control flags
#define DualFieldMode r47.0 // Cur MB is frame based, above MB is field based in mbaff mode
// :uw, 0 = not in dual field mode, 1 = in dual field mode, filter both top and bot fields
// GateWayOffsetC (:w) and CntrlDataOffsetY (:ud) both carry index 1 but use different
// element types, so they name different bytes of r47.
#define GateWayOffsetC r47.1 // :w, Gateway offset for child writing into root space
#define CntrlDataOffsetY r47.1 // :ud, MB control data data offset
#define alpha2 r47.4 // :uw, alpha2 = (alpha >> 2) + 2
#define VertEdgePattern r47.5 // :uw,
// CTemp1_W/CTemp1_B and CTemp2_W/CTemp2_B are word and byte views of the same storage
// (word 6 = byte 12, word 7 = byte 14).
#define CTemp1_W r47.6 // :w, child Temp1
#define CTemp1_B r47.12 // :b, = child Temp1_W
#define CTemp2_W r47.7 // :w, child Temp2
#define CTemp2_B r47.14 // :b, = child Temp2_W
// Used in child
// Address registers. a0.4 and a0.7 also carry root-side names in this file
// (StatusAddr and JumpAddr respectively).
#define ECM_AddrReg a0.4 // Edge Control Map register
#define P_AddrReg a0.6 // point to P samples in left or top MB
#define Q_AddrReg a0.7 // point to Q samples in cur MB
// Scratch registers for the child kernels. Declarations that share a Base are
// different typed views (or different names) of the same GRF storage:
//   r26-r27: RTempD / RTempB / RTempW (LEFT_TEMP_*), TempRow0(B) at r26, TempRow1(B) at r27
//   r28-   : CUR_TEMP_D / _B / _W, FilterSampleFlag (r28.0), A (r28); BB starts at r29
//   r30    : TempRow3(B), tc0_exp, tc8
//   r31    : tc_exp, tx_exp_8
.declare RTempD Base=r26.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d // r26-27
.declare RTempB Base=r26.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub // r26-27
.declare RTempW Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w // r26-27
#define LEFT_TEMP_D RTempD
#define LEFT_TEMP_B RTempB
#define LEFT_TEMP_W RTempW
.declare TempRow0 Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow0B Base=r26.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare TempRow1 Base=r27.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow1B Base=r27.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare CUR_TEMP_D Base=r28.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d // 8 GRFs
.declare CUR_TEMP_B Base=r28.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare CUR_TEMP_W Base=r28.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
#define FilterSampleFlag r28.0 // :uw,
.declare A Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare BB Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare TempRow3 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow3B Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare tc0_exp Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tc8 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tc_exp Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tx_exp_8 Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
// One word-typed GRF each for the intermediate values named below.
.declare q0_p0 Base=r32.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare ABS_q0_p0 Base=r33.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare ap Base=r34.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare aq Base=r35.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
// These buffers have the src data for each edge to be deblocked.
// They have modified pixels from previous edges.
//
// Y:
// +----+----+----+----+----+----+----+----+
// | p3 | p2 | p1 | p0 | q0 | q1 | q2 | q3 |
// +----+----+----+----+----+----+----+----+
//
// p3 = r[P_AddrReg, 0]<16;16,1>
// p2 = r[P_AddrReg, 16]<16;16,1>
// p1 = r[P_AddrReg, 32]<16;16,1>
// p0 = r[P_AddrReg, 48]<16;16,1>
// q0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = r[Q_AddrReg, 48]<16;16,1>
// Direct (non-indirect) names for the same layout: the P samples occupy r36-r37 and the
// Q samples r38-r39, 16 bytes per row, two rows per GRF.
.declare p0123_W Base=r36.0 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // r36, r37
.declare q0123_W Base=r38.0 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // r38, r39
.declare p3 Base=r36.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p2 Base=r36.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p1 Base=r37.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p0 Base=r37.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q0 Base=r38.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q1 Base=r38.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q2 Base=r39.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q3 Base=r39.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
// TempRow2 shares its Base (r38.0) with q0123_W / q0.
.declare TempRow2 Base=r38.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
// Temp space for mbaff dual field mode
// NOTE(review): GRFWIB is defined outside this part of the file; presumably the GRF
// width in bytes, given the "byte offset to r40" comment - confirm.
#define ABOVE_CUR_MB_BASE 40*GRFWIB // Byte offset to r40
// The Y and U names below are declared identically (same Base r40, same type).
.declare ABOVE_CUR_MB_YW Base=r40 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare ABOVE_CUR_MB_UW Base=r40 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
// One word-typed GRF each (r41-r44) for the pairwise sample sums named below.
.declare P0_plus_P1 Base=r41.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare Q0_plus_Q1 Base=r42.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare P2_plus_P3 Base=r43.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare Q2_plus_Q3 Base=r44.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
//////////////////////////////////////////////////////////////////////////////////////////
// MB control data reference
// Expanded control data is in r18 - r25
.declare CNTRL_DATA_D Base=r18 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // For read, 8 GRFs
#define CNTRL_DATA_BASE 18*GRFWIB // Base offset to r18
// Bit mask for extracting bits
// One bit per flag, 0x01 through 0x80.
#define MbaffFrameFlag 0x01
#define FieldModeCurrentMbFlag 0x02
#define FieldModeLeftMbFlag 0x04
#define FieldModeAboveMbFlag 0x08
#define FilterInternal8x8EdgesFlag 0x10
#define FilterInternal4x4EdgesFlag 0x20
#define FilterLeftMbEdgeFlag 0x40
#define FilterTopMbEdgeFlag 0x80
// NOTE(review): presumably a bit of the ExtBitFlags byte rather than of the flags above
// (its value coincides with MbaffFrameFlag) - confirm against the code that tests it.
#define DISABLE_ILDB_FLAG 0x01
// Exact bit pattern for left and cur MB coding mode (frame vs. field)
// Combinations of FieldModeLeftMbFlag (0x04) and FieldModeCurrentMbFlag (0x02).
#define LEFT_FRAME_CUR_FRAME 0x00
#define LEFT_FRAME_CUR_FIELD 0x02
#define LEFT_FIELD_CUR_FRAME 0x04
#define LEFT_FIELD_CUR_FIELD 0x06
// Exact bit pattern for above and cur MB coding mode (frame vs. field)
// Combinations of FieldModeAboveMbFlag (0x08) and FieldModeCurrentMbFlag (0x02).
#define ABOVE_FRAME_CUR_FRAME 0x00
#define ABOVE_FRAME_CUR_FIELD 0x02
#define ABOVE_FIELD_CUR_FRAME 0x08
#define ABOVE_FIELD_CUR_FIELD 0x0A
//========== MB control data field offset in byte ==========
#if !defined(_APPLE)
// GRF0 - GRF1 holds original control data
// GRF0
#define HorizOrigin 0
#define VertOrigin 1
#define BitFlags 2 // Bit flags
#define bbSinternalLeftVert 4 // Internal left vertical bS, 2 bits per bS for 4 Y pixels and 2 U/V pixels
#define bbSinternalMidVert 5 // Internal mid vertical bS
#define bbSinternalRightVert 6 // Internal right vertical bS
#define bbSinternalTopHorz 7 // Internal top horizontal bS
#define bbSinternalMidHorz 8 // Internal mid horizontal bS
#define bbSinternalBotHorz 9 // Internal bottom horizontal bS
#define wbSLeft0 10 // External left vertical bS (0), 4 bits per bS for 4 Y pixels and 2 U/V pixels, and byte 11
#define wbSLeft1 12 // External left vertical bS (1), and byte 13
#define wbSTop0 14 // External top horizontal bS (0), and byte 15
#define wbSTop1 16 // External top horizontal bS (1), and byte 17
#define bIndexAinternal_Y 18 // Internal index A for Y
#define bIndexBinternal_Y 19 // Internal index B for Y
#define bIndexAleft0_Y 20 // Left index A for Y (0)
#define bIndexBleft0_Y 21 // Left index B for Y (0)
#define bIndexAleft1_Y 22 // Left index A for Y (1)
#define bIndexBleft1_Y 23 // Left index B for Y (1)
#define bIndexAtop0_Y 24 // Top index A for Y (0)
#define bIndexBtop0_Y 25 // Top index B for Y (0)
#define bIndexAtop1_Y 26 // Top index A for Y (1)
#define bIndexBtop1_Y 27 // Top index B for Y (1)
#define bIndexAinternal_Cb 28 // Internal index A for Cb
#define bIndexBinternal_Cb 29 // Internal index B for Cb
#define bIndexAleft0_Cb 30 // Left index A for Cb (0)
#define bIndexBleft0_Cb 31 // Left index B for Cb (0)
// GRF1
#define bIndexAleft1_Cb 32 // Left index A for Cb (1)
#define bIndexBleft1_Cb 33 // Left index B for Cb (1)
#define bIndexAtop0_Cb 34 // Top index A for Cb (0)
#define bIndexBtop0_Cb 35 // Top index B for Cb (0)
#define bIndexAtop1_Cb 36 // Top index A for Cb (1)
#define bIndexBtop1_Cb 37 // Top index B for Cb (1)
#define bIndexAinternal_Cr 38 // Internal index A for Cr
#define bIndexBinternal_Cr 39 // Internal index B for Cr
#define bIndexAleft0_Cr 40 // Left index A for Cr (0)
#define bIndexBleft0_Cr 41 // Left index B for Cr (0)
#define bIndexAleft1_Cr 42 // Left index A for Cr (1)
#define bIndexBleft1_Cr 43 // Left index B for Cr (1)
#define bIndexAtop0_Cr 44 // Top index A for Cr (0)
#define bIndexBtop0_Cr 45 // Top index B for Cr (0)
#define bIndexAtop1_Cr 46 // Top index A for Cr (1)
#define bIndexBtop1_Cr 47 // Top index B for Cr (1)
#define ExtBitFlags 48 // Extended bit flags, such as disable ILDB bits
// Offset 49 - 63 not used
//===== GRF2 - GRF7 hold expanded control data =====
// GRF2
#define wEdgeCntlMap_IntLeftVert 64 // Derived from bbSinternalLeftVert, 1 bit per pixel
#define wEdgeCntlMap_IntMidVert 66 // Derived from bbSinternalMidVert
#define wEdgeCntlMap_IntRightVert 68 // Derived from bbSinternalRightVert
#define wEdgeCntlMap_IntTopHorz 70 // Derived from bbSinternalTopHorz, 1bit per pixel
#define wEdgeCntlMap_IntMidHorz 72 // Derived from bbSinternalMidHorz
#define wEdgeCntlMap_IntBotHorz 74 // Derived from bbSinternalBotHorz
// Offset 76 - 79 not used
#define wEdgeCntlMapA_ExtLeftVert0 80 // Derived from wbSLeft0, 1bit per pixel
#define wEdgeCntlMapB_ExtLeftVert0 82 // Derived from wbSLeft0
#define wEdgeCntlMapA_ExtTopHorz0 84 // Derived from wbSTop0, 1bit per pixel
#define wEdgeCntlMapB_ExtTopHorz0 86 // Derived from wbSTop0
#define wEdgeCntlMapA_ExtLeftVert1 88 // Derived from wbSLeft1, 1bit per pixel
#define wEdgeCntlMapB_ExtLeftVert1 90 // Derived from wbSLeft1
#define wEdgeCntlMapA_ExtTopHorz1 92 // Derived from wbSTop1, 1bit per pixel
#define wEdgeCntlMapB_ExtTopHorz1 94 // Derived from wbSTop1
// GRF3
#define bTc0_v00_0_Y 96 // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
#define bTc0_v10_0_Y 97 // Derived from bSv10_0 and bIndexAleft0_Y
#define bTc0_v20_0_Y 98 // Derived from bSv20_0 and bIndexAleft0_Y
#define bTc0_v30_0_Y 99 // Derived from bSv30_0 and bIndexAleft0_Y
#define bTc0_v01_Y 100 // Derived from bSv01 and bIndexAinternal_Y
#define bTc0_v11_Y 101 // Derived from bSv11 and bIndexAinternal_Y
#define bTc0_v21_Y 102 // Derived from bSv21 and bIndexAinternal_Y
#define bTc0_v31_Y 103 // Derived from bSv31 and bIndexAinternal_Y
#define bTc0_v02_Y 104 // Derived from bSv02 and bIndexAinternal_Y
#define bTc0_v12_Y 105 // Derived from bSv12 and bIndexAinternal_Y
#define bTc0_v22_Y 106 // Derived from bSv22 and bIndexAinternal_Y
#define bTc0_v32_Y 107 // Derived from bSv32 and bIndexAinternal_Y
#define bTc0_v03_Y 108 // Derived from bSv03 and bIndexAinternal_Y
#define bTc0_v13_Y 109 // Derived from bSv13 and bIndexAinternal_Y
#define bTc0_v23_Y 110 // Derived from bSv23 and bIndexAinternal_Y
#define bTc0_v33_Y 111 // Derived from bSv33 and bIndexAinternal_Y
#define bTc0_h00_0_Y 112 // Derived from bSh00_0 and bIndexAleft0_Y
#define bTc0_h01_0_Y 113 // Derived from bSh01_0 and bIndexAleft0_Y
#define bTc0_h02_0_Y 114 // Derived from bSh02_0 and bIndexAleft0_Y
#define bTc0_h03_0_Y 115 // Derived from bSh03_0 and bIndexAleft0_Y
#define bTc0_h10_Y 116 // Derived from bSh10 and bIndexAinternal_Y
#define bTc0_h11_Y 117 // Derived from bSh11 and bIndexAinternal_Y
#define bTc0_h12_Y 118 // Derived from bSh12 and bIndexAinternal_Y
#define bTc0_h13_Y 119 // Derived from bSh13 and bIndexAinternal_Y
#define bTc0_h20_Y 120 // Derived from bSh20 and bIndexAinternal_Y
#define bTc0_h21_Y 121 // Derived from bSh21 and bIndexAinternal_Y
#define bTc0_h22_Y 122 // Derived from bSh22 and bIndexAinternal_Y
#define bTc0_h23_Y 123 // Derived from bSh23 and bIndexAinternal_Y
#define bTc0_h30_Y 124 // Derived from bSh30 and bIndexAinternal_Y
#define bTc0_h31_Y 125 // Derived from bSh31 and bIndexAinternal_Y
#define bTc0_h32_Y 126 // Derived from bSh32 and bIndexAinternal_Y
#define bTc0_h33_Y 127 // Derived from bSh33 and bIndexAinternal_Y
// GRF4
#define bAlphaLeft0_Y 128 // Derived from bIndexAleft0_Y
#define bBetaLeft0_Y 129 // Derived from bIndexBleft0_Y
#define bAlphaTop0_Y 130 // Derived from bIndexAtop0_Y
#define bBetaTop0_Y 131 // Derived from bIndexBtop0_Y
#define bAlphaInternal_Y 132 // Derived from bIndexAinternal_Y
#define bBetaInternal_Y 133 // Derived from bIndexBinternal_Y
// Offset 134 - 135 not used
// Offset 136 - 143 not used
#define bAlphaLeft1_Y 144 // Derived from bIndexAleft1_Y Used in Mbaff mode only
#define bBetaLeft1_Y 145 // Derived from bIndexBleft1_Y Used in Mbaff mode only
#define bAlphaTop1_Y 146 // Derived from bIndexAtop1_Y Used in Mbaff mode only
#define bBetaTop1_Y 147 // Derived from bIndexBtop1_Y Used in Mbaff mode only
// Offset 148 - 151 not used
#define bTc0_v00_1_Y 152 // Derived from bSv00_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v10_1_Y 153 // Derived from bSv10_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v20_1_Y 154 // Derived from bSv20_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v30_1_Y 155 // Derived from bSv30_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h00_1_Y 156 // Derived from bSh00_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h01_1_Y 157 // Derived from bSh01_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h02_1_Y 158 // Derived from bSh02_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h03_1_Y 159 // Derived from bSh03_1 and bIndexAleft1_Y Used in Mbaff mode only
// GRF5
#define bTc0_v00_0_Cb 160 // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cb 161 // Derived from bSv10_0 and bIndexAleft0_Cb
#define bTc0_v20_0_Cb 162 // Derived from bSv20_0 and bIndexAleft0_Cb
#define bTc0_v30_0_Cb 163 // Derived from bSv30_0 and bIndexAleft0_Cb
#define bTc0_v02_Cb 164 // Derived from bSv02 and bIndexAinternal_Cb MidVert
#define bTc0_v12_Cb 165 // Derived from bSv12 and bIndexAinternal_Cb
#define bTc0_v22_Cb 166 // Derived from bSv22 and bIndexAinternal_Cb
#define bTc0_v32_Cb 167 // Derived from bSv32 and bIndexAinternal_Cb
#define bTc0_h00_0_Cb 168 // Derived from bSh00_0 and bIndexAleft0_Cb Top0
#define bTc0_h01_0_Cb 169 // Derived from bSh01_0 and bIndexAleft0_Cb
#define bTc0_h02_0_Cb 170 // Derived from bSh02_0 and bIndexAleft0_Cb
#define bTc0_h03_0_Cb 171 // Derived from bSh03_0 and bIndexAleft0_Cb
#define bTc0_h20_Cb 172 // Derived from bSh20 and bIndexAinternal_Cb MidHorz
#define bTc0_h21_Cb 173 // Derived from bSh21 and bIndexAinternal_Cb
#define bTc0_h22_Cb 174 // Derived from bSh22 and bIndexAinternal_Cb
#define bTc0_h23_Cb 175 // Derived from bSh23 and bIndexAinternal_Cb
#define bTc0_v00_0_Cr 176 // Derived from bSv00_0 and bIndexAleft0_Cr, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cr 177 // Derived from bSv10_0 and bIndexAleft0_Cr
#define bTc0_v20_0_Cr 178 // Derived from bSv20_0 and bIndexAleft0_Cr
#define bTc0_v30_0_Cr 179 // Derived from bSv30_0 and bIndexAleft0_Cr
#define bTc0_v02_Cr 180 // Derived from bSv02 and bIndexAinternal_Cr Mid Vert
#define bTc0_v12_Cr 181 // Derived from bSv12 and bIndexAinternal_Cr
#define bTc0_v22_Cr 182 // Derived from bSv22 and bIndexAinternal_Cr
#define bTc0_v32_Cr 183 // Derived from bSv32 and bIndexAinternal_Cr
#define bTc0_h00_0_Cr 184 // Derived from bSh00_0 and bIndexAleft0_Cr, 2 pixels per tc0 Top0
#define bTc0_h01_0_Cr 185 // Derived from bSh01_0 and bIndexAleft0_Cr
#define bTc0_h02_0_Cr 186 // Derived from bSh02_0 and bIndexAleft0_Cr
#define bTc0_h03_0_Cr 187 // Derived from bSh03_0 and bIndexAleft0_Cr
#define bTc0_h20_Cr 188 // Derived from bSh20 and bIndexAinternal_Cr Mid Horz
#define bTc0_h21_Cr 189 // Derived from bSh21 and bIndexAinternal_Cr
#define bTc0_h22_Cr 190 // Derived from bSh22 and bIndexAinternal_Cr
#define bTc0_h23_Cr 191 // Derived from bSh23 and bIndexAinternal_Cr
// GRF6
#define bAlphaLeft0_Cb 192 // Derived from bIndexAleft0_Cb
#define bBetaLeft0_Cb 193 // Derived from bIndexBleft0_Cb
#define bAlphaTop0_Cb 194 // Derived from bIndexAtop0_Cb
#define bBetaTop0_Cb 195 // Derived from bIndexBtop0_Cb
#define bAlphaInternal_Cb 196 // Derived from bIndexAinternal_Cb
#define bBetaInternal_Cb 197 // Derived from bIndexBinternal_Cb
// Offset 198 - 199 not used
#define bAlphaLeft0_Cr 200 // Derived from bIndexAleft0_Cr
#define bBetaLeft0_Cr 201 // Derived from bIndexBleft0_Cr
#define bAlphaTop0_Cr 202 // Derived from bIndexAtop0_Cr
#define bBetaTop0_Cr 203 // Derived from bIndexBtop0_Cr
#define bAlphaInternal_Cr 204 // Derived from bIndexAinternal_Cr
#define bBetaInternal_Cr 205 // Derived from bIndexBinternal_Cr
// Offset 206 - 223 not used
// GRF7
#define bAlphaLeft1_Cb 224 // Derived from bIndexAleft1_Cb Used in Mbaff mode only
#define bBetaLeft1_Cb 225 // Derived from bIndexBleft1_Cb Used in Mbaff mode only
#define bAlphaTop1_Cb 226 // Derived from bIndexAtop1_Cb Used in Mbaff mode only
#define bBetaTop1_Cb 227 // Derived from bIndexBtop1_Cb Used in Mbaff mode only
// Offset 228 - 231 not used
#define bTc0_v00_1_Cb 232 // Derived from bSv00_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v10_1_Cb 233 // Derived from bSv10_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v20_1_Cb 234 // Derived from bSv20_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v30_1_Cb 235 // Derived from bSv30_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h00_1_Cb 236 // Derived from bSh00_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h01_1_Cb 237 // Derived from bSh01_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h02_1_Cb 238 // Derived from bSh02_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h03_1_Cb 239 // Derived from bSh03_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bAlphaLeft1_Cr 240 // Derived from bIndexAleft1_Cr Used in Mbaff mode only
#define bBetaLeft1_Cr 241 // Derived from bIndexBleft1_Cr Used in Mbaff mode only
#define bAlphaTop1_Cr 242 // Derived from bIndexAtop1_Cr Used in Mbaff mode only
#define bBetaTop1_Cr 243 // Derived from bIndexBtop1_Cr Used in Mbaff mode only
// Offset 244 - 247 not used
#define bTc0_v00_1_Cr 248 // Derived from bSv00_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v10_1_Cr 249 // Derived from bSv10_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v20_1_Cr 250 // Derived from bSv20_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v30_1_Cr 251 // Derived from bSv30_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h00_1_Cr 252 // Derived from bSh00_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h01_1_Cr 253 // Derived from bSh01_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h02_1_Cr 254 // Derived from bSh02_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h03_1_Cr 255 // Derived from bSh03_1 and bIndexAleft1_Cr Used in Mbaff mode only
#else // _APPLE is defined
//******** Crestline for Apple, progressive only, 88 bytes **********
// Byte offsets of the per-MB control data fields for this layout.
// Names prefixed with 'w' are spaced two bytes apart, names prefixed with 'b' one byte apart.
// The highest assigned offset is 86 (ExtBitFlags). No *_1_* (second field, Mbaff-only) entries
// are defined in this layout.
// GRF0
#define HorizOrigin 0
#define VertOrigin 1
#define BitFlags 2 // Bit flags
#define wEdgeCntlMap_IntLeftVert 4 // Derived from bbSinternalLeftVert, 1 bit per pixel
#define wEdgeCntlMap_IntMidVert 6 // Derived from bbSinternalMidVert (original comment said bbSinternalLeftVert - presumably a copy-paste slip; confirm)
#define wEdgeCntlMap_IntRightVert 8 // Derived from bbSinternalRightVert
#define wEdgeCntlMap_IntTopHorz 10 // Derived from bbSinternalTopHorz, 1 bit per pixel
#define wEdgeCntlMap_IntMidHorz 12 // Derived from bbSinternalMidHorz
#define wEdgeCntlMap_IntBotHorz 14 // Derived from bbSinternalBotHorz
#define wEdgeCntlMapA_ExtLeftVert0 16 // Derived from wbSLeft0, 1 bit per pixel
#define wEdgeCntlMapB_ExtLeftVert0 18 // Derived from wbSLeft0
#define wEdgeCntlMapA_ExtTopHorz0 20 // Derived from wbSTop0, 1 bit per pixel
#define wEdgeCntlMapB_ExtTopHorz0 22 // Derived from wbSTop0
#define bAlphaLeft0_Y 24 // Derived from bIndexAleft0_Y
#define bBetaLeft0_Y 25 // Derived from bIndexBleft0_Y
#define bAlphaTop0_Y 26 // Derived from bIndexAtop0_Y
#define bBetaTop0_Y 27 // Derived from bIndexBtop0_Y
#define bAlphaInternal_Y 28 // Derived from bIndexAinternal_Y
#define bBetaInternal_Y 29 // Derived from bIndexBinternal_Y
// Offset 30 - 31 not assigned
// GRF1: luma tc0 values, offsets 32 - 63 (16 vertical-edge entries, then 16 horizontal-edge entries)
#define bTc0_v00_0_Y 32 // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
#define bTc0_v10_0_Y 33 // Derived from bSv10_0 and bIndexAleft0_Y
#define bTc0_v20_0_Y 34 // Derived from bSv20_0 and bIndexAleft0_Y
#define bTc0_v30_0_Y 35 // Derived from bSv30_0 and bIndexAleft0_Y
#define bTc0_v01_Y 36 // Derived from bSv01 and bIndexAinternal_Y
#define bTc0_v11_Y 37 // Derived from bSv11 and bIndexAinternal_Y
#define bTc0_v21_Y 38 // Derived from bSv21 and bIndexAinternal_Y
#define bTc0_v31_Y 39 // Derived from bSv31 and bIndexAinternal_Y
#define bTc0_v02_Y 40 // Derived from bSv02 and bIndexAinternal_Y
#define bTc0_v12_Y 41 // Derived from bSv12 and bIndexAinternal_Y
#define bTc0_v22_Y 42 // Derived from bSv22 and bIndexAinternal_Y
#define bTc0_v32_Y 43 // Derived from bSv32 and bIndexAinternal_Y
#define bTc0_v03_Y 44 // Derived from bSv03 and bIndexAinternal_Y
#define bTc0_v13_Y 45 // Derived from bSv13 and bIndexAinternal_Y
#define bTc0_v23_Y 46 // Derived from bSv23 and bIndexAinternal_Y
#define bTc0_v33_Y 47 // Derived from bSv33 and bIndexAinternal_Y
#define bTc0_h00_0_Y 48 // Derived from bSh00_0 and bIndexAleft0_Y
#define bTc0_h01_0_Y 49 // Derived from bSh01_0 and bIndexAleft0_Y
#define bTc0_h02_0_Y 50 // Derived from bSh02_0 and bIndexAleft0_Y
#define bTc0_h03_0_Y 51 // Derived from bSh03_0 and bIndexAleft0_Y
#define bTc0_h10_Y 52 // Derived from bSh10 and bIndexAinternal_Y
#define bTc0_h11_Y 53 // Derived from bSh11 and bIndexAinternal_Y
#define bTc0_h12_Y 54 // Derived from bSh12 and bIndexAinternal_Y
#define bTc0_h13_Y 55 // Derived from bSh13 and bIndexAinternal_Y
#define bTc0_h20_Y 56 // Derived from bSh20 and bIndexAinternal_Y
#define bTc0_h21_Y 57 // Derived from bSh21 and bIndexAinternal_Y
#define bTc0_h22_Y 58 // Derived from bSh22 and bIndexAinternal_Y
#define bTc0_h23_Y 59 // Derived from bSh23 and bIndexAinternal_Y
#define bTc0_h30_Y 60 // Derived from bSh30 and bIndexAinternal_Y
#define bTc0_h31_Y 61 // Derived from bSh31 and bIndexAinternal_Y
#define bTc0_h32_Y 62 // Derived from bSh32 and bIndexAinternal_Y
#define bTc0_h33_Y 63 // Derived from bSh33 and bIndexAinternal_Y
// GRF2: chroma tc0 values at 64 - 79, chroma alpha/beta at 80 - 85, ExtBitFlags at 86
#define bTc0_v00_0_Cb 64 // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cb 65 // Derived from bSv10_0 and bIndexAleft0_Cb
#define bTc0_v20_0_Cb 66 // Derived from bSv20_0 and bIndexAleft0_Cb
#define bTc0_v30_0_Cb 67 // Derived from bSv30_0 and bIndexAleft0_Cb
#define bTc0_v02_Cb 68 // Derived from bSv02 and bIndexAinternal_Cb MidVert
#define bTc0_v12_Cb 69 // Derived from bSv12 and bIndexAinternal_Cb
#define bTc0_v22_Cb 70 // Derived from bSv22 and bIndexAinternal_Cb
#define bTc0_v32_Cb 71 // Derived from bSv32 and bIndexAinternal_Cb
#define bTc0_h00_0_Cb 72 // Derived from bSh00_0 and bIndexAleft0_Cb Top0
#define bTc0_h01_0_Cb 73 // Derived from bSh01_0 and bIndexAleft0_Cb
#define bTc0_h02_0_Cb 74 // Derived from bSh02_0 and bIndexAleft0_Cb
#define bTc0_h03_0_Cb 75 // Derived from bSh03_0 and bIndexAleft0_Cb
#define bTc0_h20_Cb 76 // Derived from bSh20 and bIndexAinternal_Cb MidHorz
#define bTc0_h21_Cb 77 // Derived from bSh21 and bIndexAinternal_Cb
#define bTc0_h22_Cb 78 // Derived from bSh22 and bIndexAinternal_Cb
#define bTc0_h23_Cb 79 // Derived from bSh23 and bIndexAinternal_Cb
#define bAlphaLeft0_Cb 80 // Derived from bIndexAleft0_Cb
#define bBetaLeft0_Cb 81 // Derived from bIndexBleft0_Cb
#define bAlphaTop0_Cb 82 // Derived from bIndexAtop0_Cb
#define bBetaTop0_Cb 83 // Derived from bIndexBtop0_Cb
#define bAlphaInternal_Cb 84 // Derived from bIndexAinternal_Cb
#define bBetaInternal_Cb 85 // Derived from bIndexBinternal_Cb
#define ExtBitFlags 86 // Extended bit flags, such as disable ILDB bits
// Shared between Cb and Cr: in this layout every *_Cr name is an alias of the matching *_Cb
// name, so code addressing a Cr field reads the same byte as the corresponding Cb field.
#define bTc0_v00_0_Cr bTc0_v00_0_Cb
#define bTc0_v10_0_Cr bTc0_v10_0_Cb
#define bTc0_v20_0_Cr bTc0_v20_0_Cb
#define bTc0_v30_0_Cr bTc0_v30_0_Cb
#define bTc0_v02_Cr bTc0_v02_Cb
#define bTc0_v12_Cr bTc0_v12_Cb
#define bTc0_v22_Cr bTc0_v22_Cb
#define bTc0_v32_Cr bTc0_v32_Cb
#define bTc0_h00_0_Cr bTc0_h00_0_Cb
#define bTc0_h01_0_Cr bTc0_h01_0_Cb
#define bTc0_h02_0_Cr bTc0_h02_0_Cb
#define bTc0_h03_0_Cr bTc0_h03_0_Cb
#define bTc0_h20_Cr bTc0_h20_Cb
#define bTc0_h21_Cr bTc0_h21_Cb
#define bTc0_h22_Cr bTc0_h22_Cb
#define bTc0_h23_Cr bTc0_h23_Cb
#define bAlphaLeft0_Cr bAlphaLeft0_Cb
#define bBetaLeft0_Cr bBetaLeft0_Cb
#define bAlphaTop0_Cr bAlphaTop0_Cb
#define bBetaTop0_Cr bBetaTop0_Cb
#define bAlphaInternal_Cr bAlphaInternal_Cb
#define bBetaInternal_Cr bBetaInternal_Cb
#endif
//========== End of Child Variables ===============================================================
// ILDB_LABEL(x): wrapper for label names used by the ILDB kernels.
// When COMBINED_KERNEL is not defined, it expands to x unchanged.
// The COMBINED_KERNEL definition is not provided here; presumably it is supplied by the
// combined-kernel build and adds a per-kernel symbol extension - confirm.
#if !defined(COMBINED_KERNEL)
#define ILDB_LABEL(x) x // No symbol extension for standalone kernels
#endif
#endif // !defined(__AVC_ILDB_HEADER__)

View File

@@ -0,0 +1,39 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "AVC_ILDB_Child_UV.asm"

View File

@@ -0,0 +1,39 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "AVC_ILDB_Child_Y.asm"

View File

@@ -0,0 +1,203 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
//
// First de-block vertical edges from left to right.
// Second de-block horizontal edge from top to bottom.
//
// For 4:2:0, chroma is always de-blocked at 8x8.
// NV12 format allows to filter U and V together.
//
// ***** MBAFF Mode *****
// The code from RE_ENTRY_UV down to the loop-back jmpi is executed twice:
// first with BotFieldFlag cleared (top MB), then with BotFieldFlag set (bottom MB).
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_UV
#if defined(COMBINED_KERNEL)
// NOTE(review): the entry label omits "MBAFF" although the kernel is AVC_ILDB_CHILD_MBAFF_UV;
// presumably the combined-kernel ILDB_LABEL adds a distinguishing extension - confirm.
ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE997:w
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_W RTempW
#define BUF_D RTempD
// Init local variables
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
mov (2) f0.0<1>:w 0:w // Clear both flag words f0.0 and f0.1
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_UV)
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds global byte offset.
#if !defined(DEV_CL)
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw // 128 = 64 << 1, i.e. the formula above with Mbaff_flag = 1
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
// Build a ramp from 0 to 15
mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub // Same 8 bytes of RampConstC copied into both halves
add (8) RRampW(0,8)<1> RRampW(0,8) 8:w // Add 8 to the upper 8 words to complete the 16-entry ramp
ILDB_LABEL(RE_ENTRY_UV): // Re-entry point for the bottom field
// Load current MB control data
#if defined(DEV_CL)
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init ECM_AddrReg
// Use free cycles here
// Check loaded control data
// f0.1 and f0.0 set here are consumed by the two predicated jmpi instructions further down.
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB
(f0.0) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB
// Set DualFieldMode for all data read, write and deblocking
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w
and (1) DualFieldMode:w f0.0:w 0x0001:w // DualFieldMode = bit 0 of f0.0, i.e. the result of the compare above
#include "load_Cur_UV_8x8T_Mbaff.asm" // Load transposed data 8x8
#include "load_Left_UV_2x8T_Mbaff.asm" // Load left MB (2x8) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm"
#include "Transpose_Left_UV_2x8.asm"
//---------- Perform vertical ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_v.asm"
//---------------------------------------------------------
#include "save_Left_UV_8x2T_Mbaff.asm" // Write left MB (2x8) UV data to memory if exists
#include "load_Top_UV_8x2_Mbaff.asm" // Load top MB (8x2) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_h.asm"
//-----------------------------------------------------------
#include "save_Cur_UV_8x8_Mbaff.asm" // Write 8x8
#include "save_Top_UV_8x2_Mbaff.asm" // Write top MB (8x2) if not the top row
//-----------------------------------------------------------
ILDB_LABEL(SKIP_ILDB_UV):
// Test BotFieldFlag BEFORE setting it: f0.0 is set only when the pass just finished was the top MB.
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi ILDB_LABEL(RE_ENTRY_UV) // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0) // Self-move; presumably stalls until the write response has arrived - confirm
ILDB_LABEL(POST_ILDB_UV):
// Send notification thru Gateway to root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Chroma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,218 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edge from top to bottom.
//
// ***** MBAFF Mode *****
// This version deblocks top MB first, followed by bottom MB.
//
// Need variable CurMB to indicate top MB or bottom MB (CurMB = 0 or 1).
// We can use BotFieldFlag in BitFields to represent it.
//
// Usage:
// 1) Access control data for top
// CntrlDataOffsetY + CurMB * Control data block size (64 DWs for CL, 16 DWs for BLC)
//
// 2) Load frame/field video data based on flags: FieldModeCurrentMbFlag, FieldModeLeftMbFlag, FieldModeAboveMbFlag,
//
// E.g.
// if (pCntlData->BitField & FieldModeCurrentMbFlag)
// cur_y = ORIX_CUR.y + CurMB * 1; // Add field vertical offset for bot field MB .
// else
// cur_y = ORIX_CUR.y + CurMB * MB_Rows_Y; // Add bottom MB vertical offset for bot MB
//
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE998:w
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_D RTempD
// Init local variables
// These coordinates are in progressive fashion
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
mov (2) f0.0<1>:w 0:w // Clear both flag words f0.0 and f0.1
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi POST_ILDB
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds global byte offset.
#if !defined(DEV_CL)
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw // 128 = 64 << 1, i.e. the formula above with Mbaff_flag = 1
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
// NOTE(review): RE_ENTRY, SKIP_ILDB and POST_ILDB below are plain labels, whereas the MBAFF UV
// kernel wraps its labels in ILDB_LABEL(); confirm these cannot collide in a COMBINED_KERNEL build.
RE_ENTRY: // Re-entry point for the bottom field
// Load current MB control data
#if defined(DEV_CL)
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init edge control map AddrReg
// Check loaded control data
// f0.1 and f0.0 set here are consumed by the two predicated jmpi instructions further down.
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Use free cycles here
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi SKIP_ILDB // Skip ILDB
(f0.0) jmpi SKIP_ILDB // Skip ILDB
// Set DualFieldMode for all data read, write and deblocking
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w
and (1) DualFieldMode:w f0.0:w 0x0001:w // DualFieldMode = bit 0 of f0.0, i.e. the result of the compare above
// Load current MB // DDD1
#include "load_Cur_Y_16x16T_Mbaff.asm" // Load cur Y, 16x16, transpose
#include "load_Left_Y_4x16T_Mbaff.asm" // Load left MB (4x16) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm"
#include "Transpose_Left_Y_4x16.asm"
//---------- Perform vertical ILDB filtering on Y ----------
#include "AVC_ILDB_Filter_Mbaff_Y_v.asm"
//-------------------------------------------------------
#include "save_Left_Y_16x4T_Mbaff.asm" // Write left MB (4x16) Y data to memory if exists
#include "load_Top_Y_16x4_Mbaff.asm" // Load top MB (16x4) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on Y ----------
#include "AVC_ILDB_Filter_Mbaff_Y_h.asm"
//----------------------------------------------------------
#include "save_Cur_Y_16x16_Mbaff.asm" // Write cur MB (16x16)
#include "save_Top_Y_16x4_Mbaff.asm" // Write top MB (16x4) if not the top row
SKIP_ILDB:
//----------------------------------------------------------
// Test BotFieldFlag BEFORE setting it: f0.0 is set only when the pass just finished was the top MB.
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi RE_ENTRY // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0) // Self-move; presumably stalls until the write response has arrived - confirm
POST_ILDB:
//---------------------------------------------------------------------------
// Send notification thru Gateway to root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Luma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,216 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (vertical and horizontal de-blocking of the UV component of a 4:2:0 MB)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edges from top to bottom.
//
// For 4:2:0, chroma is always de-blocked at 8x8.
// The NV12 format allows U and V to be filtered together.
//
// Flow of this kernel:
//   1. Set up addresses/offsets and load the control data of the current MB.
//   2. If all edge control map words tested below are zero, or DISABLE_ILDB_FLAG is set in
//      ExtBitFlags, branch to READ_FOR_URB_UV (no filtering is performed).
//   3. Otherwise load the MB, filter vertical edges, transpose, filter horizontal edges, save.
//   4. Both paths copy one GRF (LEFT_TEMP_D(1)) to m1 for the URB write and then include
//      AVC_ILDB_ForwardMsg.asm before ending the thread.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_UV
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
// Debug builds only: write an entry marker for this kernel
mov (1) EntrySignatureC:w 0x9997:w
#endif
// Init local variables
shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init ECM_AddrReg
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_UV_UV)
//===============================================================================
// acc0.0 is preloaded here (240 for DEV_CL, 320 otherwise) and consumed by the mac below
#if defined(DEV_CL)
mov (1) acc0.0:w 240:w
#else
//====================================================================================
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
// Assign to MSGSRC.2:ud for memory access
// mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 64:uw
mul (1) MSGSRC.2:ud CntrlDataOffsetY:ud 64:uw
mov (1) acc0.0:w 320:w
#endif
// mac accumulates onto acc0, so URBOffsetC = (preloaded acc0 value) + ORIY * 4
mac (1) URBOffsetC:w ORIY:w 4:w // UV URB entries are right after Y entries
// Init local variables
// shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
// Subtract 4 from the left and top origins
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
// Build a 16-entry ramp: copy the 8 bytes of RampConstC into both halves, then add 8 to the upper half
mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub
// NOTE(review): the original comment here read "RRampW = ramp 15-0", which disagrees with the
// "ramp from 0 to 15" description; the direction depends on the contents of RampConstC - confirm.
add (8) RRampW(0,8)<1> RRampW(0,8) 8:w
// Load current MB control data
#if defined(DEV_CL)
#if defined(_APPLE)
#include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Check loaded control data
// f0.1 is set per channel where the tested edge control map word is zero.
#if defined(_APPLE)
and.z.f0.1 (8) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw 0xFFFF:uw // Skip ILDB?
(f0.1) and.z.f0.1 (2) null<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw 0xFFFF:uw // Skip ILDB?
#else
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
#endif
// f0.0 is set when DISABLE_ILDB_FLAG is present in ExtBitFlags
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
// Skip filtering when every tested control word is zero ...
#if defined(_APPLE)
(f0.1.all8h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#else
(f0.1.all16h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#endif
// ... or when ILDB is explicitly disabled for this MB
(f0.0) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#include "load_Cur_UV_8x8T.asm" // Load transposed data 8x8
// #include "load_Left_UV_2x8T.asm"
#include "load_Top_UV_8x2.asm" // Load top MB (8x2) UV data from memory if it exists
#include "Transpose_Cur_UV_8x8.asm"
// #include "Transpose_Left_UV_2x8.asm"
//---------- Perform vertical ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_UV_v.asm"
//---------------------------------------------------------
#include "save_Left_UV_8x2T.asm" // Write left MB (2x8) UV data to memory if it exists
#include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_UV_h.asm"
//-----------------------------------------------------------
#include "save_Cur_UV_8x8.asm" // Write 8x8
#include "save_Top_UV_8x2.asm" // Write top MB (8x2) if not the top row
//---------- Write right-most 2 (word) columns of cur MB to URB ----------
// Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D. It is 2 left most cols in cur MB.
#include "Transpose_Cur_UV_2x8.asm"
ILDB_LABEL(WRITE_URB_UV):
mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF to 1 URB entry (U+V)
#include "writeURB_UV_Child.asm"
//-----------------------------------------------------------------
//=========== Check write commit of the last write ============
// Self-move of WritebackResponse(0); presumably stalls until the write-back data has arrived - confirm.
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
ILDB_LABEL(POST_ILDB_UV):
//---------------------------------
// Send notification through the Gateway to the root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
// Child sends EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
ILDB_LABEL(READ_FOR_URB_UV):
// Skip path: no filtering was done, but we
// still need to prepare URB data for the right neighbor MB
#include "load_Cur_UV_Right_Most_2x8.asm" // Load cur MB (right-most 2x8) UV data from memory
#include "Transpose_Cur_UV_Right_Most_2x8.asm"
// jmpi ILDB_LABEL(WRITE_URB_UV)
// The URB write and notification below repeat the sequence used after WRITE_URB_UV
// (the jump to that label is commented out above).
mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF to 1 URB entry (U+V)
#include "writeURB_UV_Child.asm"
//-----------------------------------------------------------------
// Send notification through the Gateway to the root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
// Child sends EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
// #include "AVC_ILDB_Luma_Core.asm"
#include "AVC_ILDB_Chroma_Core.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,206 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (vertical and horizontal de-blocking of the Y component of a 4:2:0 MB)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edges from top to bottom.
//
// If transform_size_8x8_flag = 1, luma is de-blocked at 8x8. Otherwise, luma is de-blocked at 4x4.
//
// Flow of this kernel:
//   1. Set up addresses/offsets and load the control data of the current MB.
//   2. If all edge control map words tested below are zero, or DISABLE_ILDB_FLAG is set in
//      ExtBitFlags, branch to READ_FOR_URB_Y (no filtering is performed).
//   3. Otherwise load the MB, filter vertical edges, transpose, filter horizontal edges, save.
//   4. Both paths copy two GRFs (starting at LEFT_TEMP_D(2)) to m1 for the URB write and then
//      include AVC_ILDB_ForwardMsg.asm before ending the thread.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
// Debug builds only: write an entry marker for this kernel
mov (1) EntrySignatureC:w 0x9998:w
#endif
// Init local variables
shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init edge control map AddrReg
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_Y)
//===============================================================================
// URBOffsetC = ORIY * 4
mul (1) URBOffsetC:uw ORIY:uw 4:w
#if !defined(DEV_CL)
//====================================================================================
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
// Assign to MSGSRC.2:ud for memory access
// mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 64:uw
mul (1) MSGSRC.2:ud CntrlDataOffsetY:ud 64:uw
#endif
// Load current MB control data
#if defined(DEV_CL)
#if defined(_APPLE)
#include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Check loaded control data
// f0.1 is set per channel where the tested edge control map word is zero.
#if defined(_APPLE)
and.z.f0.1 (8) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw 0xFFFF:uw // Skip ILDB?
(f0.1) and.z.f0.1 (2) null<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw 0xFFFF:uw // Skip ILDB?
#else
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
#endif
// f0.0 is set when DISABLE_ILDB_FLAG is present in ExtBitFlags
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Use free cycles here (between the flag-setting compares and the branches that consume them)
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
// add (1) ORIY_TOP:w ORIY_TOP:w -4:w
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
// Skip filtering when every tested control word is zero ...
#if defined(_APPLE)
(f0.1.all8h) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
#else
(f0.1.all16h) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
#endif
// ... or when ILDB is explicitly disabled for this MB
(f0.0) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
// Placed after the skip branches, so ORIY_TOP is adjusted only on the filtering path
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
// Better performance is observed if boundary MBs are not checked and skipped.
#include "load_Cur_Y_16x16T.asm" // Load cur MB Y, 16x16, transpose
// #include "load_Left_Y_4x16T.asm" // Load left MB (4x16) Y data from memory
#include "load_Top_Y_16x4.asm" // Load top MB (16x4) Y data from memory
#include "Transpose_Cur_Y_16x16.asm"
// #include "Transpose_Left_Y_4x16.asm"
//---------- Perform vertical ILDB filtering on Y ---------
#include "AVC_ILDB_Filter_Y_v.asm"
//-------------------------------------------------------
#include "save_Left_Y_16x4T.asm" // Write left MB (4x16) Y data to memory
#include "Transpose_Cur_Y_16x16.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on Y -------
#include "AVC_ILDB_Filter_Y_h.asm"
//-------------------------------------------------------
#include "save_Cur_Y_16x16.asm" // Write cur MB (16x16)
#include "save_Top_Y_16x4.asm" // Write top MB (16x4)
//---------- Write right-most 4 columns of cur MB to URB ----------
// Transpose the right most 4 cols 4x16 in GRF to 16x4 in LEFT_TEMP_B. It is 4 left most cols in cur MB.
#include "Transpose_Cur_Y_4x16.asm"
ILDB_LABEL(WRITE_URB_Y):
// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), so TOP_TEMP_B must be available
mov (16) m1<1>:ud LEFT_TEMP_D(2)<8;8,1> // Copy 2 GRFs to 2 URB entries (Y)
#include "writeURB_Y_Child.asm"
//-----------------------------------------------------------------
//=========== Check write commit of the last write ============
// Self-move of WritebackResponse(0); presumably stalls until the write-back data has arrived - confirm.
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
ILDB_LABEL(POST_ILDB_Y):
// Send notification through the Gateway to the root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
// Child sends EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
ILDB_LABEL(READ_FOR_URB_Y):
// Skip path: no filtering was done, but we
// still need to prepare URB data for the right neighbor MB
#include "load_Cur_Y_Right_Most_4x16.asm" // Load cur MB (right-most 4x16) Y data from memory
#include "Transpose_Cur_Y_Right_Most_4x16.asm"
// jmpi ILDB_LABEL(WRITE_URB_Y)
// The URB write and notification below repeat the sequence used after WRITE_URB_Y
// (the jump to that label is commented out above).
// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), so TOP_TEMP_B must be available
mov (16) m1<1>:ud LEFT_TEMP_D(2)<8;8,1> // Copy 2 GRFs to 2 URB entries (Y)
#include "writeURB_Y_Child.asm"
//-----------------------------------------------------------------
// Send notification through the Gateway to the root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
// Child sends EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Luma_Core.asm"
// #include "AVC_ILDB_Chroma_Core.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,195 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#if !defined(__AVC_ILDB_CHROMA_CORE__) // Make sure this file is only included once
#define __AVC_ILDB_CHROMA_CORE__
////////// AVC ILDB Chroma Core /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, the caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - alpha // alpha corresponding to the edge to be filtered
// - beta // beta corresponding to the edge to be filtered
// - tc0 // tc0 corresponding to the edge to be filtered
//
// Flag registers on entry: f0.0 and f0.1 are read below without being written first inside
// this routine (the movs that would load them from MaskA / MaskB are commented out), so the
// caller must have loaded both before the call.
//
// U or V:
// +----+----+----+----+
// | p1 | p0 | q0 | q1 |
// +----+----+----+----+
//
// p1 = r[P_AddrReg, 0]<16;8,2>
// p0 = r[P_AddrReg, 16]<16;8,2>
// q0 = r[Q_AddrReg, 0]<16;8,2>
// q1 = r[Q_AddrReg, 16]<16;8,2>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The region is both src and dest.
// P0 and Q0 alias the same registers as NewP0 and NewQ0, so the source macros should only be
// read while the pixels have not yet been overwritten with new values.
#undef P1
#undef P0
#undef Q0
#undef Q1
#define P1 r[P_AddrReg, 0]<16;8,2>:ub
#define P0 r[P_AddrReg, 16]<16;8,2>:ub
#define Q0 r[Q_AddrReg, 0]<16;8,2>:ub
#define Q1 r[Q_AddrReg, 16]<16;8,2>:ub
// New region as dest
#undef NewP0
#undef NewQ0
#define NewP0 r[P_AddrReg, 16]<2>:ub
#define NewQ0 r[Q_AddrReg, 0]<2>:ub
// Filter one chroma edge
FILTER_UV:
#if defined(_DEBUG)
// Debug builds only: write an entry marker for this routine
mov (1) EntrySignatureC:w 0x1112:w
#endif
//---------- Derive filterSampleFlag in AVC spec, equation (8-469) ----------
// bS is in MaskA
// Src copy of the p1, p0, q0, q1
// mov (8) p1(0)<1> r[P_AddrReg, 0]<16;8,2>:ub
// mov (8) p0(0)<1> r[P_AddrReg, 16]<16;8,2>:ub
// mov (8) q0(0)<1> r[Q_AddrReg, 0]<16;8,2>:ub
// mov (8) q1(0)<1> r[Q_AddrReg, 16]<16;8,2>:ub
// mov (1) f0.0:uw MaskA:uw
add (8) q0_p0(0)<1> Q0 -P0 // q0-p0
add (8) TempRow0(0)<1> P1 -P0 // p1-p0
add (8) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Build FilterSampleFlag
// Each compare is predicated on f0.0 and writes f0.0 again, so a channel stays set only if
// it was set on entry and passes all three tests.
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) alpha:w
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) beta:w
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) beta:w
//-----------------------------------------------------------------------------------------
// if
(f0.0) if (8) UV_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:w MaskB:w {NoMask} // Now check for which algorithm to apply
(f0.1) if (8) UV_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// p0' = (2*p1 + p0 + q1 + 2) >> 2
// q0' = (2*q1 + q0 + p1 + 2) >> 2
// Optimized version (described for reference; the code below implements the direct form above):
// A = (p1 + q1 + 2)
// p0' = (p0 + p1 + A) >> 2
// q0' = (q0 + q1 + A) >> 2
//------------------------------------------------------------------------------------
// p0' = (2*p1 + p0 + q1 + 2) >> 2
add (8) acc0<1>:w Q1 2:w // acc0 = q1 + 2
mac (8) acc0<1>:w P1 2:w // acc0 += 2*p1
add (8) acc0<1>:w acc0<8;8,1>:w P0 // acc0 += p0
shr.sat (8) TempRow0B(0)<2> acc0<8;8,1>:w 2:w
// q0' = (2*q1 + q0 + p1 + 2) >> 2
add (8) acc0<1>:w P1 2:w // acc0 = p1 + 2
mac (8) acc0<1>:w Q1 2:w // acc0 += 2*q1
add (8) acc0<1>:w acc0<8;8,1>:w Q0 // acc0 += q0
shr.sat (8) TempRow1B(0)<2> acc0<8;8,1>:w 2:w
// Both results are computed into temporaries first and written back only afterwards, so the
// q0' computation above still reads unmodified source pixels.
mov (8) NewP0 TempRow0B(0) // p0'
mov (8) NewQ0 TempRow1B(0) // q0'
UV_ELSE2:
else (8) UV_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
// Expand tc0 (tc0 has 4 bytes)
// mov (8) tc0_exp(0)<1> tc0<1;2,0>:ub {NoMask} // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels
mov (8) acc0<1>:w tc0<1;2,0>:ub {NoMask} // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels
// tc_exp = tc0_exp + 1
// add (8) tc_exp(0)<1> tc0_exp(0) 1:w
add (8) tc_exp(0)<1> acc0<8;8,1>:w 1:w
// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (8) acc0<1>:w P1 4:w // acc0 = p1 + 4
mac (8) acc0<1>:w q0_p0(0) 4:w // acc0 += 4*(q0-p0)
add (8) acc0<1>:w acc0<8;8,1>:w -Q1 // acc0 -= q1
shr (8) TempRow0(0)<1> acc0<8;8,1>:w 3:w
// tc clip (tc_exp holds tc = tc0 + 1)
cmp.g.f0.0 (8) null:w TempRow0(0) tc_exp(0) // Clip if > tc
cmp.l.f0.1 (8) null:w TempRow0(0) -tc_exp(0) // Clip if < -tc
(f0.0) mov (8) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (8) TempRow0(0)<1> -tc_exp(0)
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
add.sat (8) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (8) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (8) NewP0 TempRow1B(0) // p0'
mov (8) NewQ0 TempRow0B(0) // q0'
endif
UV_ENDIF2:
UV_ENDIF1:
endif
RETURN
#endif // !defined(__AVC_ILDB_CHROMA_CORE__)

View File

@@ -0,0 +1,176 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB Chroma Core Mbaff /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, the caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - Mbaff_ALPHA(0) // alpha values corresponding to the edge to be filtered
// - Mbaff_BETA(0) // beta values corresponding to the edge to be filtered
// - Mbaff_TC0(0) // tc0 values corresponding to the edge to be filtered
//
// Flag registers on entry: f0.0 is assumed to contain MaskA, and f0.1 is read by the inner
// "if" without being written first inside this routine (the mov that would load it from
// MaskB is commented out), so the caller must have loaded both before the call.
//
// NOTE(review): this file has no include guard, and the label FILTER_UV_MBAFF is defined
// unconditionally; including the file twice in one kernel would define the label twice - confirm
// this file is only ever included once per kernel.
//
// U or V:
// +----+----+----+----+
// | p1 | p0 | q0 | q1 |
// +----+----+----+----+
//
// p1 = r[P_AddrReg, 0]<16;8,2>
// p0 = r[P_AddrReg, 16]<16;8,2>
// q0 = r[Q_AddrReg, 0]<16;8,2>
// q1 = r[Q_AddrReg, 16]<16;8,2>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The region is both src and dest.
// P0 and Q0 alias the same registers as NewP0 and NewQ0, so the source macros should only be
// read while the pixels have not yet been overwritten with new values.
#undef P1
#undef P0
#undef Q0
#undef Q1
#define P1 r[P_AddrReg, 0]<16;8,2>:ub
#define P0 r[P_AddrReg, 16]<16;8,2>:ub
#define Q0 r[Q_AddrReg, 0]<16;8,2>:ub
#define Q1 r[Q_AddrReg, 16]<16;8,2>:ub
// New region as dest
#undef NewP0
#undef NewQ0
#define NewP0 r[P_AddrReg, 16]<2>:ub
#define NewQ0 r[Q_AddrReg, 0]<2>:ub
// Filter one chroma edge - mbaff
FILTER_UV_MBAFF:
#if defined(_DEBUG)
// Debug builds only: write an entry marker for this routine.
// NOTE(review): 0x1112 is the same value the non-MBAFF FILTER_UV routine writes - confirm
// whether a distinct signature was intended.
mov (1) EntrySignatureC:w 0x1112:w
#endif
//---------- Derive filterSampleFlag in AVC spec, equation (8-469) ----------
//===== Assume f0.0 contains MaskA when entering this routine
// mov (1) f0.0:uw MaskA:uw
add (8) q0_p0(0)<1> Q0 -P0 // q0-p0
add (8) TempRow0(0)<1> P1 -P0 // p1-p0
add (8) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Build FilterSampleFlag
// Each compare is predicated on f0.0 and writes f0.0 again, so a channel stays set only if
// it was set on entry and passes all three tests.
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) Mbaff_ALPHA(0)
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) Mbaff_BETA(0)
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) Mbaff_BETA(0)
//-----------------------------------------------------------------------------------------
// if
(f0.0) if (8) MBAFF_UV_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:w MaskB:w {NoMask} // Now check for which algorithm to apply
(f0.1) if (8) MBAFF_UV_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// p0' = (2*p1 + p0 + q1 + 2) >> 2
// q0' = (2*q1 + q0 + p1 + 2) >> 2
//------------------------------------------------------------------------------------
// p0' = (2*p1 + p0 + q1 + 2) >> 2
add (8) acc0<1>:w Q1 2:w // acc0 = q1 + 2
mac (8) acc0<1>:w P1 2:w // acc0 += 2*p1
add (8) acc0<1>:w acc0<8;8,1>:w P0 // acc0 += p0
shr.sat (8) TempRow0B(0)<2> acc0<8;8,1>:w 2:w
// q0' = (2*q1 + q0 + p1 + 2) >> 2
add (8) acc0<1>:w P1 2:w // acc0 = p1 + 2
mac (8) acc0<1>:w Q1 2:w // acc0 += 2*q1
add (8) acc0<1>:w acc0<8;8,1>:w Q0 // acc0 += q0
shr.sat (8) TempRow1B(0)<2> acc0<8;8,1>:w 2:w
// Both results are computed into temporaries first and written back only afterwards, so the
// q0' computation above still reads unmodified source pixels.
mov (8) NewP0 TempRow0B(0) // p0'
mov (8) NewQ0 TempRow1B(0) // q0'
MBAFF_UV_ELSE2:
else (8) MBAFF_UV_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
// tc_exp = tc0_exp + 1 (tc0 is read directly from Mbaff_TC0(0))
add (8) tc_exp(0)<1> Mbaff_TC0(0) 1:w
// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (8) acc0<1>:w P1 4:w // acc0 = p1 + 4
mac (8) acc0<1>:w q0_p0(0) 4:w // acc0 += 4*(q0-p0)
add (8) acc0<1>:w acc0<8;8,1>:w -Q1 // acc0 -= q1
shr (8) TempRow0(0)<1> acc0<8;8,1>:w 3:w
// tc clip (tc_exp holds tc = tc0 + 1)
cmp.g.f0.0 (8) null:w TempRow0(0) tc_exp(0) // Clip if > tc
cmp.l.f0.1 (8) null:w TempRow0(0) -tc_exp(0) // Clip if < -tc
(f0.0) mov (8) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (8) TempRow0(0)<1> -tc_exp(0)
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
add.sat (8) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (8) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (8) NewP0 TempRow1B(0) // p0'
mov (8) NewQ0 TempRow0B(0) // q0'
endif
MBAFF_UV_ENDIF2:
MBAFF_UV_ENDIF1:
endif
RETURN

View File

@@ -0,0 +1,52 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//----- Close a Message Gateway -----
// Sends a single CloseGateway message (descriptor CGWMSGDSC) to the message gateway (MSG_GW).
// The destination is null, i.e. no response is requested.
#if defined(_DEBUG)
// Debug builds only: write an entry marker for this code.
// The destination is typed as a word so the full 0x4444 immediate is stored; a byte-typed
// destination would keep only one byte of it. This matches the word-typed EntrySignature
// write used by the dependency-check code.
mov (1) EntrySignature:w 0x4444:w
#endif
// Message descriptor
// bit 31 EOD
// 27:24 FFID = 0011b for msg gateway
// 23:20 msg length = 1 MRF
// 19:16 Response length = 0
// 1:0 SubFuncID = 01b for CloseGateway
// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001
send (8) null:ud m7 r0.0<0;1,0>:ud MSG_GW CGWMSGDSC

View File

@@ -0,0 +1,216 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//---------- Check dependency and spawn all MBs ----------
// Launch the 1st round of child threads for Vertical ILDB
#if defined(_DEBUG)
mov (1) EntrySignature:w 0x3333:w
#endif
//=====================================================================
// Jump Table 1
// 0 0 ---> Goto ALL_SPAWNED
// 0 1 ---> Goto ALL_SPAWNED
// 1 0 ---> Goto SLEEP_ENTRY
// 1 1 ---> Goto POST_SLEEP
mov (2) JumpTable.0<1>:d 0:d { NoDDClr }
#if defined(CHROMA_ROOT)
mov (1) JumpTable.2:d SLEEP_ENTRY_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d { NoDDClr, NoDDChk }
mov (1) JumpTable.3:d POST_SLEEP_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d { NoDDChk }
#else
mov (1) JumpTable.2:d SLEEP_ENTRY_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d { NoDDClr, NoDDChk }
mov (1) JumpTable.3:d POST_SLEEP_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d { NoDDChk }
#endif
//=====================================================================
mov (2) f0.0<1>:w 0:w
// Get m0 most of fields ready for URB write
mov (8) MRF0<1>:ud MSGSRC.0<8;8,1>:ud
// Add child kernel offset
add (1) CT_R0Hdr.2:ud r0.2:ud CHILD_OFFSET:w
// Init
mov (1) Col_Boundary:w 2:w
mov (1) Row_Boundary:w LastRow:w
mov (1) TopRowForScan:w 0:w
mov (2) OutstandingThreads<1>:w 0:w
// Init Scoreboard (idle = 0x00FF, busy = 0x0000)
// Low word is saved col. High word is busy/idle status
mov (16) GatewayAperture(0)<1> 0x00FF00FF:ud // Init r6-r7
mov (16) GatewayAperture(2)<1> 0x00FF00FF:ud // Init r8-r9
mov (16) GatewayAperture(4)<1> 0x00FF00FF:ud // Init r10-r11
mov (16) GatewayAperture(6)<1> 0x00FF00FF:ud // Init r12-r13
mov (16) GatewayAperture(8)<1> 0x00FF00FF:ud // Init r14-r15
mul (1) StatusAddr:w CurRow:w 4:w // dword to bytes offset conversion
//=====================================================================
//SPAWN_LOOP:
// Spawn loop body: sleep while the thread limit is reached or the current row
// is still busy; otherwise mark the row busy and spawn a child thread for the
// current (CurCol, CurRow) macroblock.
//===== OutstandingThreads < ThreadLimit ? ============================
cmp.l.f0.1 (1) null:w OutstandingThreads:w ThreadLimit:w // Check the thread limit
#if defined(CHROMA_ROOT)
(f0.1) jmpi ILDB_LABEL(POST_SLEEP_UV)
#else // LUMA_ROOT
(f0.1) jmpi ILDB_LABEL(POST_SLEEP_Y)
#endif
#if defined(CHROMA_ROOT)
ILDB_LABEL(SLEEP_ENTRY_UV):
#else // LUMA_ROOT
ILDB_LABEL(SLEEP_ENTRY_Y):
#endif
//===== Goto Sleep ====================================================
// Either reached max thread limit or no child thread can be spawned due to dependency.
add (1) OutstandingThreads:w OutstandingThreads:w -1:w // Decrement ahead of the wait: doing this before the wait is faster
wait n0.0:d
#if defined(CHROMA_ROOT)
ILDB_LABEL(POST_SLEEP_UV):
#else // LUMA_ROOT
ILDB_LABEL(POST_SLEEP_Y):
#endif
//===== Luma Status[CurRow] == busy ? =====
// f0.0 is set when the scoreboard status word of the current row is 0 (busy)
cmp.z.f0.0 (1) null:uw r[StatusAddr, GatewayApertureB+ScoreBd_Idx]:uw 0:uw // Check west neighbor
cmp.g.f0.1 (1) null:w CurCol:w LastCol:w // Check if the curCol > LastCol
#if defined(CHROMA_ROOT)
// URBOffset = URBOffsetUVBase + CurRow * 4 (base preloaded into the accumulator, then mac)
mov (16) acc0.0<1>:w URBOffsetUVBase<0;1,0>:w // Add offset to UV base (MBsCntY * URB entries per MB)
mac (1) URBOffset:w CurRow:w 4:w // 4 entries per row
#else
mul (1) URBOffset:w CurRow:w 4:w // 4 entries per row
#endif
#if defined(CHROMA_ROOT)
(f0.0) jmpi ILDB_LABEL(SLEEP_ENTRY_UV) // Current row has a child thread running, can not spawn a new child thread, go back to sleep
(f0.1) jmpi ILDB_LABEL(NEXT_MB_UV) // skip MB if the curCol > LastCol
#else // LUMA_ROOT
(f0.0) jmpi ILDB_LABEL(SLEEP_ENTRY_Y) // Current row has a child thread running, can not spawn a new child thread, go back to sleep
(f0.1) jmpi ILDB_LABEL(NEXT_MB_Y) // skip MB if the curCol > LastCol
#endif
//========== Spawn a child thread ========================================
// Save cur col and set Status[CurRow] to busy
// (two bytes starting at CurColB are widened into the row's two scoreboard words)
mov (2) r[StatusAddr, GatewayApertureB]<1>:uw CurColB<2;2,1>:ub // Store the new col
// Increase OutstandingThreads and ProcessedMBs by 1
add (2) OutstandingThreads<1>:w OutstandingThreads<2;2,1>:w 1:w
#include "AVC_ILDB_SpawnChild.asm"
//===== Find next MB ===================================================
// Advances (CurCol, CurRow) to the next macroblock to consider, restarting from
// TopRowForScan when a boundary is hit, then dispatches through the jump table
// to ALL_SPAWNED, SLEEP_ENTRY or POST_SLEEP.
#if defined(CHROMA_ROOT)
ILDB_LABEL(NEXT_MB_UV):
#else // LUMA_ROOT
ILDB_LABEL(NEXT_MB_Y):
#endif
// Check pic boundary, results are in f0.0 bit0 and bit1
// (2-wide compare: bit0 = CurCol >= Col_Boundary; bit1 compares the words that
// follow them - presumably CurRow >= Row_Boundary, TODO confirm register layout)
cmp.ge.f0.0 (2) null<1>:w CurCol<2;2,1>:w Col_Boundary<2;2,1>:w
// Update TopRowForScan when curCol is past LastCol.
// NOTE(review): f0.1 here is the "CurCol > LastCol" result from the POST_SLEEP
// compare; this assumes the included spawn code leaves f0.1 untouched - confirm.
(f0.1) add (1) TopRowForScan:w CurRow:w 1:w
// cmp.l.f0.1 (1) null<1>:w ProcessedMBs:w TotalBlocks:w // Processed all blocks ?
// 2 sets compare; the 2-bit result in f0.1 indexes the jump table
// ProcessedMBs:w < TotalBlocks:w | OutstandingThreads:w < ThreadLimit:w
// 0 0 ---> Goto ALL_SPAWNED
// 0 1 ---> Goto ALL_SPAWNED
// 1 0 ---> Goto SLEEP_ENTRY
// 1 1 ---> Goto POST_SLEEP
cmp.l.f0.1 (2) null<1>:w OutstandingThreads<2;2,1>:w ThreadLimit<2;2,1>:w
// Just do it in stalled cycles
// StatusAddr = CurRow * 4 + 4, i.e. the entry of the row below, computed before
// CurRow itself is incremented by the add that follows
mov (1) acc0.0:w 4:w
mac (1) StatusAddr:w CurRow:w 4:w // dword to bytes offset conversion
add (2) CurCol<1>:w CurCol<2;2,1>:w StepToNextMB<2;2,1>:b // CurCol -= 2 and CurRow += 1
// Set f0.0 if turning around is needed, assuming bit 15 - 2 are zeros for correct comparison.
// (anything other than "col >= boundary and row < boundary", i.e. f0.0 != 0x01, turns around)
cmp.nz.f0.0 (1) null<1>:w f0.0:w 0x01:w
mul (1) JumpAddr:w f0.1:w 4:w // byte offset into the jump table (index * 4, one dword per entry)
// The next MB is at the row TopRowForScan
(f0.0) mul (1) StatusAddr:w TopRowForScan:w 4:w // dword to bytes offset conversion
(f0.0) mov (1) CurRow:w TopRowForScan:w { NoDDClr } // Restart from the top row that has MBs not deblocked yet.
// Resume one column past the column saved in that row's scoreboard entry
(f0.0) add (1) CurCol:w r[StatusAddr, GatewayApertureB]:uw 1:w { NoDDChk }
//===== Processed all blocks ? =========================================
// (f0.1) jmpi SPAWN_LOOP
jmpi r[JumpAddr, JUMPTABLE_BASE]:d
//JUMP_BASE:
//======================================================================
// All MB are spawned at this point, check for outstanding thread count
// Drain loop: wait for one child at a time until OutstandingThreads reaches 0,
// then fall out at ALL_DONE.
#if defined(CHROMA_ROOT)
ILDB_LABEL(ALL_SPAWNED_UV):
#else // LUMA_ROOT
ILDB_LABEL(ALL_SPAWNED_Y):
#endif
cmp.e.f0.1 (1) null:w OutstandingThreads:w 0:w // Check before goto sleep
#if defined(CHROMA_ROOT)
(f0.1) jmpi ILDB_LABEL(ALL_DONE_UV)
#else // LUMA_ROOT
(f0.1) jmpi ILDB_LABEL(ALL_DONE_Y)
#endif
wait n0.0:d // Woken up by a finished child thread
add (1) OutstandingThreads:w OutstandingThreads:w -1:w
#if defined(CHROMA_ROOT)
// One thread is free and give it to luma thread limit --- Increase luma thread limit by one.
#include "AVC_ILDB_LumaThrdLimit.asm"
#endif
#if defined(CHROMA_ROOT)
jmpi ILDB_LABEL(ALL_SPAWNED_UV) // Woken up; go back and re-check the outstanding thread count
#else // LUMA_ROOT
jmpi ILDB_LABEL(ALL_SPAWNED_Y) // Woken up; go back and re-check the outstanding thread count
#endif
// All child threads are finished at this point
#if defined(CHROMA_ROOT)
ILDB_LABEL(ALL_DONE_UV):
#else // LUMA_ROOT
ILDB_LABEL(ALL_DONE_Y):
#endif

View File

@@ -0,0 +1,253 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter horizontal Mbaff UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.
//
// It assumes the data for horizontal de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+ H0 Edge
// | | |
// | | |
// | | |
// +-------+-------+ H1 Edge
// | | |
// | | |
// | | |
// +-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
// External top edge (H0) of U and V, non dual field path: decides whether the
// edge is filtered at all, derives the chroma masks from the luma edge control
// maps, then calls FILTER_UV_MBAFF once for U and once for V.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
//---------- Deblock UV external top edge ----------
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
mov (1) f0.1:w DualFieldMode:w // Check for dual field mode
// Get Luma maskA and maskB
// (each 16-bit map is replicated to 16 channels and shifted right per channel by
// RRampW - presumably a 0..15 ramp so bit i ends up in bit 0 of channel i; TODO confirm)
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi H0_UV_DONE // Skip H0 UV edge
(f0.1) jmpi DUAL_FIELD_UV
// Non dual field mode
// Extract UV MaskA and MaskB from every other bit of Y masks
// (stride-2 region over the shifted rows; bit 0 of each selected channel sets the flag bit)
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// Ext U
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB (f0.0 and f0.1 are saved so the V pass can restore them)
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
// Ext V
mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB (reload the flags from the saved copies)
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
jmpi H0_UV_DONE
DUAL_FIELD_UV:
// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
// The top edge is filtered twice, once per field: the rows of one field are
// gathered into the ABOVE_CUR_MB scratch area, U and V are filtered there, and
// the modified p0/q0 rows are copied back before the other field is processed.
//===== Ext U, Top field
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+32:w { NoDDChk }
mov (16) ABOVE_CUR_MB_UW(0)<1> PREV_MB_UW(0, 0)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_UW(1)<1> SRC_UW(0, 0)<16;8,1> // Copy q0, q1
//===== Ext U, top field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext U, top field
//===== Ext V, top field
// V uses the same scratch rows at a +1 byte offset
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE+1:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+33:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext V, top field
// Prefetch for bottom field
// Get bot field Luma maskA and maskB
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz1]<0;1,0>:uw RRampW(0)
// Save deblocked top field rows
mov (8) PREV_MB_UW(1, 0)<1> ABOVE_CUR_MB_UW(0, 8) // Copy p0
mov (8) SRC_UW(0, 0)<1> ABOVE_CUR_MB_UW(1, 0) // Copy q0
//==========================================================================
//===== Ext U, Bot field
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+32:w { NoDDChk }
mov (16) ABOVE_CUR_MB_UW(0)<1> PREV_MB_UW(0, 8)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_UW(1)<1> SRC_UW(0, 8)<16;8,1> // Copy q0, q1
//===== Ext U, bottom field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext U, bottom field
//===== Ext V, bot field
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE+1:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+33:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext V, bottom field
// Save deblocked bot field rows
mov (8) PREV_MB_UW(1, 8)<1> ABOVE_CUR_MB_UW(0, 8) // Copy p0
mov (8) SRC_UW(0, 8)<1> ABOVE_CUR_MB_UW(1, 0) // Copy q0
//========================================
H0_UV_DONE:
// Internal horizontal middle edge (H1) of U and then V. MaskB is forced to 0
// for this edge; MaskA comes from every other bit of the luma map.
//---------- Deblock U internal horz middle edge ----------
//***** Need to take every other bit to form U maskA in core
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw RRampW(0)
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Cb]<1;2,0>:ub
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// Store UV MaskA and MaskB (MaskB and f0.1 are cleared, MaskA is saved from f0.0)
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V internal horz middle edge ----------
// p1 = Cur MB V row 2
// p0 = Cur MB V row 3
// q0 = Cur MB V row 4
// q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB (reload the flags saved during the U pass)
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------

View File

@@ -0,0 +1,239 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter vertical Mbaff UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.
//
// It assumes the data for vertical de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
//
// V0 V1
// Edge Edge
//
/////////////////////////////////////////////////////////////////////////////
// External left edge (V0) of U: selects alpha/beta/tc0 and the chroma masks
// according to VertEdgePattern (left field + current frame, left frame +
// current field, or same type on both sides), then calls FILTER_UV_MBAFF.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
//---------- Deblock U external left edge ----------
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
cmp.z.f0.1 (1) null:w VertEdgePattern:uw LEFT_FIELD_CUR_FRAME:w
// Get Luma maskA and maskB
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi BYPASS_V0_UV // Do not deblock Left ext edge
// f0.0 is free after the jump above and is reused for the second pattern test
cmp.z.f0.0 (1) null:w VertEdgePattern:uw LEFT_FRAME_CUR_FIELD:w
(-f0.1) jmpi V0_U_NEXT1 // Jump if not LEFT_FIELD_CUR_FRAME
//----- For LEFT_FIELD_CUR_FRAME
// Extract UV MaskA and MaskB from every other 2 bits of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<4;2,1> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<4;2,1> 1:w
// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0
// (Left0 values go to the even entries, Left1 values to the odd entries)
mov (4) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub { NoDDClr }
mov (4) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub { NoDDChk }
jmpi V0_U_NEXT3
V0_U_NEXT1:
(-f0.0) jmpi V0_U_NEXT2 // Jump if not LEFT_FRAME_CUR_FIELD
//----- For LEFT_FRAME_CUR_FIELD
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1
// (Left0 values fill entries 0-3, Left1 values fill entries 4-7)
mov (4) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_ALPHA(0,4)<1> r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_BETA(0,4)<1> r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub { NoDDClr }
mov (4) Mbaff_TC0(0,4)<1> r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub { NoDDChk }
jmpi V0_U_NEXT3
V0_U_NEXT2:
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// Both are frames or fields
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cb]<1;2,0>:ub
V0_U_NEXT3:
// Common tail for all three patterns
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
// Store UV MaskA and MaskB (saved so the V pass can restore the flags)
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V external left edge ----------
// Same pattern selection as the U edge, done without branches: the "same type"
// parameters are loaded first and then overwritten under predicate when one of
// the two mixed frame/field patterns matches.
// No change to MaskA and MaskB
cmp.z.f0.0 (4) null:w VertEdgePattern:uw LEFT_FIELD_CUR_FRAME:w
cmp.z.f0.1 (4) null:w VertEdgePattern:uw LEFT_FRAME_CUR_FIELD:w
// both are frame or field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cr]<1;2,0>:ub
// p1 = Prev MB V row 0
// p0 = Prev MB V row 1
// q0 = Cur MB V row 0
// q1 = Cur MB V row 1
mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0
(f0.0) mov (4) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.0) mov (4) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.0) mov (4) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub { NoDDChk }
// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1
(f0.1) mov (4) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_ALPHA(0,4)<1> r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.1) mov (4) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_BETA(0,4)<1> r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.1) mov (4) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_TC0(0,4)<1> r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub { NoDDChk }
// Set UV MaskA and MaskB
// (the flags were reused for the pattern tests above, so reload them from the saved masks)
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
BYPASS_V0_UV:
// Internal vertical middle edge (V1) of U and then V. MaskB is forced to 0 and
// the internal alpha/beta values are used for both planes.
// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
// Same alpha and beta for all internal vert and horiz edges
//---------- Deblock U internal vert middle edge ----------
//***** Need to take every other bit to form U or V maskA
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw RRampW(0)
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v02_Cb]<1;2,0>:ub
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// Store MaskA and MaskB (MaskB and f0.1 are cleared, MaskA is saved from f0.0)
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V internal vert middle edge ----------
// P1 = Cur MB V row 2
// P0 = Cur MB V row 3
// Q0 = Cur MB V row 4
// Q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
// Put MaskA into f0.0
// Put MaskB into f0.1
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v02_Cr]<1;2,0>:ub
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------

View File

@@ -0,0 +1,264 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter horizontal Mbaff Y ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.
//
// It assumes the data for horizontal de-blocking is already transposed.
//
// Luma:
//
// +-------+-------+-------+-------+ H0 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H1 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H2 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H3 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
// External top edge (H0) of Y: loads the top-edge masks and alpha/beta/tc0
// parameters, then either skips the edge, branches to the dual field path, or
// filters it directly against the previous MB rows.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBB:w
#endif
//========== Luma deblocking ==========
//---------- Deblock Y external top edge (H0) ----------
// Bypass deblocking if it is the top edge of the picture.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
mov (1) f0.1:w DualFieldMode:w // Check for dual field mode
// Non dual field mode
// (the parameters below are loaded before the branches; the dual field path
// reuses them for its top field pass)
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub 2:w // alpha >> 2
// Loads MaskA and the word that follows it from the A map and the word after it
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw
// Ext Y
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Y]<1;4,0>:ub
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
(f0.0) jmpi H0_Y_DONE // Skip Ext Y deblocking
(f0.1) jmpi DUAL_FIELD_Y
mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
CALL(FILTER_Y_MBAFF, 1) // Non dual field deblocking
jmpi H0_Y_DONE
DUAL_FIELD_Y:
// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
// The top edge is filtered once per field: the rows of one field are gathered
// into the ABOVE_CUR_MB scratch area, filtered there, and the p2..q2 rows are
// copied back (p3 and q3 are not written back).
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+64:w { NoDDChk }
// Must use PREV_MB_YW. TOP_MB_YW is not big enough.
// Get top field rows
mov (16) ABOVE_CUR_MB_YW(0)<1> PREV_MB_YW(0, 0)<16;8,1> // Copy p3, p2
mov (16) ABOVE_CUR_MB_YW(1)<1> PREV_MB_YW(2, 0)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_YW(2)<1> SRC_YW(0, 0)<16;8,1> // Copy q0, q1
mov (16) ABOVE_CUR_MB_YW(3)<1> SRC_YW(2, 0)<16;8,1> // Copy q2, q3
CALL(FILTER_Y_MBAFF, 1) // Ext Y, top field
// Save deblocked top field rows
mov (8) PREV_MB_YW(1, 0)<1> ABOVE_CUR_MB_YW(0, 8) // Copy p2
mov (8) PREV_MB_YW(2, 0)<1> ABOVE_CUR_MB_YW(1, 0) // Copy p1
mov (8) PREV_MB_YW(3, 0)<1> ABOVE_CUR_MB_YW(1, 8) // Copy p0
mov (8) SRC_YW(0, 0)<1> ABOVE_CUR_MB_YW(2, 0) // Copy q0
mov (8) SRC_YW(1, 0)<1> ABOVE_CUR_MB_YW(2, 8) // Copy q1
mov (8) SRC_YW(2, 0)<1> ABOVE_CUR_MB_YW(3, 0) // Copy q2
//==================================================================================
// Bottom field
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub 2:w // alpha >> 2
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+64:w { NoDDChk }
// Get bot field rows
mov (16) ABOVE_CUR_MB_YW(0)<1> PREV_MB_YW(0, 8)<16;8,1> // Copy p3, p2
mov (16) ABOVE_CUR_MB_YW(1)<1> PREV_MB_YW(2, 8)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_YW(2)<1> SRC_YW(0, 8)<16;8,1> // Copy q0, q1
mov (16) ABOVE_CUR_MB_YW(3)<1> SRC_YW(2, 8)<16;8,1> // Copy q2, q3
// Bottom field uses the second set of masks and parameters (ExtTopHorz1 / Top1 / h00_1)
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<2;2,1>:uw
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Y]<1;4,0>:ub
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
CALL(FILTER_Y_MBAFF, 1) // Ext Y, bot field
// Save deblocked bot field rows
mov (8) PREV_MB_YW(1, 8)<1> ABOVE_CUR_MB_YW(0, 8) // Copy p2
mov (8) PREV_MB_YW(2, 8)<1> ABOVE_CUR_MB_YW(1, 0) // Copy p1
mov (8) PREV_MB_YW(3, 8)<1> ABOVE_CUR_MB_YW(1, 8) // Copy p0
mov (8) SRC_YW(0, 8)<1> ABOVE_CUR_MB_YW(2, 0) // Copy q0
mov (8) SRC_YW(1, 8)<1> ABOVE_CUR_MB_YW(2, 8) // Copy q1
mov (8) SRC_YW(2, 8)<1> ABOVE_CUR_MB_YW(3, 0) // Copy q2
//==================================================================================
H0_Y_DONE:
// Internal horizontal edges H1, H2 and H3 of Y. Alpha, alpha2, beta and MaskB
// are set once here; each edge then loads its own P/Q row addresses, MaskA and
// tc0, and calls FILTER_Y_MBAFF only when its BitFlags bit is set.
//BYPASS_H0_Y:
//------------------------------------------------------------------
// Same alpha, alpha2, beta and MaskB for all internal edges
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub 2:w // alpha >> 2
// alpha = bAlphaInternal_Y
// beta = bBetaInternal_Y
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub
mov (1) MaskB:uw 0:w // Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
//---------- Deblock Y internal top edge (H1) ----------
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_H1_Y
// p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw
// tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h10_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
// Predicated on -f0.0, i.e. the call is made only when the flag bit tested above is set
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H1_Y:
//------------------------------------------------------------------
//---------- Deblock Y internal mid horizontal edge (H2) ----------
// Bypass deblocking if FilterInternal8x8EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal8x8EdgesFlag:w // Check for FilterInternal8x8EdgesFlag
// (f0.0) jmpi BYPASS_H2_Y
// p3 = Cur MB Y row 4 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 5 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 6 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 7 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 8 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 9 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw
// tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H2_Y:
//-----------------------------------------------
//---------- Deblock Y internal bottom edge (H3) ----------
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_H3_Y
// p3 = Cur MB Y row 8 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 9 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw
// tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h30_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H3_Y:
//-----------------------------------------------

View File

@@ -0,0 +1,299 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter vertical Mbaff Y ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.
//
// It assumes the data for vertical de-blocking is already transposed.
//
// Luma:
//
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
//
// V0 V1 V2 V3
// Edge Edge Edge Edge
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write the marker 0xBBBB into EntrySignatureC.
mov (1) EntrySignatureC:w 0xBBBB:w
#endif
//========== Luma deblocking ==========
//---------- Deblock Y external left edge (V0) ----------
// Purpose: build per-channel alpha/beta/tc0 for the left MB edge according to the field/frame
// combination of the left and current MBs, then invoke FILTER_Y_MBAFF unless
// FilterLeftMbEdgeFlag is clear.
// f0.0 and f0.1 flag the two mixed field/frame cases; when neither compare matches, the
// "initial" values written just below are left in place.
cmp.z.f0.0 (8) null:w VertEdgePattern:uw LEFT_FIELD_CUR_FRAME:w
cmp.z.f0.1 (8) null:w VertEdgePattern:uw LEFT_FRAME_CUR_FIELD:w
// Initial set for both are frame or field
// (<0;1,0> broadcasts one byte to all 16 channels; <1;4,0> repeats each of 4 tc0 bytes over 4 channels.)
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub
// NOTE(review): f0.0 was set by the compare against LEFT_FIELD_CUR_FRAME, whose name reads as the
// opposite of the flag values stated in the next comment (same for f0.1 / LEFT_FRAME_CUR_FIELD
// further down) - confirm whether the comments or the constant names are the accurate ones.
// For FieldModeCurrentMbFlag=1 && FieldModeLeftMbFlag=0
// Destination stride <2>: the Left0 values land on even channels, the Left1 values on odd channels.
(f0.0) mov (8) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub { NoDDClr }
(f0.0) mov (8) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub { NoDDChk }
(f0.0) mov (8) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub { NoDDClr }
(f0.0) mov (8) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub { NoDDChk }
(f0.0) mov (8) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub { NoDDClr }
(f0.0) mov (8) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub { NoDDChk }
// f0.0 is reused from here on for the edge-enable test consumed by PRED_CALL, so the
// (f0.0)-predicated moves above must stay ahead of this instruction.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
// For FieldModeCurrentMbFlag=0 && FieldModeLeftMbFlag=1
// Destination stride <1>: the Left0 values fill channels 0-7, the Left1 values channels 8-15.
(f0.1) mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub { NoDDClr }
(f0.1) mov (8) Mbaff_ALPHA(0,8)<1> r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub { NoDDChk }
(f0.1) mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub { NoDDClr }
(f0.1) mov (8) Mbaff_BETA(0,8)<1> r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub { NoDDChk }
(f0.1) mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub { NoDDClr }
(f0.1) mov (8) Mbaff_TC0(0,8)<1> r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub { NoDDChk }
// Get (alpha >> 2) + 2
// Computed from the per-channel Mbaff_ALPHA selected above; the "+ 2" is applied by the add
// placed just before the call.
shr (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA(0) 2:w // alpha >> 2
// p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 3 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
// Set MaskA and MaskB
// (one 2-word copy: MaskA and the word that follows it are loaded from two consecutive source words)
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
// CALL(FILTER_Y_MBAFF, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when FilterLeftMbEdgeFlag is set.
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_V0_Y:
//------------------------------------------------------------------
/*
//---------- Deblock Y external left edge (V0) ----------
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
(f0.0) jmpi ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y)
// Get vertical border edge control data
// mov (1) f0.0 0:w
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
cmp.z.f0.0 (1) null:w CTemp1_W:uw LEFT_FIELD_CUR_FRAME:w
(-f0.0) jmpi LEFT_EDGE_Y_NEXT1
// For FieldModeCurrentMbFlag=1 && FieldModeLeftMbFlag=0
mov (8) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub { NoDDClr }
mov (8) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub { NoDDChk }
mov (8) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub { NoDDClr }
mov (8) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub { NoDDChk }
mov (8) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub { NoDDClr }
mov (8) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub { NoDDChk }
jmpi LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED
LEFT_EDGE_Y_NEXT1:
cmp.z.f0.0 (1) null:w CTemp1_W:uw LEFT_FRAME_CUR_FIELD:w
(-f0.0) jmpi LEFT_EDGE_Y_NEXT2
// For FieldModeCurrentMbFlag=0 && FieldModeLeftMbFlag=1
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub { NoDDClr }
mov (8) Mbaff_ALPHA(0,8)<1> r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub { NoDDChk }
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub { NoDDClr }
mov (8) Mbaff_BETA(0,8)<1> r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub { NoDDChk }
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub { NoDDClr }
mov (8) Mbaff_TC0(0,8)<1> r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub { NoDDChk }
jmpi LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED
LEFT_EDGE_Y_NEXT2:
// both are frame or field
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub
LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED:
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw
// p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 3 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub 2:w // alpha >> 2
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
CALL(FILTER_Y_MBAFF, 1)
ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y):
//------------------------------------------------------------------
*/
// Same alpha, alpha2, beta and MaskB for all internal edges
// Purpose: load the parameters shared by the internal edges V1-V3 once, so the per-edge
// sections below only have to set P/Q, MaskA and tc0.
// Get (alpha >> 2) + 2
// (<0;1,0> broadcasts the single bAlphaInternal_Y byte to all 16 channels; the "+ 2" is the add at the end.)
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub 2:w // alpha >> 2
// alpha = bAlphaInternal_Y
// beta = bBetaInternal_Y
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub
mov (1) MaskB:uw 0:w // Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
//---------- Deblock Y internal left edge (V1) ----------
// Purpose: point P/Q at rows 0-3 / 4-7 of the current MB, load the V1 edge mask and tc0
// values, and invoke FILTER_Y_MBAFF unless internal 4x4 edge filtering is disabled.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_V1_Y
// p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw
// tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y
// Region <1;4,0> reads 4 consecutive tc0 bytes and repeats each one over 4 channels.
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v01_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when the flag bit is set.
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
// NOTE(review): no instruction in this section jumps to the label below (the jmpi above is
// commented out); the equivalent labels in the horizontal filter sections are commented out.
BYPASS_V1_Y:
//------------------------------------------------------------------
//---------- Deblock Y internal mid vert edge (V2) ----------
// Purpose: point P/Q at rows 4-7 / 8-11 of the current MB, load the V2 edge mask and tc0
// values, and invoke FILTER_Y_MBAFF unless internal 8x8 edge filtering is disabled.
// Bypass deblocking if FilterInternal8x8EdgesFlag = 0
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal8x8EdgesFlag:w // Check for FilterInternal8x8EdgesFlag
// (f0.0) jmpi BYPASS_V2_Y
// p3 = Cur MB Y row 4 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 5 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 6 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 7 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 8 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 9 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw
// tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y
// Region <1;4,0> reads 4 consecutive tc0 bytes and repeats each one over 4 channels.
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v02_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when the flag bit is set.
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
// NOTE(review): no instruction in this section jumps to the label below (the jmpi above is commented out).
BYPASS_V2_Y:
//-----------------------------------------------
//---------- Deblock Y internal right edge (V3) ----------
// Purpose: point P/Q at rows 8-11 / 12-15 of the current MB, load the V3 edge mask and tc0
// values, and invoke FILTER_Y_MBAFF unless internal 4x4 edge filtering is disabled.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_V3_Y
// p3 = Cur MB Y row 8 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 9 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw
// tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y
// Region <1;4,0> reads 4 consecutive tc0 bytes and repeats each one over 4 channels.
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v03_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when the flag bit is set.
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
// NOTE(review): no instruction in this section jumps to the label below (the jmpi above is commented out).
BYPASS_V3_Y:
//-----------------------------------------------

View File

@@ -0,0 +1,175 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter horizontal UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.
//
// It assumes the data for horizontal de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+ H0 Edge
// | | |
// | | |
// | | |
// +-------+-------+ H1 Edge
// | | |
// | | |
// | | |
// +-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write the marker 0xBBBC into EntrySignatureC.
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
//---------- Deblock U external top edge ----------
// Purpose: derive the chroma MaskA/MaskB from the luma top-edge masks, then run FILTER_UV
// once for U and once for V; the whole section is skipped when FilterTopMbEdgeFlag is clear.
// (.z sets f0.0 when the masked flag bit is zero; it is tested by the jmpi a few lines down.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
// (f0.0) jmpi BYPASS_EXT_TOP_EDGE_UV
// Get horizontal border edge control data.
//***** Need to take every other bit to form U maskA and mask B
// Get Luma maskA and maskB
// Each channel shifts the broadcast mask word right by its own RRampW entry (presumably a
// 0..15 ramp - confirm), so a different mask bit ends up in bit 0 of each channel.
// The shifts carry no condition modifier, so f0.0 from the flag test is still intact at the jmpi.
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV)
// Extract UV MaskA and MaskB from every other bit of Y masks
// (<16;8,2> picks channels 0,2,...,14; bit 0 of each becomes one of the 8 flag bits.)
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
//---------- Deblock U external edge ----------
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
// P is loaded from TOP_MB_U_BASE; the PREV_MB_U_BASE variant is kept commented out.
// mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) P_AddrReg:w TOP_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
// alpha = bAlphaTop0_Cb, beta = bBetaTop0_Cb
// (one 2-element copy: two consecutive source bytes are widened into alpha and the word after it)
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaTop0_Cb]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_h03_0_Cb + bTc0_h02_0_Cb + bTc0_h01_0_Cb + bTc0_h00_0_Cb
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h00_0_Cb]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Save f0.0/f0.1 into MaskA and the following word so they can be reloaded for the V pass.
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV, 1)
//---------- Deblock V external top edge ----------
// p1 = Prev MB V row 0
// p0 = Prev MB V row 1
// q0 = Cur MB V row 0
// q1 = Cur MB V row 1
// P is loaded from TOP_MB_V_BASE; the PREV_MB_V_BASE variant is kept commented out.
// mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) P_AddrReg:w TOP_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
// alpha = bAlphaTop0_Cr, beta = bBetaTop0_Cr
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaTop0_Cr]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_h03_0_Cr + bTc0_h02_0_Cr + bTc0_h01_0_Cr + bTc0_h00_0_Cr
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h00_0_Cr]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Reload f0.0/f0.1 from the copy saved before the U call (presumably FILTER_UV changes the flags - confirm).
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV, 1)
ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV):
// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
// Purpose: filter the single internal horizontal chroma edge (between rows 3 and 4) for U and
// then V, using the luma mid-edge mask as MaskA and a zero MaskB. No flag test guards this
// section in the active code; the FilterInternal4x4EdgesFlag check below is commented out.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_4x4_DEBLOCK_H
//---------- Deblock U internal horz middle edge ----------
//***** Need to take every other bit to form U maskA
// Get Luma maskA and maskB
// Each channel shifts the broadcast mask word right by its own RRampW entry (presumably a
// 0..15 ramp - confirm), so a different mask bit ends up in bit 0 of each channel.
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw RRampW(0)
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
// alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb
// (one 2-element copy: two consecutive source bytes are widened into alpha and the word after it)
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_h23_Cb + bTc0_h22_Cb + bTc0_h21_Cb + bTc0_h20_Cb
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h20_Cb]<4;4,1>:ub { NoDDChk }
// Extract UV MaskA and MaskB from every other bit of Y masks
// (<16;8,2> picks channels 0,2,...,14; bit 0 of each becomes one of the 8 bits of f0.0.)
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// UV MaskA and MaskB
// MaskB and its flag copy f0.1 are forced to 0; MaskA keeps a copy of f0.0 for the V pass.
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV, 1)
//---------- Deblock V internal horz middle edge ----------
// p1 = Cur MB V row 2
// p0 = Cur MB V row 3
// q0 = Cur MB V row 4
// q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
// alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_h23_Cr + bTc0_h22_Cr + bTc0_h21_Cr + bTc0_h20_Cr
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h20_Cr]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Reload f0.0/f0.1 from MaskA/MaskB saved before the U call (presumably FILTER_UV changes the flags - confirm).
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV, 1)
//BYPASS_4x4_DEBLOCK_H:

View File

@@ -0,0 +1,175 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter vertical UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.
//
// It assumes the data for vertical de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
//
// V0 V1
// Edge Edge
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write the marker 0xBBBC into EntrySignatureC.
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
// Purpose: derive the chroma MaskA/MaskB from the luma left-edge masks, then run FILTER_UV
// once for U and once for V; the whole section is skipped when FilterLeftMbEdgeFlag is clear.
// (.z sets f0.0 when the masked flag bit is zero; it is tested by the jmpi a few lines down.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
// (f0.0) jmpi BYPASS_EXT_LEFT_EDGE_UV
// Get vertical border edge control data.
// Get Luma maskA and maskB
// Each channel shifts the broadcast mask word right by its own RRampW entry (presumably a
// 0..15 ramp - confirm), so a different mask bit ends up in bit 0 of each channel.
// The shifts carry no condition modifier, so f0.0 from the flag test is still intact at the jmpi.
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV)
// Extract UV MaskA and MaskB from every other bit of Y masks
// (<16;8,2> picks channels 0,2,...,14; bit 0 of each becomes one of the 8 flag bits.)
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
//---------- Deblock U external edge ----------
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
// alpha = bAlphaLeft0_Cb, beta = bBetaLeft0_Cb
// (one 2-element copy: two consecutive source bytes are widened into alpha and the word after it)
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaLeft0_Cb]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_v30_0_Cb + bTc0_v20_0_Cb + bTc0_v10_0_Cb + bTc0_v00_0_Cb
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Save f0.0/f0.1 into MaskA and the following word so they can be reloaded for the V pass.
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV, 1)
//---------- Deblock V external edge ----------
// p1 = Prev MB V row 0
// p0 = Prev MB V row 1
// q0 = Cur MB V row 0
// q1 = Cur MB V row 1
mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
// for vert edge: alpha = bAlphaLeft0_Cr, beta = bBetaLeft0_Cr
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaLeft0_Cr]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_v30_0_Cr + bTc0_v20_0_Cr + bTc0_v10_0_Cr + bTc0_v00_0_Cr
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Reload f0.0/f0.1 from the copy saved before the U call (presumably FILTER_UV changes the flags - confirm).
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV, 1)
ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV):
// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
// Purpose: filter the single internal vertical chroma edge (between rows 3 and 4 of the
// transposed data) for U and then V, using the luma mid-edge mask as MaskA and a zero MaskB.
// No flag test guards this section in the visible code.
// Same alpha and beta for all internal vert and horiz edges
//***** Need to take every other bit to form U or V maskA
// Get Luma maskA and maskB
// Each channel shifts the broadcast mask word right by its own RRampW entry (presumably a
// 0..15 ramp - confirm), so a different mask bit ends up in bit 0 of each channel.
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw RRampW(0)
//---------- Deblock U internal edge ----------
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
// alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb
// (one 2-element copy: two consecutive source bytes are widened into alpha and the word after it)
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_v32_Cb + bTc0_v22_Cb + bTc0_v12_Cb + bTc0_v02_Cb
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v02_Cb]<4;4,1>:ub { NoDDChk }
// Extract UV MaskA and MaskB from every other bit of Y masks
// (<16;8,2> picks channels 0,2,...,14; bit 0 of each becomes one of the 8 bits of f0.0.)
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// UV MaskA and MaskB
// MaskB and its flag copy f0.1 are forced to 0; MaskA keeps a copy of f0.0 for the V pass.
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV, 1)
//---------- Deblock V internal edge ----------
// P1 = Cur MB V row 2
// P0 = Cur MB V row 3
// Q0 = Cur MB V row 4
// Q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
// alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub { NoDDClr }
// tc0 has bTc0_v32_Cr + bTc0_v22_Cr + bTc0_v12_Cr + bTc0_v02_Cr
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v02_Cr]<4;4,1>:ub { NoDDChk }
// UV MaskA and MaskB
// Reload f0.0/f0.1 from MaskA/MaskB saved before the U call (presumably FILTER_UV changes the flags - confirm).
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV, 1)
//BYPASS_4x4_DEBLOCK_V:

View File

@@ -0,0 +1,229 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter horizontal Y ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.
//
// It assumes the data for horizontal de-blocking is already transposed.
//
// Luma:
//
// +-------+-------+-------+-------+ H0 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H1 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H2 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H3 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write the marker 0xBBBB into EntrySignatureC.
mov (1) EntrySignatureC:w 0xBBBB:w
#endif
//========== Luma deblocking ==========
//---------- Deblock Y external top edge (H0) ----------
// Purpose: load alpha/beta, alpha2, MaskA/MaskB and tc0 for the top MB edge, point P at the
// top neighbour and Q at the current MB, and invoke FILTER_Y unless FilterTopMbEdgeFlag is clear.
// Bypass deblocking if it is the top edge of the picture.
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]:uw 0xFFFF:uw // MaskA = 0?
// Get (alpha >> 2) + 2
// (only the shift is done here; the "+ 2" is the add placed just before the call)
shr (1) alpha2:w r[ECM_AddrReg, bAlphaTop0_Y]:ub 2:w // alpha >> 2
// p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 3 = r[Q_AddrReg, 48]<16;16,1>
// P is loaded from TOP_MB_Y_BASE; the PREV_MB_Y_BASE variant is kept commented out.
// mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) P_AddrReg:w TOP_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
// Get horizontal border edge control data
// alpha = bAlphaTop0_Y
// beta = bBetaTop0_Y
// NOTE(review): the next three writes form one NoDDClr ... NoDDChk chain, which presumably
// requires alpha/beta, MaskA/MaskB and tc0 to share a register; keep them adjacent - confirm.
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaTop0_Y]<2;2,1>:ub { NoDDClr } // 2 channels for alpha and beta
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw { NoDDClr, NoDDChk }
// tc0 has bTc0_h03_0_Y | bTc0_h02_0_Y | bTc0_h01_0_Y | bTc0_h00_0_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h00_0_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_EXT_TOP_EDGE_Y
// (f0.0.anyv) jmpi BYPASS_EXT_TOP_EDGE_Y
add (1) alpha2:w alpha2:w 2:w // alpha2 = (alpha >> 2) + 2
// CALL(FILTER_Y, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when FilterTopMbEdgeFlag is set.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_EXT_TOP_EDGE_Y:
//------------------------------------------------------------------
// Same alpha, alpha2, beta and MaskB for all internal edges
// Purpose: load the parameters shared by the internal edges H1-H3 once, so the per-edge
// sections below only have to set P/Q, MaskA and tc0.
// Get (alpha >> 2) + 2
// (only the shift is done here; the "+ 2" is the add at the end of this group)
shr (1) alpha2:w r[ECM_AddrReg, bAlphaInternal_Y]:ub 2:w // alpha >> 2
// alpha = bAlphaInternal_Y
// beta = bBetaInternal_Y
// (one 2-element copy: two consecutive source bytes are widened into alpha and the word after it)
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub { NoDDClr }
// Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.
mov (1) MaskB:uw 0:w { NoDDChk }
add (1) alpha2:w alpha2:w 2:w // alpha2 = (alpha >> 2) + 2
//---------- Deblock Y internal top edge (H1) ----------
// Purpose: point P/Q at rows 0-3 / 4-7 of the current MB, load the H1 edge mask and tc0
// values, and invoke FILTER_Y unless internal 4x4 edge filtering is disabled.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
// NOTE(review): MaskA and tc0 are written as a NoDDClr/NoDDChk pair, which presumably relies
// on both living in the same register; keep the two writes adjacent - confirm.
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw { NoDDClr }
// tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h10_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_4x4_DEBLOCK_H
// (f0.0.anyv) jmpi BYPASS_4x4_DEBLOCK_H
// CALL(FILTER_Y, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when the flag bit is set
// (this replaces the commented-out jmpi bypass above).
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_4x4_DEBLOCK_H:
//------------------------------------------------------------------
//---------- Deblock Y internal mid horizontal edge (H2) ----------
// Purpose: point P/Q at rows 4-7 / 8-11 of the current MB, load the H2 edge mask and tc0
// values, and invoke FILTER_Y unless internal 8x8 edge filtering is disabled.
// Bypass deblocking if FilterInternal8x8EdgesFlag = 0
// (.z sets f0.0 when the masked flag bit is zero; f0.0 is consumed by the PRED_CALL below.)
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal8x8EdgesFlag:w // Check for FilterInternal8x8EdgesFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw 0xFFFF:uw // MaskA = 0?
// p3 = Cur MB Y row 4 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 5 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 6 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 7 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 8 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 9 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw { NoDDClr }
// MaskB is not rewritten here: the active code last sets it to 0 in the shared internal-edge setup.
// mov (1) MaskB:uw 0:w // Set MaskB = 0, so it always uses bS < 4 algorithm.
// tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h20_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_8x8_DEBLOCK_H
// (f0.0.anyv) jmpi BYPASS_8x8_DEBLOCK_H
// CALL(FILTER_Y, 1)
// Predicated on the inverse of f0.0, i.e. the filter is only invoked when the flag bit is set.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_8x8_DEBLOCK_H:
//-----------------------------------------------
//---------- Deblock Y internal bottom edge (H3) ----------
// Filters the edge between Cur MB Y rows 11 and 12 (a 4x4 boundary).
// Sets up P/Q address registers, MaskA and tc0 for this edge, then calls
// FILTER_Y unless the edge is disabled in BitFlags.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// and.z sets f0.0 when the flag bit is clear, i.e. f0.0 = "bypass this edge".
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw 0xFFFF:uw // MaskA = 0?
// p3 = Cur MB Y row 8 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 9 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
// P side starts at row 8, Q side at row 12 of the current MB.
mov (1) P_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
// Edge control map for this edge becomes MaskA, which FILTER_Y loads into f0.0.
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw { NoDDClr }
// mov (1) MaskB:uw 0:w // Set MaskB = 0, so it always uses bS < 4 algorithm.
// tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_h30_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_4x4_DEBLOCK_H2
// (f0.0.anyv) jmpi BYPASS_4x4_DEBLOCK_H2
// CALL(FILTER_Y, 1)
// NOTE(review): PRED_CALL is defined outside this chunk; presumably it performs the
// call only when the inverted predicate -f0.0 holds (flag bit set) -- confirm.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_4x4_DEBLOCK_H2:
//-----------------------------------------------

View File

@@ -0,0 +1,233 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
////////// AVC ILDB filter vertical Y ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.
//
// It assumes the data for vertical de-blocking is already transposed.
//
// Luma:
//
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
//
// V0 V1 V2 V3
// Edge Edge Edge Edge
//
/////////////////////////////////////////////////////////////////////////////
// Debug builds only: write a marker value so the entry into this code can be identified.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBB:w
#endif
//========== Luma deblocking ==========
//---------- Deblock Y external left edge (V0) ----------
// Filters the edge between the previous (left) MB and the current MB.
// Loads alpha, beta, alpha2, MaskA (and the word following it) and tc0 for this
// edge from the edge control map, then calls FILTER_Y unless the edge is disabled.
// Bypass deblocking if it is left edge of the picture.
// and.z sets f0.0 when the flag bit is clear, i.e. f0.0 = "bypass this edge".
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]:uw 0xFFFF:uw // MaskA = 0?
// Get (alpha >> 2) + 2
// First half of alpha2; the "+ 2" is added just before the call below.
shr (1) alpha2:w r[ECM_AddrReg, bAlphaLeft0_Y]:ub 2:w // alpha >> 2
// p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 3 = r[Q_AddrReg, 48]<16;16,1>
// P side comes from the previous MB buffer, Q side from the current MB buffer.
mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
// Get vertical border edge control data
// alpha = bAlphaLeft0_Y
// beta = bBetaLeft0_Y
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaLeft0_Y]<2;2,1>:ub { NoDDClr } // 2 channels for alpha and beta
// Two-word copy: MaskA plus the next word.
// NOTE(review): the second word is presumably MaskB (bS = 4 map) -- confirm against the register layout.
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw { NoDDClr, NoDDChk }
// tc0 has bTc0_v30_0_Y | bTc0_v20_0_Y | bTc0_v10_0_Y | bTc0_v00_0_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v00_0_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_EXT_LEFT_EDGE_Y
// (f0.0.anyv) jmpi BYPASS_EXT_LEFT_EDGE_Y
add (1) alpha2:w alpha2:w 2:w // alpha2 = (alpha >> 2) + 2
// CALL(FILTER_Y, 1)
// NOTE(review): PRED_CALL is defined outside this chunk; presumably it performs the
// call only when the inverted predicate -f0.0 holds (flag bit set) -- confirm.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_EXT_LEFT_EDGE_Y:
//------------------------------------------------------------------
// Same alpha, alpha2, beta and MaskB for all internal edges
// This setup is done once here and reused by the V1, V2 and V3 sections below,
// which only reload P_AddrReg, Q_AddrReg, MaskA and tc0.
// Get (alpha >> 2) + 2
shr (1) alpha2:w r[ECM_AddrReg, bAlphaInternal_Y]:ub 2:w // alpha >> 2
// alpha = bAlphaInternal_Y
// beta = bBetaInternal_Y
// Two-channel copy: alpha and the byte that follows it (beta), widened to words.
mov (2) alpha<1>:w r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub { NoDDClr }
// Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.
mov (1) MaskB:uw 0:w { NoDDChk }
add (1) alpha2:w alpha2:w 2:w // alpha2 = (alpha >> 2) + 2
//---------- Deblock Y internal left edge (V1) ----------
// Filters the edge between Cur MB Y rows 3 and 4 of the (transposed) block.
// Only P/Q addresses, MaskA and tc0 are loaded here; alpha, beta, alpha2 and
// MaskB were set once for all internal edges before this section.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// and.z sets f0.0 when the flag bit is clear, i.e. f0.0 = "bypass this edge".
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw 0xFFFF:uw // MaskA = 0?
// p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
// P side starts at row 0, Q side at row 4 of the current MB.
mov (1) P_AddrReg:w SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
// Edge control map for this edge becomes MaskA, which FILTER_Y loads into f0.0.
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw { NoDDClr }
// tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v01_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_4x4_DEBLOCK_V
// (f0.0.anyv) jmpi BYPASS_4x4_DEBLOCK_V
// CALL(FILTER_Y, 1)
// NOTE(review): PRED_CALL is defined outside this chunk; presumably it performs the
// call only when the inverted predicate -f0.0 holds (flag bit set) -- confirm.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_4x4_DEBLOCK_V:
//------------------------------------------------------------------
//---------- Deblock Y internal mid vert edge (V2) ----------
// Filters the edge between Cur MB Y rows 7 and 8 of the (transposed) block,
// i.e. the 8x8 boundary. Only P/Q addresses, MaskA and tc0 are loaded here.
// Bypass deblocking if FilterInternal8x8EdgesFlag = 0
// and.z sets f0.0 when the flag bit is clear, i.e. f0.0 = "bypass this edge".
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal8x8EdgesFlag:w // Check for FilterInternal8x8EdgesFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw 0xFFFF:uw // MaskA = 0?
// p3 = Cur MB Y row 4 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 5 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 6 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 7 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 8 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 9 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
// P side starts at row 4, Q side at row 8 of the current MB.
mov (1) P_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
// Edge control map for this edge becomes MaskA, which FILTER_Y loads into f0.0.
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw { NoDDClr }
// mov (1) MaskB:uw 0:w // Set MaskB = 0, so it always uses bS < 4 algorithm.
// tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v02_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_8x8_DEBLOCK_V
// (f0.0.anyv) jmpi BYPASS_8x8_DEBLOCK_V
// CALL(FILTER_Y, 1)
// NOTE(review): PRED_CALL is defined outside this chunk; presumably it performs the
// call only when the inverted predicate -f0.0 holds (flag bit set) -- confirm.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_8x8_DEBLOCK_V:
//-----------------------------------------------
//---------- Deblock Y internal right edge (V3) ----------
// Filters the edge between Cur MB Y rows 11 and 12 of the (transposed) block.
// Only P/Q addresses, MaskA and tc0 are loaded here.
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
// and.z sets f0.0 when the flag bit is clear, i.e. f0.0 = "bypass this edge".
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// and.z.f0.1 (1) null:uw r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw 0xFFFF:uw // MaskA = 0?
// p3 = Cur MB Y row 8 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 9 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
// P side starts at row 8, Q side at row 12 of the current MB.
mov (1) P_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
// Edge control map for this edge becomes MaskA, which FILTER_Y loads into f0.0.
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw { NoDDClr }
// mov (1) MaskB:uw 0:w // Set MaskB = 0, so it always uses bS < 4 algorithm.
// tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y
mov (4) tc0<1>:ub r[ECM_AddrReg, bTc0_v03_Y]<4;4,1>:ub { NoDDChk }
// (f0.0) jmpi BYPASS_4x4_DEBLOCK_V2
// (f0.0.anyv) jmpi BYPASS_4x4_DEBLOCK_V2
// CALL(FILTER_Y, 1)
// NOTE(review): PRED_CALL is defined outside this chunk; presumably it performs the
// call only when the inverted predicate -f0.0 holds (flag bit set) -- confirm.
PRED_CALL(-f0.0, FILTER_Y, 1)
//BYPASS_4x4_DEBLOCK_V2:
//-----------------------------------------------

View File

@@ -0,0 +1,87 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//========== Forward message to root thread through gateway ==========
// Each child thread writes a byte into the root GRF r50 defined in open Gateway.
// Builds the gateway payload (offset/length, EU/thread id, key, data bytes) and
// sends it with the notify bit set; with GW_DCN the same send also ends the thread.
// Debug builds only: write a marker value so the entry into this code can be identified.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x7777:w
#endif
// Zero-initialize all 8 dwords of the gateway payload
mov (8) GatewayPayload<1>:ud 0:w //{ NoDDClr }
// Forward a message:
// Offset = x relative to r50 (defined in open gateway), x = ORIX >> 4 [bit 28:16]
// Need to shift left 16
// shift 2 more bits for byte to word offset
// (total shift is therefore 18; the commented-out line is the older shift-by-16 form)
//shl (1) Offset_Length:ud GateWayOffsetC:w 16:w { NoDDClr, NoDDChk }
shl (1) Offset_Length:ud GateWayOffsetC:w 18:w
// 2 bytes offset
// (0x00020000 is the value 2 placed at bit 16, where the offset field starts)
add (1) Offset_Length:ud Offset_Length:ud 0x00020000:d { NoDDClr }
// Length = 1 byte, [bit 10:8 = 000]
//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000
//mov (1) DispatchID:ub r0.20:ub // Dispatch ID
//Move in EUid and Thread ID that we received from the PARENT thread
mov (1) EUID_TID:uw r0.6:uw { NoDDClr, NoDDChk }
mov (1) GatewayPayloadKey:uw 0x1212:uw { NoDDClr, NoDDChk } // Key
//mov (4) GatewayPayload<1>:ud 0:ud { NoDDClr, NoDDChk } // Init payload low 4 dword
// Write back one byte (value = 0xFF) to root thread GRF to indicate this child thread is finished
// All lower 4 bytes must be assigned to the same byte value.
mov (4) GatewayPayload<1>:ub 0xFFFF:uw { NoDDChk }
// msg descriptor bit 15 set to '1' for notification
#ifdef GW_DCN
// For ILK, EOT bit should also be set to terminate the thread. This is to fix a timing related HW issue.
//
send (8) null:ud m0 GatewayPayload<8;8,1>:ud MSG_GW_EOT FWDMSGDSC+NOTIFYMSG
#else
send (8) null:ud m0 GatewayPayload<8;8,1>:ud MSG_GW FWDMSGDSC+NOTIFYMSG
#endif // GW_DCN
//========== Forward Msg Done ========================================

View File

@@ -0,0 +1,76 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//========== Forward message to root thread through gateway ==========
// Chroma root kernel updates luma thread limit.
// Builds the gateway payload (offset/length, EU/thread id, key, data bytes) and
// forwards the value MaxThreads - OutstandingThreads. Unlike the child-thread
// variant, the send has no NOTIFYMSG bit and writes a response to GatewayResponse.
// Debug builds only: write a marker value so the entry into this code can be identified.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x7788:w
#endif
// Zero-initialize all 8 dwords of the gateway payload
mov (8) GatewayPayload<1>:ud 0:w { NoDDClr }
// Forward a message:
// The destination offset is the constant THREAD_LIMIT_OFFSET.
// NOTE(review): THREAD_LIMIT_OFFSET is defined outside this chunk; presumably it is already
// positioned in the offset field [bit 28:16], since no shift is applied here -- confirm.
mov (1) Offset_Length:ud THREAD_LIMIT_OFFSET:ud { NoDDClr, NoDDChk }
// Length = 1 byte, [bit 10:8 = 000]
//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000
//mov (1) DispatchID:ub r0.20:ub // Dispatch ID
// Copy EUid and Thread ID that we received from the PARENT thread
mov (1) EUID_TID:uw r0.6:uw { NoDDClr, NoDDChk }
mov (1) GatewayPayloadKey:uw 0x1212:uw { NoDDChk } // Key
//mov (4) GatewayPayload<1>:ud 0:ud { NoDDClr, NoDDChk } // Init payload low 4 dword
// Write back one byte (value = MaxThreads - OutstandingThreads) to the root thread GRF.
// All lower 4 bytes must be assigned to the same byte value.
add (1) Temp1_W:w MaxThreads:uw -OutstandingThreads:uw
// Replicate one byte into the 4 low payload bytes (<0;1,0> repeats a single element).
// NOTE(review): Temp1_B is presumably the low byte of Temp1_W -- confirm against the register declarations.
mov (4) GatewayPayload<1>:ub Temp1_B<0;1,0>:ub
send (8) GatewayResponse:ud m0 GatewayPayload<8;8,1>:ud MSG_GW FWDMSGDSC
//========== Forward Msg Done ========================================

View File

@@ -0,0 +1,449 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#if !defined(__AVC_ILDB_LUMA_CORE__) // Make sure this file is only included once
#define __AVC_ILDB_LUMA_CORE__
////////// AVC ILDB Luma Core /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - alpha // alpha corresponding to the edge to be filtered
// - beta // beta corresponding to the edge to be filtered
// - tc0 // tc0 corresponding to the edge to be filtered
//
//
// +----+----+----+----+----+----+----+----+
// | p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |
// +----+----+----+----+----+----+----+----+
//
// p3 = r[P_AddrReg, 0]<16;16,1>
// p2 = r[P_AddrReg, 16]<16;16,1>
// p1 = r[P_AddrReg, 32]<16;16,1>
// p0 = r[P_AddrReg, 48]<16;16,1>
// q0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = r[Q_AddrReg, 48]<16;16,1>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Register-indirect region macros for the 8 pixel rows around the edge being filtered.
// Each row is 16 bytes; rows are 16 bytes apart, P rows relative to P_AddrReg and
// Q rows relative to Q_AddrReg. Any earlier definitions of the same names are removed
// first so this file can redefine them.
// The region is both src and dest
// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values
#undef P3
#undef P2
#undef P1
#undef P0
#undef Q0
#undef Q1
#undef Q2
#undef Q3
// Source views: 16 unsigned bytes per row.
#define P3 r[P_AddrReg, 0]<16;16,1>:ub
#define P2 r[P_AddrReg, 16]<16;16,1>:ub
#define P1 r[P_AddrReg, 32]<16;16,1>:ub
#define P0 r[P_AddrReg, 48]<16;16,1>:ub
#define Q0 r[Q_AddrReg, 0]<16;16,1>:ub
#define Q1 r[Q_AddrReg, 16]<16;16,1>:ub
#define Q2 r[Q_AddrReg, 32]<16;16,1>:ub
#define Q3 r[Q_AddrReg, 48]<16;16,1>:ub
// New region as dest
// Destination views of the same rows (p3 and q3 have none: they are never written).
#undef NewP2
#undef NewP1
#undef NewP0
#undef NewQ0
#undef NewQ1
#undef NewQ2
#define NewP2 r[P_AddrReg, 16]<1>:ub
#define NewP1 r[P_AddrReg, 32]<1>:ub
#define NewP0 r[P_AddrReg, 48]<1>:ub
#define NewQ0 r[Q_AddrReg, 0]<1>:ub
#define NewQ1 r[Q_AddrReg, 16]<1>:ub
#define NewQ2 r[Q_AddrReg, 32]<1>:ub
// Filter one luma edge
//
// Subroutine: de-blocks 16 pixels along one luma edge, in place.
// Inputs (set by the caller): P_AddrReg, Q_AddrReg, MaskA (and the word after it),
// alpha, alpha2, beta and tc0. Outputs: rows p2..p0 and q0..q2 are overwritten
// through the NewP*/NewQ* destination regions for the channels that pass the tests.
// Structure:
//   1. Build filterSamplesFlag in f0.0 (MaskA && |q0-p0|<alpha && |p1-p0|<beta && |q1-q0|<beta).
//   2. For channels selected by f0.1 run the bS = 4 algorithm, otherwise the bS < 4 algorithm.
// f0.0 and f0.1 are reused as scratch predicates throughout, so statement order matters.
FILTER_Y:
// Debug builds only: write a marker value so the entry into this code can be identified.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x1111:w
#endif
//---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------
// bS is in MaskA
// Src copy of the p3, p2, p1, p0, q0, q1, q2, q3
// mov (16) p0123_W(0)<1> r[P_AddrReg]<16;16,1>:uw
// mov (16) p0123_W(1)<1> r[P_AddrReg, 32]<16;16,1>:uw
// mov (16) q0123_W(0)<1> r[Q_AddrReg]<16;16,1>:uw
// mov (16) q0123_W(1)<1> r[Q_AddrReg, 32]<16;16,1>:uw
// Two-word copy: f0.0 = MaskA, f0.1 = the word following MaskA.
// NOTE(review): that second word is presumably MaskB (bS = 4 map) -- confirm against the register layout.
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
add (16) q0_p0(0)<1> Q0 -P0 // q0-p0
add (16) TempRow0(0)<1> P1 -P0 // p1-p0
add (16) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Build FilterSampleFlag
// Each compare is predicated on f0.0 and writes f0.0, so the three conditions are AND-ed
// with each other and with the initial MaskA bits.
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) alpha:w
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) beta:w
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) beta:w
//-----------------------------------------------------------------------------------------
(f0.0) if (16) Y_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:uw MaskB:uw {NoMask} // Now check for which algorithm to apply
// (abs)ap = |p2-p0|
add (16) ap(0)<1> P2 -P0 // ap = p2-p0
// (abs)aq = |q2-q0|
add (16) aq(0)<1> Q2 -Q0 // aq = q2-q0
// Make a copy of unmodified p0 and p1 for use in q0'and q1' calculation
// (16 words from offset 32 = the p1 and p0 rows)
// NOTE(review): p1(0) and p0(0) used further down presumably alias this copy -- confirm.
mov (16) p0123_W(1)<1> r[P_AddrReg, 32]<16;16,1>:uw {NoMask}
(f0.1) if (16) Y_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// Compute p0', p1' and p2'
//-----------------------------------------------------------------------------
// bS = 4 Algorithm :
//
// gama = |p0-q0| < ((alpha >> 2) + 2)
// deltap = (ap<beta) && gama; // deep filter flag
// if (deltap) {
// p0' = ( p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3;
// p1' = ( p2 + p1 + p0 + q0 + 2) >> 2;
// p2' = (2*p3 +3*p2 + p1 + p0 + q0 + 4) >> 3;
// } else {
// p0' = ( 2*p1 + p0 + q1 + 2) >> 2;
// }
//-----------------------------------------------------------------------------
// gama = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2
cmp.l.f0.1 (16) null:w (abs)q0_p0(0) alpha2:w
// Common P01 = p0 + p1
add (16) P0_plus_P1(0)<1> P0 P1
// Common Q01 = q0 + q1
add (16) Q0_plus_Q1(0)<1> Q0 Q1
// mov (1) CTemp1_W:w f0.1:uw {NoMask}
// Copy gama into f0.0 so deltap (f0.1) and deltaq (f0.0) can both be derived from it.
mov (1) f0.0:uw f0.1:uw {NoMask}
// deltap = ((abs)ap < beta) && gama
(f0.1) cmp.l.f0.1 (16) null:w (abs)ap(0) beta<0;1,0>:w // (abs)ap < beta ?
// deltaq = ((abs)aq < beta) && gama
(f0.0) cmp.l.f0.0 (16) null:w (abs)aq(0) beta<0;1,0>:w // (abs)aq < beta ?
// mov (1) CTemp1_W:w f0.0:uw {NoMask} // gama = |p0-q0| < ((alpha >> 2) + 2) for each channel
// and (1) f0.1:w f0.1:uw CTemp1_W:w {NoMask} // deltap = (ap<beta) && gama
(f0.1) if (16) Y_ELSE3 // for channels its deltap = true
add (16) P2_plus_P3(0)<1> P2 P3
// A = (p1 + p0) + q0 = P01 + q0
add (16) A(0)<1> P0_plus_P1(0) Q0 // A = P01 + q0
// Now acc0 = A
// B = p2 + (p1 + p0 + q0) + 4 = p2 + A + 4
// add (16) acc0.0<1>:w P2 4:w // p2 + 4
// add (16) BB(0)<1> acc0.0<16;16,1>:w A(0) // B = p2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // A + 4 (acc0 still holds A from the add above)
add (16) BB(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4
// Now acc0 = B
// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
// mov (16) acc0.0<1>:w BB(0)
mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
add (16) acc0.0<1>:w Q1 A(0) // q1 + A
add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0) // B + A + q1
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3
// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
// mov (16) acc0.0<1>:w BB(0)
// mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w
// shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// Write the results back only after all three have been computed from the unmodified P rows.
mov (16) NewP2 TempRow3B(0) // p2'
mov (16) NewP1 TempRow1B(0) // p1'
mov (16) NewP0 TempRow0B(0) // p0'
Y_ELSE3:
else (16) Y_ENDIF3 // for channels its deltap = false
// p0' = (2*p1 + p0 + q1 + 2) >> 2 = (p1 + P01 + q1 + 2) >> 2
add (16) acc0.0<1>:w P1 P0_plus_P1(0) // p1 + P01
add (16) acc0.0<1>:w acc0.0<16;16,1>:w Q1
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 2:w // p1 + P01 + q1 + 2
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 2:w // >> 2
mov (16) NewP0 TempRow0B(0) // p0'
endif
Y_ENDIF3:
// Compute q0', q1' and q2'
//-----------------------------------------------------------------------------
// bS = 4 Algorithm (cont):
//
// deltaq = (aq<beta) && gama; // deep filter flag
// if (deltaq) {
// q0' = ( q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3;
// q1' = ( q2 + q1 + q0 + p0 + 2) >> 2;
// q2' = (2*q3 +3*q2 + q1 + q0 + p0 + 4) >> 3;
// } else {
// q0' = ( 2*q1 + q0 + p1 + 2) >> 2;
// }
// deltaq = ((abs)aq < beta) && gama
// cmp.l.f0.1 (16) null:w (abs)aq(0) beta<0;1,0>:w // (abs)aq < beta ?
// Common Q01 = q0 + q1
// add (16) Q0_plus_Q1(0)<1> Q0 Q1
// and (1) f0.1:w f0.1:uw CTemp1_W:w {NoMask} // deltaq = ((abs)ap < beta) && gama
// The P rows may already hold p0'/p1' at this point, so the Q side reads p0(0)/p1(0) instead of P0/P1.
(f0.0) if (16) Y_ELSE4 // for channels its deltaq = true
add (16) Q2_plus_Q3(0)<1> Q2 Q3
// A = (q1 + q0) + p0 = Q01 + p0
add (16) A(0)<1> Q0_plus_Q1(0) p0(0) // A = q1+q0 + p0
// Acc0 = A
// B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // A + 4 (acc0 still holds A from the add above)
add (16) BB(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4
// Acc0 = B
// q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3
// mov (16) acc0.0<1>:w BB(0)
mac (16) acc0.0<1>:w Q2_plus_Q3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
add (16) acc0.0<1>:w p1(0) A(0)
add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0)
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w
mov (16) NewQ2 TempRow3B(0) // q2'
mov (16) NewQ1 TempRow1B(0) // q1'
mov (16) NewQ0 TempRow0B(0) // q0'
Y_ELSE4:
else (16) Y_ENDIF4 // for channels its deltaq = false
// q0' = (2*q1 + q0 + p1 + 2) >> 2 = (q1 + Q01 + p1 + 2) >> 2
// Use original p1 values in p1(0)
add (16) acc0.0<1>:w p1(0) Q0_plus_Q1(0) // p1 + Q01
add (16) acc0.0<1>:w acc0.0<16;16,1>:w Q1
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 2:w // p1 + Q01 + q1 + 2
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 2:w // >> 2
mov (16) NewQ0 TempRow0B(0) // q0'
endif
Y_ENDIF4:
// Done with bS = 4 algorithm
Y_ELSE2:
else (16) Y_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
//-----------------------------------------------------------------------------
// bS < 4 Algorithm :
// tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
// if (|p2-p0|<Beta)
// p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )
// if (|q2-q0|<Beta)
// q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )
//-----------------------------------------------------------------------------
// Expand tc0
mov (16) tc_exp(0)<1> tc0<1;4,0>:ub {NoMask}
mov (16) tc0_exp(0)<1> tc0<1;4,0>:ub {NoMask} // tc0_exp = tc0, each tc0 is duplicated 4 times for 4 adjacent pixels
// tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
// mov (16) tc_exp(0)<1> tc0_exp(0) // tc = tc0_exp first
cmp.l.f0.0 (16) null:w (abs)ap(0) beta:w // |p2-p0|< Beta ? ---> (abs)ap < Beta ?
cmp.l.f0.1 (16) null:w (abs)aq(0) beta:w // |q2-q0|< Beta ? ---> (abs)aq < Beta ?
//--- Use free cycles here ---
// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (16) acc0<1>:w P1 4:w // p1 + 4
mac (16) acc0<1>:w q0_p0(0) 4:w // 4 * (q0-p0) + p1 + 4
add (16) acc0<1>:w acc0<16;16,1>:w -Q1 // 4 * (q0-p0) + p1 - q1 + 4
shr (16) TempRow0(0)<1> acc0<16;16,1>:w 3:w
// Continue on getting tc_exp
(f0.0) add (16) tc_exp(0)<1> tc_exp(0) 1:w // tc0_exp + (|p2-p0|<Beta ? 1 : 0)
mov (2) CTemp1_W<1>:w f0.0<2;2,1>:w {NoMask} // Save both flags: f0.0 (|p2-p0|<Beta) and f0.1 (|q2-q0|<Beta)
(f0.1) add (16) tc_exp(0)<1> tc_exp(0) 1:w // tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
// Continue on clipping tc to get delta
cmp.g.f0.0 (16) null:w TempRow0(0) tc_exp(0) // Clip if delta' > tc
cmp.l.f0.1 (16) null:w TempRow0(0) -tc_exp(0) // Clip if delta' < -tc
//--- Use free cycles here ---
// common = (p0+q0+1) >> 1 ---> TempRow2(0)
// Same as avg of p0 and q0
avg (16) TempRow2(0)<1> P0 Q0
// Continue on clipping tc to get delta
(f0.0) mov (16) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (16) TempRow0(0)<1> -tc_exp(0)
//--- Use free cycles here ---
// Restore both saved flags (two words) now that the clip compares are done with f0.0/f0.1.
mov (2) f0.0<1>:w CTemp1_W<2;2,1>:w {NoMask} // CTemp1_W = (|p2-p0|<Beta)
// CTemp2_W = (|q2-q0|<Beta)
//-----------------------------------------------------------------------
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (16) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
add.sat (16) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (16) NewP0 TempRow1B(0) // p0'
mov (16) NewQ0 TempRow0B(0) // q0'
//-----------------------------------------------------------------------
// Now compute p1' and q1'
// if (|p2-p0|<Beta)
// mov (1) f0.0:w CTemp1_W:w {NoMask} // CTemp1_W = (|p2-p0|<Beta)
(f0.0) if (16) Y_ENDIF6
// p1' = p1 + Clip3(-tc0, tc0, adj)
// adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1
add (16) acc0<1>:w P2 TempRow2(0) // TempRow2(0) = common = (p0+q0+1) >> 1
mac (16) acc0<1>:w P1 -2:w
shr (16) TempRow1(0)<1> acc0<16;16,1>:w 1:w
// tc clip to get tc_adj
cmp.g.f0.0 (16) null:w TempRow1(0) tc0_exp(0) // Clip if adj > tc0
cmp.l.f0.1 (16) null:w TempRow1(0) -tc0_exp(0) // Clip if adj < -tc0
(f0.0) mov (16) TempRow1(0)<1> tc0_exp(0)
(f0.1) mov (16) TempRow1(0)<1> -tc0_exp(0)
//--- Use free cycles here ---
// f0.1 was overwritten by the clip compare above; reload the saved |q2-q0|<Beta flag for the next if.
mov (1) f0.1:w CTemp2_W:w {NoMask} // CTemp2_W = (|q2-q0|<Beta)
// p1' = p1 + tc_adj
add.sat (16) TempRow1B(0)<2> P1 TempRow1(0) // p1+tc_adj
mov (16) NewP1 TempRow1B(0) // p1'
//------------------------------------------------------------------------
Y_ENDIF6:
endif
// if (|q2-q0|<Beta)
// mov (1) f0.1:w CTemp2_W:w {NoMask} // CTemp2_W = (|q2-q0|<Beta)
(f0.1) if (16) Y_ENDIF7
// q1' = q1 + Clip3(-tc0, tc0, adj)
// adj = (q2 + common - (q1<<1)) >> 1
// same as q2 + common - (q1 * 2)
add (16) acc0<1>:w Q2 TempRow2(0)
mac (16) acc0<1>:w Q1 -2:w
shr (16) TempRow1(0)<1> acc0<16;16,1>:w 1:w
// tc clip to get tc_adj
cmp.g.f0.0 (16) null:w TempRow1(0) tc0_exp(0) // Clip if adj > tc0
cmp.l.f0.1 (16) null:w TempRow1(0) -tc0_exp(0) // Clip if adj < -tc0
(f0.0) mov (16) TempRow1(0)<1> tc0_exp(0)
(f0.1) mov (16) TempRow1(0)<1> -tc0_exp(0)
// q1' = q1 + tc_adj
add.sat (16) TempRow1B(0)<2> Q1 TempRow1(0) // q1+tc_adj
mov (16) NewQ1 TempRow1B(0) // q1'
//------------------------------------------------------------------------
Y_ENDIF7:
endif
endif
Y_ENDIF2:
Y_ENDIF1:
endif
RETURN
#endif // !defined(__AVC_ILDB_LUMA_CORE__)

View File

@@ -0,0 +1,421 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#if !defined(__AVC_ILDB_LUMA_CORE_MBAFF__) // Make sure this file is only included once
#define __AVC_ILDB_LUMA_CORE_MBAFF__
////////// AVC ILDB Luma Core Mbaff /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, the caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - alpha // alpha corresponding to the edge to be filtered
// - beta // beta corresponding to the edge to be filtered
// - tc0 // tc0 corresponding to the edge to be filtered
//
//
// +----+----+----+----+----+----+----+----+
// | p3 | p2 | p1 | p0 | q0 | q1 | q2 | q3 |
// +----+----+----+----+----+----+----+----+
//
// p3 = r[P_AddrReg, 0]<16;16,1>
// p2 = r[P_AddrReg, 16]<16;16,1>
// p1 = r[P_AddrReg, 32]<16;16,1>
// p0 = r[P_AddrReg, 48]<16;16,1>
// q0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = r[Q_AddrReg, 48]<16;16,1>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Pixel-row regions around the edge. The same GRF area is used as source and destination.
// Each P*/Q* source macro reads 16 unsigned bytes at a fixed byte offset from
// P_AddrReg / Q_AddrReg. Only read P0-P3 and Q0-Q3 while the corresponding row has
// not yet been overwritten with its new (filtered) value.
// Every name is #undef'd right before it is (re)defined.
#undef P3
#define P3 r[P_AddrReg, 0]<16;16,1>:ub
#undef P2
#define P2 r[P_AddrReg, 16]<16;16,1>:ub
#undef P1
#define P1 r[P_AddrReg, 32]<16;16,1>:ub
#undef P0
#define P0 r[P_AddrReg, 48]<16;16,1>:ub
#undef Q0
#define Q0 r[Q_AddrReg, 0]<16;16,1>:ub
#undef Q1
#define Q1 r[Q_AddrReg, 16]<16;16,1>:ub
#undef Q2
#define Q2 r[Q_AddrReg, 32]<16;16,1>:ub
#undef Q3
#define Q3 r[Q_AddrReg, 48]<16;16,1>:ub
// Destination views of the rows that the filter may rewrite (p2..p0 and q0..q2).
// They use the same offsets as the source macros above, with a <1> destination stride.
#undef NewP2
#define NewP2 r[P_AddrReg, 16]<1>:ub
#undef NewP1
#define NewP1 r[P_AddrReg, 32]<1>:ub
#undef NewP0
#define NewP0 r[P_AddrReg, 48]<1>:ub
#undef NewQ0
#define NewQ0 r[Q_AddrReg, 0]<1>:ub
#undef NewQ1
#define NewQ1 r[Q_AddrReg, 16]<1>:ub
#undef NewQ2
#define NewQ2 r[Q_AddrReg, 32]<1>:ub
// Filter one luma edge - mbaff
//
// FILTER_Y_MBAFF de-blocks one 16-pixel luma edge, one channel per pixel.
// Flow:
//   1. Load the two edge-control masks into f0.0 / f0.1 and AND f0.0 with the three
//      threshold tests (|q0-p0| < alpha, |p1-p0| < beta, |q1-q0| < beta).
//   2. For the channels left in f0.0, f0.1 selects the bS = 4 algorithm; the
//      remaining channels take the bS < 4 (tc0-clipped) algorithm.
//   3. Results are written through the NewP*/NewQ* destination regions.
// Thresholds are read from Mbaff_ALPHA, Mbaff_ALPHA2, Mbaff_BETA and Mbaff_TC0.
// Several steps read acc0 right after an instruction whose result they need, i.e. they
// depend on acc0 still holding that result (see the "Now acc0 = ..." notes) - do not
// reorder instructions here.
FILTER_Y_MBAFF:
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x1111:w
#endif
//---------- Derive filterSamplesFlag in AVC spec, equation (8-469) ----------
// bS is in MaskA
// Src copy of the p3, p2, p1, p0, q0, q1, q2, q3
// mov (16) p0123_W(0)<1> r[P_AddrReg]<16;16,1>:uw
// mov (16) p0123_W(1)<1> r[P_AddrReg, 32]<16;16,1>:uw
// mov (16) q0123_W(0)<1> r[Q_AddrReg]<16;16,1>:uw
// mov (16) q0123_W(1)<1> r[Q_AddrReg, 32]<16;16,1>:uw
// Move MaskA and MaskB to flag regs: the two consecutive words starting at MaskA go to f0.0 and f0.1
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
add (16) q0_p0(0)<1> Q0 -P0 // q0-p0
add (16) TempRow0(0)<1> P1 -P0 // p1-p0
add (16) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Each compare below is predicated on f0.0 and writes f0.0, so the conditions are ANDed together
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) Mbaff_ALPHA(0)
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) Mbaff_BETA(0)
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) Mbaff_BETA(0)
//-----------------------------------------------------------------------------------------
(f0.0) if (16) MBAFF_Y_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:uw MaskB:uw {NoMask} // Now check for which algorithm to apply
// (abs)ap = |p2-p0|
add (16) ap(0)<1> P2 -P0
// (abs)aq = |q2-q0|
add (16) aq(0)<1> Q2 -Q0
// Make a copy of unmodified p0 and p1 for use in q0' and q1' calculation.
// Copies the 32 bytes at P_AddrReg+32 (the p1 and p0 rows per the P1/P0 macros), read as 16 words.
mov (16) p0123_W(1)<1> r[P_AddrReg, 32]<16;16,1>:uw {NoMask}
(f0.1) if (16) MBAFF_Y_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// Compute p0', p1' and p2'
//-----------------------------------------------------------------------------
// bS = 4 Algorithm :
//
// gamma = |p0-q0| < ((alpha >> 2) + 2)
// deltap = (ap<beta) && gamma; // deep filter flag
// if (deltap) {
// p0' = ( p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3;
// p1' = ( p2 + p1 + p0 + q0 + 2) >> 2;
// p2' = (2*p3 +3*p2 + p1 + p0 + q0 + 4) >> 3;
// } else {
// p0' = ( 2*p1 + p0 + q1 + 2) >> 2;
// }
//-----------------------------------------------------------------------------
// gamma = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2
cmp.l.f0.1 (16) null:w (abs)q0_p0(0) Mbaff_ALPHA2(0)
// Common P01 = p0 + p1
add (16) P0_plus_P1(0)<1> P0 P1
// Common Q01 = q0 + q1
add (16) Q0_plus_Q1(0)<1> Q0 Q1
// Duplicate the gamma flags into f0.0 so that deltap (f0.1) and deltaq (f0.0) are derived separately
mov (1) f0.0:uw f0.1:uw {NoMask}
// deltap = ((abs)ap < beta) && gamma
(f0.1) cmp.l.f0.1 (16) null:w (abs)ap(0) Mbaff_BETA(0) // (abs)ap < beta ?
// deltaq = ((abs)aq < beta) && gamma
(f0.0) cmp.l.f0.0 (16) null:w (abs)aq(0) Mbaff_BETA(0) // (abs)aq < beta ?
(f0.1) if (16) MBAFF_Y_ELSE3 // for channels whose deltap = true
add (16) P2_plus_P3(0)<1> P2 P3
// A = p1 + p0 + q0 = P01 + q0
add (16) A(0)<1> P0_plus_P1(0) Q0 // A = P01 + q0
// Now acc0 = A
// B = p2 + p1 + p0 + q0 + 4 = p2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // A + 4
add (16) BB(0)<1> acc0.0<16;16,1>:w P2 // B = p2 + A + 4
// Now acc0 = B
// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
mac (16) acc0.0<1>:w P2_plus_P3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
add (16) acc0.0<1>:w Q1 A(0) // q1 + A
add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0) // B + A + q1
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w // (B + A + q1) >> 3
mov (16) NewP2 TempRow3B(0) // p2'
mov (16) NewP1 TempRow1B(0) // p1'
mov (16) NewP0 TempRow0B(0) // p0'
MBAFF_Y_ELSE3:
else (16) MBAFF_Y_ENDIF3 // for channels whose deltap = false
// p0' = (2*p1 + p0 + q1 + 2) >> 2 = (p1 + P01 + q1 + 2) >> 2
add (16) acc0.0<1>:w P1 P0_plus_P1(0) // p1 + P01 (TempRow1(0) = P01)
add (16) acc0.0<1>:w acc0.0<16;16,1>:w Q1
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 2:w // p1 + P01 + q1 + 2
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 2:w // >> 2
mov (16) NewP0 TempRow0B(0) // p0'
endif
MBAFF_Y_ENDIF3:
// Compute q0', q1' and q2'
//-----------------------------------------------------------------------------
// bS = 4 Algorithm (cont):
//
// deltaq = (aq<beta) && gamma; // deep filter flag
// if (deltaq) {
// q0' = ( q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3;
// q1' = ( q2 + q1 + q0 + p0 + 2) >> 2;
// q2' = (2*q3 +3*q2 + q1 + q0 + p0 + 4) >> 3;
// } else {
// q0' = ( 2*q1 + q0 + p1 + 2) >> 2;
// }
// NOTE(review): p0(0) / p1(0) are defined outside this file section; presumably they alias the
// unmodified p0/p1 copy saved in p0123_W(1) above - confirm against the register declarations.
(f0.0) if (16) MBAFF_Y_ELSE4 // for channels whose deltaq = true
add (16) Q2_plus_Q3(0)<1> Q2 Q3
// A = q1 + q0 + p0 = Q01 + p0
add (16) A(0)<1> Q0_plus_Q1(0) p0(0) // A = q1+q0 + p0
// Now acc0 = A
// B = q2 + q1 + q0 + p0 + 4 = q2 + A + 4
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 4:w // A + 4
add (16) BB(0)<1> acc0.0<16;16,1>:w Q2 // B = q2 + A + 4
// Acc0 = B
// q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3
mac (16) acc0.0<1>:w Q2_plus_Q3(0) 2:w
shr.sat (16) TempRow3B(0)<2> acc0.0<16;16,1>:w 3:w
// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
add (16) acc0.0<1>:w BB(0) -2:w
shr.sat (16) TempRow1B(0)<2> acc0.0<16;16,1>:w 2:w
// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
add (16) acc0.0<1>:w p1(0) A(0)
add (16) acc0.0<1>:w acc0.0<16;16,1>:w BB(0)
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 3:w
mov (16) NewQ2 TempRow3B(0) // q2'
mov (16) NewQ1 TempRow1B(0) // q1'
mov (16) NewQ0 TempRow0B(0) // q0'
MBAFF_Y_ELSE4:
else (16) MBAFF_Y_ENDIF4 // for channels whose deltaq = false
// q0' = (2*q1 + q0 + p1 + 2) >> 2 = (q1 + Q01 + p1 + 2) >> 2
// Use original p1 values in p1(0)
add (16) acc0.0<1>:w p1(0) Q0_plus_Q1(0) // p1 + Q01
add (16) acc0.0<1>:w acc0.0<16;16,1>:w Q1
add (16) acc0.0<1>:w acc0.0<16;16,1>:w 2:w // p1 + Q01 + q1 + 2
shr.sat (16) TempRow0B(0)<2> acc0.0<16;16,1>:w 2:w // >> 2
mov (16) NewQ0 TempRow0B(0) // q0'
endif
MBAFF_Y_ENDIF4:
// Done with bS = 4 algorithm
MBAFF_Y_ELSE2:
else (16) MBAFF_Y_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
//-----------------------------------------------------------------------------
// bS < 4 Algorithm :
// tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
// if (|p2-p0|<Beta)
// p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )
// if (|q2-q0|<Beta)
// q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )
//-----------------------------------------------------------------------------
mov (16) tc_exp(0)<1> Mbaff_TC0(0) // tc = tc0_exp first
cmp.l.f0.0 (16) null:w (abs)ap(0) Mbaff_BETA(0) // |p2-p0|<Beta ?
cmp.l.f0.1 (16) null:w (abs)aq(0) Mbaff_BETA(0) // |q2-q0|<Beta ?
//--- Use free cycles here ---
// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (16) acc0<1>:w P1 4:w // p1 + 4
mac (16) acc0<1>:w q0_p0(0) 4:w // 4 * (q0-p0) + p1 + 4
add (16) acc0<1>:w acc0<16;16,1>:w -Q1 // 4 * (q0-p0) + p1 - q1 + 4
shr (16) TempRow0(0)<1> acc0<16;16,1>:w 3:w
// Continue on getting tc_exp
(f0.0) add (16) tc_exp(0)<1> tc_exp(0) 1:w // tc0_exp + (|p2-p0|<Beta ? 1 : 0)
// Save both flag words (f0.0 and f0.1) into the two words starting at CTemp1_W;
// the clip compares below overwrite the flags. (The second word is presumably CTemp2_W.)
mov (2) CTemp1_W<1>:w f0.0<2;2,1>:w {NoMask} // Save |p2-p0|<Beta and |q2-q0|<Beta flags
(f0.1) add (16) tc_exp(0)<1> tc_exp(0) 1:w // tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
// Continue on clipping tc to get delta
cmp.g.f0.0 (16) null:w TempRow0(0) tc_exp(0) // Clip if delta' > tc
cmp.l.f0.1 (16) null:w TempRow0(0) -tc_exp(0) // Clip if delta' < -tc
//--- Use free cycles here ---
// common = (p0+q0+1) >> 1 ---> TempRow2(0)
// Same as avg of p0 and q0
avg (16) TempRow2(0)<1> P0 Q0
// Continue on clipping tc to get delta
(f0.0) mov (16) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (16) TempRow0(0)<1> -tc_exp(0)
//--- Use free cycles here ---
// Restore both saved beta-test flags into f0.0 / f0.1
mov (2) f0.0<1>:w CTemp1_W<2;2,1>:w {NoMask} // CTemp1_W = (|p2-p0|<Beta)
// CTemp2_W = (|q2-q0|<Beta)
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (16) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
add.sat (16) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (16) NewP0 TempRow1B(0) // p0'
mov (16) NewQ0 TempRow0B(0) // q0'
//-----------------------------------------------------------------------
// Now compute p1' and q1'
// if (|p2-p0|<Beta)
(f0.0) if (16) MBAFF_Y_ENDIF6
// p1' = p1 + Clip3(-tc0, tc0, adj)
// adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1
add (16) acc0<1>:w P2 TempRow2(0) // TempRow2(0) = common = (p0+q0+1) >> 1
mac (16) acc0<1>:w P1 -2:w
shr (16) TempRow1(0)<1> acc0<16;16,1>:w 1:w
// tc clip to get tc_adj
cmp.g.f0.0 (16) null:w TempRow1(0) Mbaff_TC0(0) // Clip if adj > tc0
cmp.l.f0.1 (16) null:w TempRow1(0) -Mbaff_TC0(0) // Clip if adj < -tc0
(f0.0) mov (16) TempRow1(0)<1> Mbaff_TC0(0)
(f0.1) mov (16) TempRow1(0)<1> -Mbaff_TC0(0)
//--- Use free cycles here ---
// f0.1 was overwritten by the clip compare above; reload the saved |q2-q0|<Beta flag for the next 'if'
mov (1) f0.1:w CTemp2_W:w {NoMask} // CTemp2_W = (|q2-q0|<Beta)
// p1' = p1 + tc_adj
add.sat (16) TempRow1B(0)<2> P1 TempRow1(0) // p1+tc_adj
mov (16) NewP1 TempRow1B(0) // p1'
//------------------------------------------------------------------------
MBAFF_Y_ENDIF6:
endif
// if (|q2-q0|<Beta)
(f0.1) if (16) MBAFF_Y_ENDIF7
// q1' = q1 + Clip3(-tc0, tc0, adj)
// adj = (q2 + common - (q1<<1)) >> 1
// same as q2 + common - (q1 * 2)
add (16) acc0<1>:w Q2 TempRow2(0)
mac (16) acc0<1>:w Q1 -2:w
shr (16) TempRow1(0)<1> acc0<16;16,1>:w 1:w
// tc clip to get tc_adj
cmp.g.f0.0 (16) null:w TempRow1(0) Mbaff_TC0(0) // Clip if adj > tc0
cmp.l.f0.1 (16) null:w TempRow1(0) -Mbaff_TC0(0) // Clip if adj < -tc0
(f0.0) mov (16) TempRow1(0)<1> Mbaff_TC0(0)
(f0.1) mov (16) TempRow1(0)<1> -Mbaff_TC0(0)
// q1' = q1 + tc_adj
add.sat (16) TempRow1B(0)<2> Q1 TempRow1(0) // q1+tc_adj
mov (16) NewQ1 TempRow1B(0) // q1'
//------------------------------------------------------------------------
MBAFF_Y_ENDIF7:
endif
// Closes the bS = 4 / bS < 4 selection (if ... MBAFF_Y_ELSE2 / else ... MBAFF_Y_ENDIF2)
endif
MBAFF_Y_ENDIF2:
MBAFF_Y_ENDIF1:
// Closes the outer filterSamplesFlag 'if'
endif
RETURN
#endif // !defined(__AVC_ILDB_LUMA_CORE_MBAFF__)

View File

@@ -0,0 +1,73 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//----- Open a Message Gateway -----
// Builds the OpenGateway payload (a copy of r0 with the reg-base/gateway-size dword and the
// key filled in) and sends it to the message gateway.
// The parent thread is the recipient thread
#if defined(_DEBUG)
mov (1) EntrySignature:w 0x1111:w
#endif
mov (8) GatewayPayload<1>:ud r0.0<8;8,1>:ud // Init payload to r0
// r50- (16 GRFs) are the GRFs child thread can write to.
// NOTE(review): the r50 remark matches the commented-out 0x06400400 value further down;
// the active value 0x00C00400 encodes reg base 6 in bits 28:21 (see the bit pattern below).
// Reg base is at bit 28:21, Gateway size is at [bit 10:8]
// r6: 6 = 00000110
//000 00000110 0000000000 100 00000000 ==> 0000 0000 1100 0000 0000 0100 0000 0000
mov (1) RegBase_GatewaySize:ud 0x00C00400:ud // Reg base + Gateway size (16 GRFs)
//000 00110010 0000000000 100 00000000 ==> 0000 0110 0100 0000 0000 0100 0000 0000
//mov (1) RegBase_GatewaySize:ud 0x06400400:ud // Reg base (r50 = 0x640 byte offset) + Gateway size (16 GRFs)
//mov (1) DispatchID:ub r0.20:ub // Dispatch ID
mov (1) GatewayPayloadKey:uw 0x1212:uw // Key=0x1212
// Message descriptor
// bit 31 EOD
// 27:24 FFID = 0011b for msg gateway
// 23:20 msg length = 1 MRF
// 19:16 Response length = 0
// 14 AckReg = 1
// 1:0 SubFuncID = 00 for OpenGateway
// Message descriptor: 0 000 0011 0001 0000 + 0 1 000000000000 00 ==> 0000 0011 0001 0000 0100 0000 0000 0000
// Send message to gateway: the ack message is put into response GRF r49 ==> Good for debugging
// (the descriptor value itself comes from OGWMSGDSC, which is defined outside this file)
send (8) GatewayResponse:ud m7 GatewayPayload<8;8,1>:ud MSG_GW OGWMSGDSC
//----- End of Open a Message Gateway -----

View File

@@ -0,0 +1,39 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "AVC_ILDB_Root_UV.asm"

View File

@@ -0,0 +1,39 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "AVC_ILDB_Root_Y.asm"

View File

@@ -0,0 +1,170 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/////////////////////////////////////////////////////////////////////////////////////
// Kernel name: AVC_ILDB_Root_Mbaff.asm
//
// Root kernel serves as a scheduler for child threads.
//
//
// ***** Note *****
// The initial design bundles one MB pair per thread and shares AVC_ILDB_MB_Dep_Check.asm
// with the non-MBAFF kernels.
//
// Optimization will be done later by putting the top and bottom MBs on separate threads.
//
//
/////////////////////////////////////////////////////////////////////////////////////
//
// $Revision: 1 $
// $Date: 10/19/06 5:06p $
//
// ----------------------------------------------------
// AVC_ILDB_ROOT_MBAFF_UV
// ----------------------------------------------------
// Kernel declaration, shared setup includes and the debug entry signature for the chroma (UV) root.
#define AVC_ILDB
.kernel AVC_ILDB_ROOT_MBAFF_UV
#if defined(COMBINED_KERNEL)
// Entry label is emitted only when COMBINED_KERNEL is defined
ILDB_LABEL(AVC_ILDB_ROOT_UV):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
/////////////////////////////////////////////////////////////////////////////////////
// Init URB space for running on RTL. It satisfies reading unwritten URB entries.
// Will remove it for production release.
// (The URB init loop below is currently disabled; only the entry signature is written.)
//mov (8) m1:ud 0x11111111:ud
//mov (8) m2:ud 0x22222222:ud
//mov (8) m3:ud 0x33333333:ud
//mov (8) m4:ud 0x44444444:ud
//mov (1) Temp1_W:w 0:w
//ILDB_INIT_URB:
//mul (1) URBOffset:w Temp1_W:w 4:w
//shl (1) URBWriteMsgDescLow:uw URBOffset:w 4:w // Msg descriptor: URB write dest offset (9:4)
//mov (1) URBWriteMsgDescHigh:uw 0x0650:uw // Msg descriptor: URB write 5 MRFs (m0 - m4)
//#include "writeURB.asm"
//add (1) Temp1_W:w Temp1_W:w 1:w // Increase block count
//cmp.l.f0.0 (1) null Temp1_W:w MBsCntY:w // Check the block count limit
//(f0.0) jmpi ILDB_INIT_URB // Loop back
/////////////////////////////////////////////////////////////////////////////////////
mov (1) EntrySignature:w 0xEFF0:w
#endif
//----------------------------------------------------------------------------------------------------------------
// Set global variables for the chroma (UV) root thread
mov (32) ChildParam:uw 0:uw // Reset local variables
//mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // Total # of MB pairs
//add (1) GatewayApertureE:w MBsCntY:w GatewayApertureB:w // Aperture End = aperture Head + BlockCntY
// 2 URB entries per MB:
// Entry 0 - Child thread R0Hdr
// Entry 1 - input parameter to child kernel (child r1)
// Drop any definition left over from a previously included kernel before redefining it,
// exactly as the luma (Y) root does. #undef of an undefined name is a no-op, so this is
// safe in standalone builds and avoids a macro redefinition in COMBINED_KERNEL builds.
#undef URB_ENTRIES_PER_MB
#define URB_ENTRIES_PER_MB 2
// URB_ENTRIES_PER_MB in a different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10
mov (1) URB_EntriesPerMB_2:w URB_ENTRIES_PER_MB-1:w
shl (1) URB_EntriesPerMB_2:w URB_EntriesPerMB_2:w 10:w
#define CHROMA_ROOT // Compiling flag for chroma only
//mul (1) URBOffsetUVBase:w MBsCntY:w URB_ENTRIES_PER_MB:w // Right after Y entries
// URB base for UV kernels (same constants the luma root uses as the chroma URB offset)
#if defined(DEV_CL)
mov (1) URBOffsetUVBase:w 240:w
#else
mov (1) URBOffsetUVBase:w 320:w
#endif
mov (1) ChildThreadsID:uw 3:uw // presumably the starting ID for chroma child threads - TODO confirm
shr (1) ThreadLimit:w MaxThreads:w 1:w // Initial thread limit = MaxThreads / 2 (50%)
mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // MBs to be processed count down from TotalBlocks
//***** Init CT_R0Hdr fields that are common to all threads *************************
// The child-thread R0 header starts as a copy of the root's r0 and is then patched field by field.
mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
mov (1) CT_R0Hdr.7:ud r0.6:ud // Copy Parent Thread Cnt; JJ did the change on 06/20/2006
mov (1) CT_R0Hdr.31:ub 0:w // Reset the highest byte (byte 31, i.e. the top byte of dword 7)
mov (1) CT_R0Hdr.3:ud 0x00000000 // Clear dword 3
mov (1) CT_R0Hdr.6:uw sr0.0:uw // sr0.0: state reg contains general thread states, e.g. EUID/TID.
//***** Init ChildParam fields that are common to all threads ***********************
mov (8) ChildParam<1>:ud RootParam<8;8,1>:ud // Copy all root parameters
mov (4) CurCol<1>:w 0:w // Reset CurCol, CurRow (clears four consecutive words starting at CurCol)
add (2) LastCol<1>:w MBsCntX<2;2,1>:w -1:w // Get LastCol and LastRow (two words starting at MBsCntX, each minus 1)
mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud // URB write descriptor; MSG_LEN(2) presumably sets a 2-register message length
//===================================================================================
// Main sequence: open the gateway, run the dependency check / spawn loop, close the gateway.
#include "AVC_ILDB_OpenGateway.asm" // Open root thread gateway for receiving notification
#include "AVC_ILDB_Dep_Check.asm" // Check dependency and spawn all MBs
//#include "AVC_ILDB_UpdateThrdLimit.asm" // Update thread limit in luma root thread via gateway
#include "AVC_ILDB_CloseGateway.asm" // Close root thread gateway
// Chroma root EOT = child send EOT : Request type = 1
END_CHILD_THREAD
#undef CHROMA_ROOT
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,170 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/////////////////////////////////////////////////////////////////////////////////////
// Kernel name: AVC_ILDB_Root_Mbaff.asm
//
// Root kernel serves as a scheduler for child threads.
//
//
// ***** Note *****
// The initial design bundles one MB pair per thread and shares AVC_ILDB_MB_Dep_Check.asm
// with the non-MBAFF kernels.
//
// Optimization will be done later by putting the top and bottom MBs on separate threads.
//
//
/////////////////////////////////////////////////////////////////////////////////////
//
// $Revision: 1 $
// $Date: 10/19/06 5:06p $
//
// ----------------------------------------------------
// AVC_ILDB_ROOT_MBAFF_Y
// ----------------------------------------------------
// Kernel declaration, shared setup includes and the debug entry signature for the luma (Y) root.
#define AVC_ILDB
.kernel AVC_ILDB_ROOT_MBAFF_Y
#if defined(COMBINED_KERNEL)
// Entry label is emitted only when COMBINED_KERNEL is defined
ILDB_LABEL(AVC_ILDB_ROOT_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
/////////////////////////////////////////////////////////////////////////////////////
// Init URB space for running on RTL. It satisfies reading unwritten URB entries.
// Will remove it for production release.
// (The URB init loop below is currently disabled; only the entry signature is written.)
//mov (8) m1:ud 0x11111111:ud
//mov (8) m2:ud 0x22222222:ud
//mov (8) m3:ud 0x33333333:ud
//mov (8) m4:ud 0x44444444:ud
//mov (1) Temp1_W:w 0:w
//ILDB_INIT_URB:
//mul (1) URBOffset:w Temp1_W:w 4:w
//shl (1) URBWriteMsgDescLow:uw URBOffset:w 4:w // Msg descriptor: URB write dest offset (9:4)
//mov (1) URBWriteMsgDescHigh:uw 0x0650:uw // Msg descriptor: URB write 5 MRFs (m0 - m4)
//#include "writeURB.asm"
//add (1) Temp1_W:w Temp1_W:w 1:w // Increase block count
//cmp.l.f0.0 (1) null Temp1_W:w MBsCntY:w // Check the block count limit
//(f0.0) jmpi ILDB_INIT_URB // Loop back
/////////////////////////////////////////////////////////////////////////////////////
mov (1) EntrySignature:w 0xEFF0:w
#endif
//----------------------------------------------------------------------------------------------------------------
// Set global variables for the luma (Y) root thread
mov (32) ChildParam:uw 0:uw // Reset local variables
//mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // Total # of MB pairs
//add (1) GatewayApertureE:w MBsCntY:w GatewayApertureB:w // Aperture End = aperture Head + BlockCntY
// 2 URB entries for Y:
// Entry 0 - Child thread R0Hdr
// Entry 1 - input parameter to child kernel (child r1)
// #undef first so an earlier definition of the macro cannot clash with this one
#undef URB_ENTRIES_PER_MB
#define URB_ENTRIES_PER_MB 2
// URB_ENTRIES_PER_MB in a different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10
mov (1) URB_EntriesPerMB_2:w URB_ENTRIES_PER_MB-1:w
shl (1) URB_EntriesPerMB_2:w URB_EntriesPerMB_2:w 10:w
mov (1) ChildThreadsID:uw 1:uw // ChildThreadsID for chroma root
shr (1) ThreadLimit:w MaxThreads:w 1:w // Initial luma thread limit to 50% (MaxThreads / 2)
mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // MBs to be processed count down from TotalBlocks
//***** Init CT_R0Hdr fields that are common to all threads *************************
// The child-thread R0 header starts as a copy of the root's r0 and is then patched field by field.
mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
mov (1) CT_R0Hdr.7:ud r0.6:ud // Copy Parent Thread Cnt; JJ did the change on 06/20/2006
mov (1) CT_R0Hdr.31:ub 0:w // Reset the highest byte (byte 31, i.e. the top byte of dword 7)
mov (1) CT_R0Hdr.3:ud 0x00000000 // Clear dword 3
mov (1) CT_R0Hdr.6:uw sr0.0:uw // sr0.0: state reg contains general thread states, e.g. EUID/TID.
//***** Init ChildParam fields that are common to all threads ***********************
mov (8) ChildParam<1>:ud RootParam<8;8,1>:ud // Copy all root parameters
mov (4) CurCol<1>:w 0:w // Reset CurCol, CurRow (clears four consecutive words starting at CurCol)
add (2) LastCol<1>:w MBsCntX<2;2,1>:w -1:w // Get LastCol and LastRow (two words starting at MBsCntX, each minus 1)
mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud // URB write descriptor; MSG_LEN(2) presumably sets a 2-register message length
//===================================================================================
// Main sequence of the luma root: open the gateway, spawn the chroma root, spawn the luma
// children via the dependency check, wait for the chroma root, then close the gateway.
#include "AVC_ILDB_OpenGateway.asm" // Open root thread gateway for receiving notification
// The chroma URB offset matches the URBOffsetUVBase constants set in the chroma root (240 / 320)
#if defined(DEV_CL)
mov (1) URBOffset:uw 240:uw // Use chroma URB offset to spawn chroma root
#else
mov (1) URBOffset:uw 320:uw // Use chroma URB offset to spawn chroma root
#endif
#include "AVC_ILDB_SpawnChromaRoot.asm" // Spawn chroma root
mov (1) URBOffset:uw 0:uw // Use luma URB offset to spawn luma child
mov (1) ChildThreadsID:uw 2:uw // Starting ChildThreadsID for luma child threads
#include "AVC_ILDB_Dep_Check.asm" // Check dependency and spawn all MBs
// Wait for UV root thread to finish
// Spins while ThreadLimit < MaxThreads. Nothing in this loop changes ThreadLimit, so it is
// presumably updated from outside (e.g. by the chroma root through the gateway) - TODO confirm.
ILDB_LABEL(WAIT_FOR_UV):
cmp.l.f0.0 (1) null:w ThreadLimit:w MaxThreads:w
(f0.0) jmpi ILDB_LABEL(WAIT_FOR_UV)
#include "AVC_ILDB_CloseGateway.asm" // Close root thread gateway
END_THREAD // End of root thread
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,157 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Kernel name: AVC_ILDB_Root_UV.asm
//
// Root kernel serves as a scheduler for child threads
//
// Flow of this chroma (UV) root kernel, as laid out below:
//   1. (_DEBUG only) pre-fill URB entries with a known pattern.
//   2. Initialize the child R0 header (CT_R0Hdr) and the ChildParam fields that are
//      common to all threads.
//   3. Open the gateway, run the dependency check / spawn code, close the gateway.
//   4. Terminate with END_CHILD_THREAD (the chroma root ends as a child thread).
//
// $Revision: 1 $
// $Date: 10/19/06 5:06p $
//
// ----------------------------------------------------
// AVC_ILDB_ROOT_UV
// ----------------------------------------------------
#define AVC_ILDB
.kernel AVC_ILDB_ROOT_UV
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_ROOT_UV):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
// Debug marker: kernel entry reached.
mov (1) EntrySignature:w 0xFF11:w
#endif
/////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Init URB space for running on RTL. It satisfies reads of URB entries that have not been written yet.
// Will remove it for production release.
// Fill patterns for m1-m4 (presumably the payload sent by writeURB.asm - confirm there).
mov (8) m1:ud 0x55555555:ud
mov (8) m2:ud 0x66666666:ud
mov (8) m3:ud 0x77777777:ud
mov (8) m4:ud 0x88888888:ud
// The block counter Temp1_W runs from MBsCntY up to, but not including, Temp2_W = MBsCntY * 2.
mov (1) Temp1_W:w MBsCntY:w
shl (1) Temp2_W:w MBsCntY:w 1:w
ILDB_LABEL(ILDB_INIT_URB_UV):
mul (1) URBOffset:uw Temp1_W:uw 4:w // Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)
mov (1) URBWriteMsgDesc:ud MSG_LEN(4)+URBWMSGDSC:ud // Msg descriptor: URB write msg length = 5
#include "writeURB.asm"
add (1) Temp1_W:w Temp1_W:w 1:w // Increase block count
cmp.l.f0.0 (1) null Temp1_W:w Temp2_W:w // Check the block count limit
(f0.0) jmpi ILDB_LABEL(ILDB_INIT_URB_UV) // Loop back
// Debug marker: URB init loop finished.
mov (1) EntrySignature:w 0xFFF0:w
#endif
/////////////////////////////////////////////////////////////////////////////////////
// Set global variable
mov (32) ChildParam:uw 0:uw // Reset local variables, 2 GRFs
//mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // Total # of blocks
//add (1) GatewayApertureE:w MBsCntY:w GatewayApertureB:w // Aperture End = aperture Head + BlockCntY
// 4 URB entries for UV:
// Entry 0 - Child thread R0Hdr
// Entry 1 - input parameter to child kernel (child r1)
// Entry 2 - Prev MB data UV 2x8
// Entry 3 - Unused
// Drop any earlier definition before redefining, as the other root kernels do, so that this
// define cannot clash with a previous value when several kernels are preprocessed together.
#undef URB_ENTRIES_PER_MB
#define URB_ENTRIES_PER_MB 4
// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10
mov (1) URB_EntriesPerMB_2:w URB_ENTRIES_PER_MB-1:w
shl (1) URB_EntriesPerMB_2:w URB_EntriesPerMB_2:w 10:w
#define CHROMA_ROOT // Compiling flag for chroma only
// URB base for UV kernels
#if defined(DEV_CL)
mov (1) URBOffsetUVBase:w 240:w
#else
mov (1) URBOffsetUVBase:w 320:w
#endif
// Starting ChildThreadsID for the children spawned from this root.
mov (1) ChildThreadsID:uw 3:uw
shr (1) ThreadLimit:w MaxThreads:w 1:w // Initial thread limit = MaxThreads / 2 (50%)
mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // MBs to be processed count down from TotalBlocks
//***** Init CT_R0Hdr fields that are common to all threads *************************
mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
mov (1) CT_R0Hdr.7:ud r0.6:ud // Copy Parent Thread Cnt; JJ did the change on 06/20/2006
mov (1) CT_R0Hdr.31:ub 0:w // Reset the highest byte
mov (1) CT_R0Hdr.3:ud 0x00000000
mov (1) CT_R0Hdr.6:uw sr0.0:uw // sr0.0: state reg contains general thread states, e.g. EUID/TID.
//***** Init ChildParam fields that are common to all threads ***********************
mov (8) ChildParam<1>:ud RootParam<8;8,1>:ud // Copy all root parameters
mov (4) CurCol<1>:w 0:w // Reset CurCol, CurRow (clears 4 consecutive words starting at CurCol)
add (2) LastCol<1>:w MBsCntX<2;2,1>:w -1:w // Get LastCol and LastRow
mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud
//===================================================================================
#include "AVC_ILDB_OpenGateway.asm" // Open gateway for receiving notification
// NOTE(review): the original comment here said "luma child threads"; with CHROMA_ROOT defined
// this presumably spawns the chroma child threads - confirm in AVC_ILDB_Dep_Check.asm.
#include "AVC_ILDB_Dep_Check.asm" // Check dependency and spawn all child threads of this root
//#include "AVC_ILDB_LumaThrdLimit.asm" // Update thread limit in luma root thread via gateway
#include "AVC_ILDB_CloseGateway.asm" // Close root thread gateway
// Chroma root EOT = child send EOT : Request type = 1
END_CHILD_THREAD
#undef CHROMA_ROOT
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,160 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Kernel name: AVC_ILDB_Root_Y.asm
//
// Root kernel serves as a scheduler for child threads
//
// Flow of this luma (Y) root kernel, as laid out below:
//   1. (_DEBUG only) pre-fill URB entries with a known pattern.
//   2. Initialize the child R0 header (CT_R0Hdr) and the ChildParam fields that are
//      common to all threads.
//   3. Open the gateway, spawn the chroma root, then run the dependency check / spawn code.
//   4. Spin in WAIT_FOR_UV, close the gateway and end the root thread (END_THREAD).
//
// $Revision: 1 $
// $Date: 10/19/06 5:06p $
//
// ----------------------------------------------------
// AVC_ILDB_ROOT_Y
// ----------------------------------------------------
#define AVC_ILDB
.kernel AVC_ILDB_ROOT_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_ROOT_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
/////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Init URB space for running on RTL. It satisfies reads of URB entries that have not been written yet.
// Will remove it for production release.
// Fill patterns for m1-m4 (presumably the payload sent by writeURB.asm - confirm there).
mov (8) m1:ud 0x11111111:ud
mov (8) m2:ud 0x22222222:ud
mov (8) m3:ud 0x33333333:ud
mov (8) m4:ud 0x44444444:ud
// The block counter Temp1_W runs from 0 up to, but not including, MBsCntY.
mov (1) Temp1_W:w 0:w
ILDB_LABEL(ILDB_INIT_URB_Y):
//mul (1) Temp2_W:w Temp1_W:w 4:w // URBOffset
//shl (1) URBWriteMsgDescLow:uw Temp2_W:w 4:w // Msg descriptor: URB write dest offset (9:4)
//mov (1) URBWriteMsgDescHigh:uw 0x0650:uw // Msg descriptor: URB write 5 MRFs (m0 - m4)
//mul (1) URBOffset:uw Temp1_W:uw 4:w // Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)
mul (1) URBOffset:uw Temp1_W:uw 2:w // Each thread uses 2 URB entries (1 r0 + 1 inline)
mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud // Msg descriptor: URB write msg length = 3
#include "writeURB.asm"
add (1) Temp1_W:w Temp1_W:w 1:w // Increase block count
cmp.l.f0.0 (1) null Temp1_W:w MBsCntY:w // Check the block count limit
(f0.0) jmpi ILDB_LABEL(ILDB_INIT_URB_Y) // Loop back
// Debug marker: URB init loop finished.
mov (1) EntrySignature:w 0xFFF0:w
#endif
/////////////////////////////////////////////////////////////////////////////////////
// Set global variable
mov (32) ChildParam:uw 0:uw // Reset local variables, 2 GRFs
//mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // Total # of blocks
//add (1) GatewayApertureE:w MBsCntY:w GatewayApertureB:w // Aperture End = aperture Head + BlockCntY
// 4 URB entries for Y:
// Entry 0 - Child thread R0Hdr
// Entry 1 - input parameter to child kernel (child r1)
// Entry 2 - Prev MB data Y 4x16, col 1 and col 0
// Entry 3 - Prev MB data Y 4x16, col 3 and col 2
// NOTE(review): the _DEBUG init loop above strides by 2 URB entries per thread while
// URB_ENTRIES_PER_MB is 4 here - confirm which layout is intended.
#undef URB_ENTRIES_PER_MB
#define URB_ENTRIES_PER_MB 4
// URB_ENTRIES_PER_MB in different form, the final desired format is (URB_ENTRIES_PER_MB-1) << 10
mov (1) URB_EntriesPerMB_2:w URB_ENTRIES_PER_MB-1:w
shl (1) URB_EntriesPerMB_2:w URB_EntriesPerMB_2:w 10:w
shr (1) ThreadLimit:w MaxThreads:w 1:w // Initial luma thread limit to 50%
mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // MBs to be processed count down from TotalBlocks
//***** Init CT_R0Hdr fields that are common to all threads *************************
mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
mov (1) CT_R0Hdr.7:ud r0.6:ud // Copy Parent Thread Cnt; JJ did the change on 06/20/2006
mov (1) CT_R0Hdr.31:ub 0:w // Reset the highest byte
mov (1) CT_R0Hdr.3:ud 0x00000000
mov (1) CT_R0Hdr.6:uw sr0.0:uw // sr0.0: state reg contains general thread states, e.g. EUID/TID.
//***** Init ChildParam fields that are common to all threads ***********************
mov (8) ChildParam<1>:ud RootParam<8;8,1>:ud // Copy all root parameters
mov (4) CurCol<1>:w 0:w // Reset CurCol, CurRow (clears 4 consecutive words starting at CurCol)
add (2) LastCol<1>:w MBsCntX<2;2,1>:w -1:w // Get LastCol and LastRow
mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud
//===================================================================================
#include "AVC_ILDB_OpenGateway.asm" // Open gateway for receiving notification
// The chroma URB offset depends on the target device: 240 for DEV_CL, 320 otherwise.
#if defined(DEV_CL)
mov (1) URBOffset:uw 240:uw // Use chroma URB offset to spawn chroma root
#else
mov (1) URBOffset:uw 320:uw // Use chroma URB offset to spawn chroma root
#endif
#include "AVC_ILDB_SpawnChromaRoot.asm" // Spawn chroma root
mov (1) URBOffset:uw 0:uw // Use luma URB offset to spawn luma child
mov (1) ChildThreadsID:uw 2:uw // Starting ChildThreadsID for luma child threads
#include "AVC_ILDB_Dep_Check.asm" // Check dependency and spawn all luma child threads in parallel with chroma root
// Wait for UV root thread to finish
// Spins while ThreadLimit < MaxThreads. Nothing inside the loop changes either value, so
// ThreadLimit is presumably updated from outside (e.g. through the gateway) - confirm.
ILDB_LABEL(WAIT_FOR_UV):
cmp.l.f0.0 (1) null:w ThreadLimit:w MaxThreads:w
(f0.0) jmpi ILDB_LABEL(WAIT_FOR_UV)
#include "AVC_ILDB_CloseGateway.asm" // Close root thread gateway
END_THREAD // End of root thread
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,52 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//=============== Spawn a child thread for a vertical child ===============
// Computes the URB offset from the current row, rebuilds the child R0 header from the
// root R0 header, points it at the child interface descriptor and then falls into the
// common spawn code in AVC_ILDB_SpawnChild.asm.
#if defined(_DEBUG)
// Debug marker for this spawn path.
mov (1) EntrySignature:w 0x6666:w
#endif
// URBOffset = CurRow * 2. The trailing "5:w" appears to be an earlier multiplier, from when
// each row used 5 URB entries (R0, child R0, 3 GRFs of data from left MB).
mul (1) URBOffset:uw CurRow:uw 2:w // 5:w // Each row uses 5 URB entries (R0, child R0, 3 GRFs of data from left MB)
mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
// R0.2: Interface Descriptor Ptr. Add IDesc_Child_Offset to select the Interface Descriptor of the child kernel
add (1) CT_R0Hdr.2:ud r0.2:ud IDesc_Child_Offset:w
#include "AVC_ILDB_SpawnChild.asm"

View File

@@ -0,0 +1,85 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//=============== Spawn a child thread for Luma or Chroma ===============
// Include fragment. It reads ChildParam, ChildThreadsID, URBOffset, URBWriteMsgDesc and
// URB_EntriesPerMB_2, which the including kernel sets up, writes the child parameters to
// the URB and sends a thread-spawn message using CT_R0Hdr as the header.
// Side effects visible here: ChildThreadsID is advanced by 2 and CT_R0Hdr.4 is left
// holding the URB handle value (it is restored from r0.4 on the next pass through this code).
//----- Create child thread R0 header -----
#if defined(_DEBUG)
// Debug marker for this spawn path.
mov (1) EntrySignature:w 0xAAAA:w
#endif
//***** Set CT_R0Hdr fields that change for every thread
// Restore CT_R0Hdr.4:ud to r0.4:ud
mov (1) CT_R0Hdr.4:ud r0.4:ud
// R0.2: Interface Descriptor Ptr. Add a child offset for child kernel
// add (1) CT_R0Hdr.2:ud r0.2:ud CHILD_OFFSET:w
// Assign a new Thread Count for this child
mov (1) CT_R0Hdr.6:ud ChildThreadsID:uw
//----- Prepare URB for launching a child thread -----
// Copy 16 words of ChildParam into m2 as the child's input parameters.
mov (16) m2.0:w ChildParam<16;16,1>:w
// MRF0.0 = URBOffset / 2 (presumably the destination offset field of the URB write - confirm).
shr (1) MRF0.0:uw URBOffset:uw 1:w
add (1) ChildThreadsID:uw ChildThreadsID:uw 2:uw // Luma child=even, chroma child=odd
//--------------------------------------------------
// #include "writeURB.asm"
send null:uw MRF0 null:ud URBWRITE URBWriteMsgDesc:ud // URB write
//--------------------------------------------------
// Set URB handle for child thread launching:
// URB handle Length (bit 15:10) - 0000 0000 0000 0000 yyyy yy00 0000 0000
// URB handle offset (bit 9:0) - 0000 0000 0000 0000 0000 00xx xxxx xxxx
or (1) CT_R0Hdr.4:ud URB_EntriesPerMB_2:w URBOffset:uw
// 2 URB entries:
// Entry 0 - CT_R0Hdr
// Entry 1 - input parameter to child kernel
//----- Spawn a child now -----
send (8) null:ud CT_R0Hdr null:ud TS TSMSGDSC
// send (8) null:ud CT_Spawn_Reg null:ud 0x07100001
// Restore CT_R0Hdr.4:ud to r0.4:ud for next use
// mov (1) CT_R0Hdr.4:ud r0.4:ud

View File

@@ -0,0 +1,77 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//=============== Spawn a chroma root thread ===============
// Include fragment. It points the child R0 header at the chroma root kernel, copies the
// root parameters into m2, writes them to the URB via writeURB.asm and sends a
// thread-spawn message. CT_R0Hdr.4 is restored from r0.4 afterwards.
//----- Create chroma root thread R0 header -----
#if defined(_DEBUG)
// Debug marker for this spawn path.
mov (1) EntrySignature:w 0xAABA:w
#endif
// Restore CT_R0Hdr.4:ud to r0.4:ud
// mov (1) CT_R0Hdr.4:ud r0.4:ud
// R0.2: Interface Descriptor Ptr. Add CHROMA_ROOT_OFFSET to select the chroma root kernel
add (1) CT_R0Hdr.2:ud r0.2:ud CHROMA_ROOT_OFFSET:w
// Assign a new Thread Count for this child
mov (1) CT_R0Hdr.6:ud 1:w // ThreadID=1 for chroma root
//----- Copy luma root r1 for launching chroma root thread -----
mov (16) m2.0:w RootParam<16;16,1>:w
#include "writeURB.asm"
//--------------------------------------------------
// Set URB handle for child thread launching:
// URB handle Length (bit 15:10) - 0000 0000 0000 0000 yyyy yy00 0000 0000
// URB handle offset (bit 9:0) - 0000 0000 0000 0000 0000 00xx xxxx xxxx
or (1) CT_R0Hdr.4:ud URB_EntriesPerMB_2:w URBOffset:uw
// 2 URB entries:
// Entry 0 - CT_R0Hdr
// Entry 1 - input parameter to child kernel
//----- Spawn a child now -----
send (8) null:ud CT_R0Hdr null:ud TS TSMSGDSC
// Restore CT_R0Hdr.4:ud to r0.4:ud for next use
mov (1) CT_R0Hdr.4:ud r0.4:ud

View File

@@ -0,0 +1,54 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: Child_Undefs.inc
//
// Undefine global symbols for new process in child thread
//
// Removing these preprocessor symbols lets the next piece of child-thread code define
// them again without a redefinition clash. #undef of a symbol that is not currently
// defined is harmless.
//
// Symbols P1 through P8
#undef P1
#undef P2
#undef P3
#undef P4
#undef P5
#undef P6
#undef P7
#undef P8
// Remaining symbols
#undef EDGECNTLMAP
#undef CLIP_NEGATIVE
#undef CLIP_DONE

View File

@@ -0,0 +1,336 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#if !defined(__ILDB_HEADER__) // Make sure this file is only included once
#define __ILDB_HEADER__
// Module name: ILDB_header.inc
//
.default_execution_size (16)
.default_register_type :ub
#undef NULLREG
#undef RETURN_REG
#undef EOTMSGDSC
#undef MSGSRC
#undef END_THREAD
#undef TSMSGDSC
// ----------- Common constant definitions ------------
//
// Bit position constants
//
// BITn is a mask with only bit n set (BITn == 1 << n), for n = 0..31.
#define BIT0 0x01
#define BIT1 0x02
#define BIT2 0x04
#define BIT3 0x08
#define BIT4 0x10
#define BIT5 0x20
#define BIT6 0x40
#define BIT7 0x80
#define BIT8 0x0100
#define BIT9 0x0200
#define BIT10 0x0400
#define BIT11 0x0800
#define BIT12 0x1000
#define BIT13 0x2000
#define BIT14 0x4000
#define BIT15 0x8000
#define BIT16 0x00010000
#define BIT17 0x00020000
#define BIT18 0x00040000
#define BIT19 0x00080000
#define BIT20 0x00100000
#define BIT21 0x00200000
#define BIT22 0x00400000
#define BIT23 0x00800000
#define BIT24 0x01000000
#define BIT25 0x02000000
#define BIT26 0x04000000
#define BIT27 0x08000000
#define BIT28 0x10000000
#define BIT29 0x20000000
#define BIT30 0x40000000
#define BIT31 0x80000000
// Common constants
//
#define INST_SIZE 16 // Instruction size in byte
#define GRFWIB 32 // GRF register width in byte
#define GRFWIW 16 // GRF register width in word
#define GRFWID 8 // GRF register width in dword
// Field selectors
#define TOP_FIELD 0
#define BOTTOM_FIELD 1
// Frame selectors
#define PREVIOUS_FRAME 0 // Previous frame
#define CURRENT_FRAME 1 // Current frame
#define NEXT_FRAME 2 // Next frame
// Row widths
#define Y_ROW_WIDTH 16 // in bytes
#define UV_ROW_WIDTH 8
// Useful macros
//
// REGION(Width,HStride) expands to the source region <Width*HStride;Width,HStride>.
#define REGION(Width,HStride) <Width*HStride;Width,HStride> // Region definition when ExecSize = Width
// Null register operands with dword (NULLREG) and word (NULLREGW) type.
#define NULLREG null<1>:d
#define NULLREGW null<1>:w
#define RETURN_REG r62 // Return pointer for all sub-routine calls (type DWORD)
// CALL(subFunc, skipInst): store ip + (1+skipInst)*INST_SIZE in RETURN_REG, then jump to subFunc.
// RETURN_REG is written unconditionally, so a CALL made inside a sub-routine overwrites the
// caller's return address. The literal "\n" in the expansion is presumably turned into a real
// line break by a later build step - confirm in the build scripts.
#define CALL(subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud (1+skipInst)*INST_SIZE \n\
jmpi (1) subFunc
#define RETURN mov (1) ip:ud RETURN_REG<0;1,0>:ud // Return to calling module
// PRED_CALL(flag, subFunc, skipInst): same as CALL, but the jump is predicated on (flag).
// RETURN_REG is still written when the predicate is false.
#define PRED_CALL(flag, subFunc, skipInst) add (1) RETURN_REG<1>:ud ip:ud (1+skipInst)*INST_SIZE \n\
(flag) jmpi (1) subFunc
// Definitions for surface states, GRF regions, and common data fields
//
// Note: Each kernel needs to define a specific symbol before including this
// header file to ensure correct definitions.
//
// With AVC_ILDB defined, the section below declares the binding table indices and the
// register views of the left, top and current macroblock data. The Y and U views share
// the same base registers because luma and chroma run in separate threads; the V views
// start one byte later (NV12).
#if defined(AVC_ILDB)
.reg_count_total 64
.reg_count_payload 4
// Binding Table Index
#define BI_CNTRL_DATA 0 // Control data map
#define BI_SRC_Y 1
#define BI_SRC_UV 2
#define BI_DEST_Y 3
#define BI_DEST_UV 4
//========== Left MB, 4x16 in r2 and r3 ==========
#define PREV_MB_Y_BASE 64 //2*GRFWIB // Byte offset to r2
.declare PREV_MB_YD Base=r2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare PREV_MB_YW Base=r2 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare PREV_MB_YB Base=r2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
#define PREV_MB_U_BASE 64 //2*GRFWIB // separate thread from Y // Byte offset to r2
.declare PREV_MB_UD Base=r2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare PREV_MB_UW Base=r2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
.declare PREV_MB_UB Base=r2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
#define PREV_MB_V_BASE 65 //2*GRFWIB+1 // NV12 // Byte offset to r2.1
.declare PREV_MB_VB Base=r2.1 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//========== Top MB, 16x4 in r4 and r5 ==========
#define TOP_MB_Y_BASE 128 //4*GRFWIB // Byte offset to r4
.declare TOP_MB_YD Base=r4 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare TOP_MB_YW Base=r4 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare TOP_MB_YB Base=r4 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
#define TOP_MB_U_BASE 128 //4*GRFWIB // separate thread from Y // Byte offset to r4
.declare TOP_MB_UD Base=r4 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare TOP_MB_UW Base=r4 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
.declare TOP_MB_UB Base=r4 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
#define TOP_MB_V_BASE 129 //4*GRFWIB+1 // NV12 // Byte offset to r4.1
.declare TOP_MB_VB Base=r4.1 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//========== Current MB, 16x16 in r6-r13 ==========
#define SRC_MB_Y_BASE 192 //6*GRFWIB // Byte offset to r6
.declare SRC_YD Base=r6 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // For read and write, 8 GRFs
.declare SRC_YW Base=r6 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare SRC_YB Base=r6 ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
#define SRC_MB_U_BASE 192 //6*GRFWIB // separate thread from Y // Byte offset to r6
.declare SRC_UD Base=r6 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // For read and write, 2 GRFs
.declare SRC_UW Base=r6 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // For read and write, 4 GRFs
.declare SRC_UB Base=r6 ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 2 GRFs
#define SRC_MB_V_BASE 193 // 6*GRFWIB+1 // NV12 // Byte offset to r6.1
.declare SRC_VD Base=r6.1 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // For read and write, 2 GRFs
.declare SRC_VW Base=r6.1 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // For read and write, 4 GRFs
.declare SRC_VB Base=r6.1 ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 2 GRFs
#else // No kernel specified, define nothing.
// Fallback when AVC_ILDB is not defined: a 2-register payload and a few r1 field aliases.
.reg_count_total 64
.reg_count_payload 2
#define SRCAOFF r1.0:ud // Offset into alpha data
#define SRCOFF r1.1:ud // Offset into source YUV data
#define ORIX r1.4 // :w, H. origin of the destination block in pel
#define ORIY r1.5 // :w, V. origin of the destination block in pel
#endif
// ----------- Message Payload Header fields------------
//
#define IDP r0.2:ud // Interface Descriptor Pointer
#define BTP r0.4:ud // Binding Table Pointer
// ----------- Common Message Descriptor ------------
//
// Message descriptor encodings for the two descriptor layouts.
// With DEV_ILK defined, the send target is a separate extended-descriptor value
// (MSG_GW, DAPREAD, ...) and each *MSGDSC constant holds only lengths and function
// control. Without DEV_ILK the extended-descriptor names expand to nothing and the
// same target codes (0x03..0x07) appear in the top byte of each *MSGDSC constant.
// Every *MSGDSC constant in both branches already includes MSG_LEN(1); NOTIFYMSG is
// the only value here that does not (presumably a modifier bit, not a full descriptor).
#ifdef DEV_ILK
#define GW_DCN // Should be enabled only for ILK-B0 and beyond
#define MSG_GW 0x03 // Message Gateway
#define MSG_GW_EOT 0x23 // Message Gateway plus EOT bit set (For ILK only)
#define DAPREAD 0x04 // Data Port Read Extended Message Descriptor
#define DAPWRITE 0x05 // Data Port Write Extended Message Descriptor
#define URBWRITE 0x06 // URB
#define TS 0x07 // Thread Spawner Extended Message Descriptor
#define TS_EOT 0x27 // End of Thread Extended Message Descriptor (TS code 0x07 plus the same 0x20 bit as MSG_GW_EOT)
#define EOTMSGDSC 0x02000000 // End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel)
#define CHILD_EOTMSGDSC 0x02000012 // End of Child Thread Message Descriptor w/o URB handle dereferenced
// Data Port Message Descriptor
#define DWBRMSGDSC_RC 0x02086000 // DWORD Block Read Message Descriptor, reading from render cache = 6.
#define DWBRMSGDSC_RC_TF 0x02086600 // DWBRMSGDSC_RC + ENMSGDSCTF (0x600): top field, render cache.
#define DWBRMSGDSC_RC_BF 0x02086700 // DWBRMSGDSC_RC + ENMSGDSCBF (0x700): bottom field, render cache.
#define DWBRMSGDSC_SC 0x0208A000 // DWORD Block Read Message Descriptor, reading from sampler cache = A.
// NOTE(review): the two field variants below use 0xE6xx/0xE7xx, i.e. DWBRMSGDSC_SC plus an
// extra 0x4000 bit, whereas the pre-ILK branch uses 0xA6xx/0xA7xx -- confirm this is intended.
#define DWBRMSGDSC_SC_TF 0x0208E600 // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache.
#define DWBRMSGDSC_SC_BF 0x0208E700 // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.
#define ILDBRMSGDSC 0x02085800 // AVC ILDB Control Data Read Msg Desc on Bearlake-C
#define DWBWMSGDSC 0x02082000 // DWORD Block Write Message Descriptor
#define DWBWMSGDSC_WC 0x0218A000 // DWORD Block Write Message Descriptor + write commit
// URB Message Descriptor
#define URBWMSGDSC 0x02080000 // URB Write Message Descriptor
// Thread Spawner Message Descriptor
#define TSMSGDSC 0x02000001
// Message Gateway Message Descriptors
#define OGWMSGDSC 0x02000000 // OpenGateway Message Descriptor
#define CGWMSGDSC 0x02000001 // CloseGateway Message Descriptor
#define FWDMSGDSC 0x02000002 // ForwardMsg Message Descriptor
#define NOTIFYMSG 0x00008000 // Send notification with ForwardMsg message
// Length fields, to be added to a descriptor. The macro bodies are not parenthesized;
// the visible uses all have the form RESP_LEN(n)+..., where that is harmless.
#define RESP_LEN(len) 0x100000*len
#define MSG_LEN(len) 0x2000000*len
#else // Pre DEV_ILK
// The extended-descriptor names are defined empty so that a send written as
// "<target> <descriptor>" still preprocesses; the target code is in the descriptor itself.
#define MSG_GW
#define MSG_GW_EOT
#define DAPREAD
#define DAPWRITE
#define URBWRITE
#define TS
#define TS_EOT
#define EOTMSGDSC 0x87100000 // End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel)
#define CHILD_EOTMSGDSC 0x87100012 // End of Child Thread Message Descriptor w/o URB handle dereferenced
// Data Port Message Descriptor
#define DWBRMSGDSC_RC 0x04106000 // DWORD Block Read Message Descriptor, reading from render cache = 6.
#define DWBRMSGDSC_RC_TF 0x04106600 // DWBRMSGDSC_RC + ENMSGDSCTF (0x600): top field, render cache.
#define DWBRMSGDSC_RC_BF 0x04106700 // DWBRMSGDSC_RC + ENMSGDSCBF (0x700): bottom field, render cache.
#define DWBRMSGDSC_SC 0x0410A000 // DWORD Block Read Message Descriptor, reading from sampler cache = A.
#define DWBRMSGDSC_SC_TF 0x0410A600 // DWORD Block Read Message Descriptor, reading top field from field mode sampler cache.
#define DWBRMSGDSC_SC_BF 0x0410A700 // DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.
#define ILDBRMSGDSC 0x04105800 // AVC ILDB Control Data Read Msg Desc on Bearlake-C
#define DWBWMSGDSC 0x05102000 // DWORD Block Write Message Descriptor
#define DWBWMSGDSC_WC 0x0511A000 // DWORD Block Write Message Descriptor + write commit
// URB Message Descriptor
#define URBWMSGDSC 0x06100000 // URB Write Message Descriptor
// Thread Spawner Message Descriptor
#define TSMSGDSC 0x07100001
// Message Gateway Message Descriptors
#define OGWMSGDSC 0x03100000 // OpenGateway Message Descriptor
#define CGWMSGDSC 0x03100001 // CloseGateway Message Descriptor
#define FWDMSGDSC 0x03100002 // ForwardMsg Message Descriptor
#define NOTIFYMSG 0x00008000 // Send notification with ForwardMsg message
// Length fields, to be added to a descriptor. The macro bodies are not parenthesized;
// the visible uses all have the form RESP_LEN(n)+..., where that is harmless.
#define RESP_LEN(len) 0x10000*len
#define MSG_LEN(len) 0x100000*len
// Low 16 bits of ILDBRMSGDSC:
// bits 15 - 0 = 01 011 000 00000000 = 0101 1000 0000 0000 = 5800
// Render cache, AVC loop rd,
#endif // DEV_ILK
// Enable frame/field selection in message descriptor
// These are added to a block read/write descriptor; e.g. DWBRMSGDSC_RC_TF equals
// DWBRMSGDSC_RC + ENMSGDSCTF in both the DEV_ILK and pre-ILK tables.
#define ENMSGDSCFM 0x400 // Enable MSGDSC to select frame surface
#define ENMSGDSCTF 0x600 // Enable MSGDSC to select top field surface
#define ENMSGDSCBF 0x700 // Enable MSGDSC to select bottom field surface
// Thread termination: send r0 through MSGHDR using the TS_EOT target and the
// root (END_THREAD) or child (END_CHILD_THREAD) end-of-thread descriptor.
// On pre-ILK builds TS_EOT expands to nothing. NULLREG is defined outside this section.
#define END_THREAD send (8) NULLREG MSGHDR r0:ud TS_EOT EOTMSGDSC
#define END_CHILD_THREAD send (8) NULLREG MSGHDR r0:ud TS_EOT CHILD_EOTMSGDSC
// ----------- Message related register ------------
//
// Symbolic names for message registers. Several names alias the same register:
// MSGHDR, MSGHDRY and MSGHDRC are all m1; MSGHDRU/MSGHDRL are m2; MSGHDRV/MSGHDRT are m3.
// m2..m4 also lie inside the payload window declared at the end of this section.
#define MSGHDR m1 // Message Payload Header
#define MSGHDRY m1 // Message Payload Header register for Y data
#define MSGHDRU m2 // Message Payload Header register for U data
#define MSGHDRV m3 // Message Payload Header register for V data
#define MSGHDRC m1 // Message Payload Header register for CUR MB
#define MSGHDRL m2 // Message Payload Header register for LEFT MB
#define MSGHDRT m3 // Message Payload Header register for TOP MB
#define MSGHDRYA m4 // Second Message Payload Header register for Y data
#define MSGSRC r63 // Message source register
#define MSGDSC a0.0:ud // Message Descriptor register (type DWORD)
// Sub-fields of MSGSRC used as the block read/write header (MH_ORI and MH_ORIX are the same field).
#define MH_ORI MSGSRC.0 // DWORD block R/W message header block offset
#define MH_ORIX MSGSRC.0 // DWORD block R/W message header X offset
#define MH_ORIY MSGSRC.1 // DWORD block R/W message header Y offset
#define MH_SIZE MSGSRC.2 // DWORD block R/W message header block width & height
// M2 - M9 for message data payload
// The same payload area, based at m2, viewed as bytes, words, dwords and floats.
.declare MSGPAYLOADB Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
.declare MSGPAYLOADW Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
.declare MSGPAYLOADD Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
.declare MSGPAYLOADF Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=f
// End of ILDB_header.inc
#endif // !defined(__ILDB_HEADER__)

View File

@@ -0,0 +1,110 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_ILDB_Cntrl_Data.asm
//
// This module loads AVC ILDB control data for one MB.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// CNTRL_DATA_D: CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_CNTRL_DATA: Binding table index of control data surface
//
//----------------------------------------------------------------
// Load the ILDB control data for one MB into CNTRL_DATA_D (8 GRFs).
//
// Two predicates are computed up front:
//   f0.0 = 1 when CntlDataExpFlag is set in BitFields -> use the ILDB control-data
//          read message (READ_BLC_CNTL_DATA path below).
//   f0.1 = 1 when MbaffFlag==1 && BotFieldFlag==1 -> address the bottom MB of an
//          mbaff pair.
and (1) CTemp1_W:uw BitFields:uw MbaffFlag+BotFieldFlag:uw // Mute all other bits
and.nz.f0.0 (1) null:w BitFields:w CntlDataExpFlag:w // Get CntlDataExpFlag
cmp.e.f0.1 (1) NULLREGW CTemp1_W:uw MbaffFlag+BotFieldFlag:uw // Check mbaff and bot flags
(f0.0) jmpi ILDB_LABEL(READ_BLC_CNTL_DATA)
// On Crestline, MB control data in memory occupy 64 DWs (expanded).
// mov (1) MSGSRC.0<1>:ud 0:w { NoDDClr } // Block origin X
// mov (1) MSGSRC.1<1>:ud CntrlDataOffsetY:ud { NoDDClr, NoDDChk } // Block origin Y
// mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16=256 bytes)
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:uw { NoDDClr } // Block origin X,Y
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16=256 bytes)
(f0.1) add (1) MSGSRC.1:ud MSGSRC.1:ud 16:w // +16 rows for the bottom MB in a pair
// The response length is expressed with RESP_LEN(8), as in Load_ILDB_Cntrl_Data_64DW.asm,
// rather than the literal 0x00080000. The literal equals RESP_LEN(8) only in the pre-ILK
// descriptor layout; under DEV_ILK it collides with a bit already set in DWBRMSGDSC_SC.
send (8) CNTRL_DATA_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 8 GRFs
jmpi ILDB_LABEL(READ_CNTL_DATA_DONE)
ILDB_LABEL(READ_BLC_CNTL_DATA):
// On Bearlake-C, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.
// Global offset
mov (1) MSGSRC.2:ud CntrlDataOffsetY:ud // CntrlDataOffsetY is the global offset
(f0.1) add (1) MSGSRC.2:ud MSGSRC.2:ud 64:w // +64 to the next MB control data (bot MB)
send (8) CNTRL_DATA_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA // Receive 8 GRFs
ILDB_LABEL(READ_CNTL_DATA_DONE):
// End of load_ILDB_Cntrl_Data.asm
// AVC ILDB control data message header format
//DWord Bit Description
//M0.7 31:0 Debug
//M0.6 31:0 Debug
//M0.5 31:8 Ignored
// 7:0 Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread. It is used to free up resources used by the thread upon thread completion.
//M0.4 31:0 Ignored
//M0.3 31:0 Ignored
//M0.2 31:0 Global Offset. Specifies the global byte offset into the buffer.
// This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]
//M0.1 31:0 Ignored
//M0.0 31:0 Ignored

View File

@@ -0,0 +1,92 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_ILDB_Cntrl_Data_16DW.asm
//
// This module loads AVC ILDB control data for one MB on CTG.
// The data occupies 16 DWs in memory; the data port expands it to 64 DWs (8 GRFs).
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// CNTRL_DATA_D: CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_CNTRL_DATA: Binding table index of control data surface
//
//----------------------------------------------------------------
// On CTG, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.
#if defined(_MBAFF)
// We need to get control data offset for the bottom MB in mbaff mode.
// f0.1 is set when BotFieldFlag is set in BitFields. MbaffFlag is not tested here
// (presumably because this path is only assembled when _MBAFF is defined); the
// two commented-out lines below are the older form that tested both flags.
// and (1) CTemp1_W:uw BitFields:uw MbaffFlag+BotFieldFlag:uw // Mute all other bits
// cmp.e.f0.1 (1) NULLREGW CTemp1_W:uw MbaffFlag+BotFieldFlag:uw // Check mbaff and bot flags
and.ne.f0.1 (1) NULLREGW BitFields:uw BotFieldFlag:uw
// Global offset
mov (1) MSGSRC.2:ud CntrlDataOffsetY:ud
(f0.1) add (1) MSGSRC.2:ud MSGSRC.2:ud 64:w // +64 to the next MB control data (bot MB)
#endif
// NOTE(review): when _MBAFF is not defined this module does not write MSGSRC.2, so the
// global offset must already be in place before this point -- confirm against the includers.
send (8) CNTRL_DATA_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA // Receive 8 GRFs
// End of load_ILDB_Cntrl_Data_16DW.asm
// AVC ILDB control data message header format
//DWord Bit Description
//M0.7 31:0 Debug
//M0.6 31:0 Debug
//M0.5 31:8 Ignored
// 7:0 Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread. It is used to free up resources used by the thread upon thread completion.
//M0.4 31:0 Ignored
//M0.3 31:0 Ignored
//M0.2 31:0 Global Offset. Specifies the global byte offset into the buffer.
// This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]
//M0.1 31:0 Ignored
//M0.0 31:0 Ignored

View File

@@ -0,0 +1,66 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_ILDB_Cntrl_Data_22DW.asm
//
// ********** Apple only module **********
//
// This module loads AVC ILDB 22DW control data for one MB for CLN.
// The reduced control data set is for progressive picture ONLY.
//
// Control data memory layout for each MB is 8x11 = 88 bytes.
// It occupies 3 GRFs after reading in.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// CNTRL_DATA_D: CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 3 GRFs
//
// Binding table index:
// BI_CNTRL_DATA: Binding table index of control data surface
//
//----------------------------------------------------------------
// Build the block-read header in MSGSRC and fetch this MB's control data.
// The origin is the MB position scaled by the per-MB block size (8 wide, 11 high);
// the size word 0x000A0007 follows the same pattern as the 0x000F000F used for a
// 16x16 block in the 64DW module, i.e. (height-1) in the high half and (width-1) in the low half.
mul (1) MSGSRC.0<1>:ud ORIX:uw 8:uw { NoDDClr } // Block origin X
mul (1) MSGSRC.1<1>:ud ORIY:uw 11:uw { NoDDClr, NoDDChk } // Block origin Y
mov (1) MSGSRC.2<1>:ud 0x000A0007:ud { NoDDChk } // Block width and height (8x11=88 bytes)
send (8) CNTRL_DATA_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(3)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 3 GRFs
// End of load_ILDB_Cntrl_Data_22DW.asm

View File

@@ -0,0 +1,72 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_ILDB_Cntrl_Data_64DW.asm
//
// This module loads AVC ILDB 64DW control data for one MB for CLN.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// CNTRL_DATA_D: CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_CNTRL_DATA: Binding table index of control data surface
//
//----------------------------------------------------------------
// On CLN, MB control data in memory occupy 64 DWs.
// The data is fetched as a 16x16-byte block read at (ORIX_CUR, ORIY) through the
// DWBRMSGDSC_SC descriptor; in mbaff builds the bottom MB of a pair is read 16 rows lower.
#if defined(_MBAFF)
// We need to get control data offset for the bottom MB in mbaff mode.
// That is, set f0.1=1 if MbaffFlag==1 && BotFieldFlag==1
// acc0.0 is the scratch destination here (Load_ILDB_Cntrl_Data.asm uses CTemp1_W for the same step).
and (1) acc0.0:uw BitFields:uw MbaffFlag+BotFieldFlag:uw // Mute all other bits
cmp.e.f0.1 (1) NULLREGW acc0.0:uw MbaffFlag+BotFieldFlag:uw // Check mbaff and bot flags
#endif // defined(_MBAFF)
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:uw { NoDDClr } // Block origin X,Y
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16=256 bytes)
#if defined(_MBAFF)
(f0.1) add (1) MSGSRC.1:ud MSGSRC.1:ud 16:w // +16 to the bottom MB control data (bot MB)
#endif
send (8) CNTRL_DATA_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA // Receive 8 GRFs
// End of load_ILDB_Cntrl_Data_64DW.asm

View File

@@ -0,0 +1,98 @@
# Include (.inc) files of the ILDB shaders kept in this directory.
# $(NULL) is not assigned here; it serves as a list terminator so that every
# real entry can end with a backslash.
INTEL_ILDB_INC = \
AVC_ILDB.inc \
Child_Undefs.inc \
ILDB_header.inc \
Root_Undefs.inc \
$(NULL)
# Assembly (.asm) sources of the ILDB shaders kept in this directory.
# The list is only consumed by EXTRA_DIST; this file defines no rule that assembles them.
# $(NULL) terminates the list so that every real entry can end with a backslash.
INTEL_ILDB_ASM = \
AVC_ILDB_Child_Field_UV.asm \
AVC_ILDB_Child_Field_Y.asm \
AVC_ILDB_Child_Mbaff_UV.asm \
AVC_ILDB_Child_Mbaff_Y.asm \
AVC_ILDB_Child_UV.asm \
AVC_ILDB_Child_Y.asm \
AVC_ILDB_Chroma_Core.asm \
AVC_ILDB_Chroma_Core_Mbaff.asm \
AVC_ILDB_CloseGateway.asm \
AVC_ILDB_Dep_Check.asm \
AVC_ILDB_Filter_Mbaff_UV_h.asm \
AVC_ILDB_Filter_Mbaff_UV_v.asm \
AVC_ILDB_Filter_Mbaff_Y_h.asm \
AVC_ILDB_Filter_Mbaff_Y_v.asm \
AVC_ILDB_Filter_UV_h.asm \
AVC_ILDB_Filter_UV_v.asm \
AVC_ILDB_Filter_Y_h.asm \
AVC_ILDB_Filter_Y_v.asm \
AVC_ILDB_ForwardMsg.asm \
AVC_ILDB_LumaThrdLimit.asm \
AVC_ILDB_Luma_Core.asm \
AVC_ILDB_Luma_Core_Mbaff.asm \
AVC_ILDB_OpenGateway.asm \
AVC_ILDB_Root_Field_UV.asm \
AVC_ILDB_Root_Field_Y.asm \
AVC_ILDB_Root_Mbaff_UV.asm \
AVC_ILDB_Root_Mbaff_Y.asm \
AVC_ILDB_Root_UV.asm \
AVC_ILDB_Root_Y.asm \
AVC_ILDB_Spawn.asm \
AVC_ILDB_SpawnChild.asm \
AVC_ILDB_SpawnChromaRoot.asm \
Load_ILDB_Cntrl_Data.asm \
Load_ILDB_Cntrl_Data_16DW.asm \
Load_ILDB_Cntrl_Data_22DW.asm \
Load_ILDB_Cntrl_Data_64DW.asm \
SetupVPKernel.asm \
TransposeNV12_16x16.asm \
TransposeNV12_4x16.asm \
Transpose_Cur_UV_2x8.asm \
Transpose_Cur_UV_8x8.asm \
Transpose_Cur_UV_Right_Most_2x8.asm \
Transpose_Cur_Y_16x16.asm \
Transpose_Cur_Y_4x16.asm \
Transpose_Cur_Y_Right_Most_4x16.asm \
Transpose_Left_UV_2x8.asm \
Transpose_Left_Y_4x16.asm \
loadNV12_16x16T.asm \
loadNV12_16x4.asm \
load_Cur_UV_8x8T.asm \
load_Cur_UV_8x8T_Mbaff.asm \
load_Cur_UV_Right_Most_2x8.asm \
load_Cur_Y_16x16T.asm \
load_Cur_Y_16x16T_Mbaff.asm \
load_Cur_Y_Right_Most_4x16.asm \
load_Left_UV_2x8T.asm \
load_Left_UV_2x8T_Mbaff.asm \
load_Left_Y_4x16T.asm \
load_Left_Y_4x16T_Mbaff.asm \
load_Top_UV_8x2.asm \
load_Top_UV_8x2_Mbaff.asm \
load_Top_Y_16x4.asm \
load_Top_Y_16x4_Mbaff.asm \
saveNV12_16x16.asm \
saveNV12_16x4.asm \
saveNV12_16x4T.asm \
save_Cur_UV_8x8.asm \
save_Cur_UV_8x8_Mbaff.asm \
save_Cur_Y_16x16.asm \
save_Cur_Y_16x16_Mbaff.asm \
save_Left_UV_8x2T.asm \
save_Left_UV_8x2T_Mbaff.asm \
save_Left_Y_16x4T.asm \
save_Left_Y_16x4T_Mbaff.asm \
save_Top_UV_8x2.asm \
save_Top_UV_8x2_Mbaff.asm \
save_Top_Y_16x4.asm \
save_Top_Y_16x4_Mbaff.asm \
writeURB.asm \
writeURB_UV_Child.asm \
writeURB_Y_Child.asm \
$(NULL)
# Nothing is built in this directory: the shader sources and includes are only
# added to the distribution tarball.
EXTRA_DIST = \
$(INTEL_ILDB_ASM) \
$(INTEL_ILDB_INC) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in

View File

@@ -0,0 +1,548 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
# Shell test that succeeds when both MAKEFILE_LIST and MAKELEVEL expand to non-empty strings.
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
# Shell fragment that succeeds when make was invoked with a given single-letter option.
# The caller sets the shell variable target_option to exactly one character first;
# anything else prints an internal error and exits 1.
# It scans MFLAGS (when am__is_gnu_make succeeds) or a cleaned-up MAKEFLAGS word by word,
# skipping variable assignments and long options, dropping the -I/-O/-l options together
# with their arguments, skipping the argument of -d/-E/-D/-m/-J/-T, and reports success
# as soon as a remaining word contains target_option.
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
# Convenience wrappers: succeed when make runs with -n (dry run) or -k (keep going).
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
# Package-specific installation directories, derived from the configured base directories.
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
# "cd" prefixed with a CDPATH assignment, used by recipes in place of a bare cd.
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
# Install/uninstall phase markers; all are the shell no-op ":".
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
# Path of this directory relative to the top of the source tree.
subdir = src/shaders/h264/ildb
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
# Verbosity helpers. Each AM_V_* name resolves through @AM_V@ (falling back to
# @AM_DEFAULT_V@ when that is empty) to its _0 or _1 variant:
# level 0 is quiet (AM_V_GEN echoes " GEN <target>", AM_V_at hides the command),
# level 1 is verbose (both expand to nothing).
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
# No compiled sources in this directory.
SOURCES =
DIST_SOURCES =
# Succeeds when install-info may be run: AM_UPDATE_INFO_DIR is not n/no/NO and
# "install-info --version" works.
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Everything the distdir target copies into the distribution.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DRM_CFLAGS = @DRM_CFLAGS@
DRM_LIBS = @DRM_LIBS@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_LIBS = @EGL_LIBS@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GEN4ASM = @GEN4ASM@
GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
GEN4ASM_LIBS = @GEN4ASM_LIBS@
GIT = @GIT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBDRM_VERSION = @LIBDRM_VERSION@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
PYTHON2 = @PYTHON2@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
wayland_protocoldir = @wayland_protocoldir@
wayland_scanner = @wayland_scanner@
# The definitions below are carried over from Makefile.am by automake (see the
# "generated by automake" banner at the top of this file). Change Makefile.am
# and regenerate rather than editing them here.
# Include (.inc) files of the ILDB shaders; $(NULL) only terminates the list.
INTEL_ILDB_INC = \
AVC_ILDB.inc \
Child_Undefs.inc \
ILDB_header.inc \
Root_Undefs.inc \
$(NULL)
# Assembly (.asm) sources of the ILDB shaders; consumed only by EXTRA_DIST.
INTEL_ILDB_ASM = \
AVC_ILDB_Child_Field_UV.asm \
AVC_ILDB_Child_Field_Y.asm \
AVC_ILDB_Child_Mbaff_UV.asm \
AVC_ILDB_Child_Mbaff_Y.asm \
AVC_ILDB_Child_UV.asm \
AVC_ILDB_Child_Y.asm \
AVC_ILDB_Chroma_Core.asm \
AVC_ILDB_Chroma_Core_Mbaff.asm \
AVC_ILDB_CloseGateway.asm \
AVC_ILDB_Dep_Check.asm \
AVC_ILDB_Filter_Mbaff_UV_h.asm \
AVC_ILDB_Filter_Mbaff_UV_v.asm \
AVC_ILDB_Filter_Mbaff_Y_h.asm \
AVC_ILDB_Filter_Mbaff_Y_v.asm \
AVC_ILDB_Filter_UV_h.asm \
AVC_ILDB_Filter_UV_v.asm \
AVC_ILDB_Filter_Y_h.asm \
AVC_ILDB_Filter_Y_v.asm \
AVC_ILDB_ForwardMsg.asm \
AVC_ILDB_LumaThrdLimit.asm \
AVC_ILDB_Luma_Core.asm \
AVC_ILDB_Luma_Core_Mbaff.asm \
AVC_ILDB_OpenGateway.asm \
AVC_ILDB_Root_Field_UV.asm \
AVC_ILDB_Root_Field_Y.asm \
AVC_ILDB_Root_Mbaff_UV.asm \
AVC_ILDB_Root_Mbaff_Y.asm \
AVC_ILDB_Root_UV.asm \
AVC_ILDB_Root_Y.asm \
AVC_ILDB_Spawn.asm \
AVC_ILDB_SpawnChild.asm \
AVC_ILDB_SpawnChromaRoot.asm \
Load_ILDB_Cntrl_Data.asm \
Load_ILDB_Cntrl_Data_16DW.asm \
Load_ILDB_Cntrl_Data_22DW.asm \
Load_ILDB_Cntrl_Data_64DW.asm \
SetupVPKernel.asm \
TransposeNV12_16x16.asm \
TransposeNV12_4x16.asm \
Transpose_Cur_UV_2x8.asm \
Transpose_Cur_UV_8x8.asm \
Transpose_Cur_UV_Right_Most_2x8.asm \
Transpose_Cur_Y_16x16.asm \
Transpose_Cur_Y_4x16.asm \
Transpose_Cur_Y_Right_Most_4x16.asm \
Transpose_Left_UV_2x8.asm \
Transpose_Left_Y_4x16.asm \
loadNV12_16x16T.asm \
loadNV12_16x4.asm \
load_Cur_UV_8x8T.asm \
load_Cur_UV_8x8T_Mbaff.asm \
load_Cur_UV_Right_Most_2x8.asm \
load_Cur_Y_16x16T.asm \
load_Cur_Y_16x16T_Mbaff.asm \
load_Cur_Y_Right_Most_4x16.asm \
load_Left_UV_2x8T.asm \
load_Left_UV_2x8T_Mbaff.asm \
load_Left_Y_4x16T.asm \
load_Left_Y_4x16T_Mbaff.asm \
load_Top_UV_8x2.asm \
load_Top_UV_8x2_Mbaff.asm \
load_Top_Y_16x4.asm \
load_Top_Y_16x4_Mbaff.asm \
saveNV12_16x16.asm \
saveNV12_16x4.asm \
saveNV12_16x4T.asm \
save_Cur_UV_8x8.asm \
save_Cur_UV_8x8_Mbaff.asm \
save_Cur_Y_16x16.asm \
save_Cur_Y_16x16_Mbaff.asm \
save_Left_UV_8x2T.asm \
save_Left_UV_8x2T_Mbaff.asm \
save_Left_Y_16x4T.asm \
save_Left_Y_16x4T_Mbaff.asm \
save_Top_UV_8x2.asm \
save_Top_UV_8x2_Mbaff.asm \
save_Top_Y_16x4.asm \
save_Top_Y_16x4_Mbaff.asm \
writeURB.asm \
writeURB_UV_Child.asm \
writeURB_Y_Child.asm \
$(NULL)
# Both lists are shipped in the distribution; neither is built here.
EXTRA_DIST = \
$(INTEL_ILDB_ASM) \
$(INTEL_ILDB_INC) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
# Default goal; delegates to all-am.
all: all-am
# Empty suffix list: no suffix rules apply in this directory.
.SUFFIXES:
# Regenerate Makefile.in. If any out-of-date prerequisite is one of the configure
# inputs, hand over to the top-level am--refresh target and stop; otherwise rerun
# automake for this directory's Makefile only.
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/h264/ildb/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu src/shaders/h264/ildb/Makefile
.PRECIOUS: Makefile
# Regenerate Makefile. When config.status itself is newer, run the top-level
# am--refresh; otherwise ask config.status to recreate just this file.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
# config.status, configure and aclocal.m4 are all refreshed through the
# top-level am--refresh target.
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
# Empty rule: gives the aclocal inputs a target with no recipe.
$(am__aclocal_m4_deps):
# Remove libtool leftovers; the leading "-" makes make ignore a failing rm.
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# Tag-generation targets are present but have no recipe in this directory.
tags TAGS:
ctags CTAGS:
cscope cscopelist:
# Copy every entry of DISTFILES into $(distdir).
# Leading $(srcdir)/ is stripped and leading $(top_srcdir)/ is rewritten to
# $(top_builddir)/, then any needed subdirectories are created under $(distdir).
# Each entry is taken from the build directory when it exists there, otherwise
# from $(srcdir). Directories are copied recursively (after making existing
# destination directories user-accessible); plain files are copied only when
# not already present in $(distdir). Any failed copy aborts the recipe.
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
# Thin wrappers: each public target forwards to its "-am" counterpart, and
# all-am only requires an up-to-date Makefile.  installdirs has no recipe.
check-am: all-am
check: check-am
all-am: Makefile
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am

# Run the exec and data install steps in a sub-make once all-am is done.
install-am: all-am
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am

installcheck: installcheck-am

# Re-run "install" with INSTALL_PROGRAM and install_sh_PROGRAM replaced by
# INSTALL_STRIP_PROGRAM and INSTALL_STRIP_FLAG set to -s.  When STRIP is
# non-empty it is additionally handed over as STRIPPROG through
# INSTALL_PROGRAM_ENV.
install-strip:
	if test -z '$(STRIP)'; then \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	      install; \
	else \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
	fi
# mostlyclean-generic and clean-generic have no recipe in this directory.
mostlyclean-generic:

clean-generic:

# Delete the files named in CONFIG_CLEAN_FILES, and those named in
# CONFIG_CLEAN_VPATH_FILES unless srcdir is "." (build in the source tree).
# Both steps are skipped for an empty list and their errors are ignored.
distclean-generic:
	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)

# Print a warning, then delete the files named in MAINTAINERCLEANFILES
# (skipped when the list is empty; errors are ignored).
maintainer-clean-generic:
	@echo "This command is intended for maintainers to use"
	@echo "it deletes files that may require special tools to rebuild."
	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)

clean: clean-am

clean-am: clean-generic clean-libtool mostlyclean-am

# distclean also removes the Makefile itself once distclean-am has run.
distclean: distclean-am
	-rm -f Makefile
distclean-am: clean-am distclean-generic
# Documentation and install hooks.  Every "-am" target below has no recipe,
# so none of these targets does any work in this directory.
dvi: dvi-am

dvi-am:

html: html-am

html-am:

info: info-am

info-am:

install-data-am:

install-dvi: install-dvi-am

install-dvi-am:

install-exec-am:

install-html: install-html-am

install-html-am:

install-info: install-info-am

install-info-am:

install-man:

install-pdf: install-pdf-am

install-pdf-am:

install-ps: install-ps-am

install-ps-am:

installcheck-am:

# maintainer-clean also removes the Makefile itself once
# maintainer-clean-am has run; a failing rm is ignored.
maintainer-clean: maintainer-clean-am
	-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic

mostlyclean: mostlyclean-am

mostlyclean-am: mostlyclean-generic mostlyclean-libtool

pdf: pdf-am

pdf-am:

ps: ps-am

ps-am:

uninstall-am:
# Targets whose recipes re-invoke $(MAKE).
.MAKE: install-am install-strip
# Targets that name actions rather than files.
.PHONY: all all-am check check-am clean clean-generic clean-libtool \
cscopelist-am ctags-am distclean distclean-generic \
distclean-libtool distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,57 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: Root_Undefs.inc
//
// Undefine global symbols for new process in root thread
//
#undef READ_BI
#undef WRITE_BI
#undef ILDB_H_INDEPENDENT
#undef ILDB_H_INDEPENDENT_CONT
#undef ILDB_H_DEPENDENT
#undef ILDB_H_DEPENDENT_SCAN
#undef ILDB_H_NO_DEPENDENT
#undef ILDB_V_INDEPENDENT
#undef ILDB_V_INDEPENDENT_CONT
#undef ILDB_V_DEPENDENT
#undef ILDB_V_DEPENDENT_SCAN
#undef ILDB_V_NO_DEPENDENT

View File

@@ -0,0 +1,54 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: SetupVPKernel.asm
//
// Initial setup for running video-processing kernels
//
#include "ILDB_header.inc"
//
// Now, begin source code....
//
.code
mov (8) MSGSRC.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0
// End of SetupVPKernel

View File

@@ -0,0 +1,165 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: TransposeNV12_16x16.asm
//
// Transpose a 16x16 NV12 MB. The output is also in NV12
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// SRC_UW: SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDDA:w
#endif
// Transpose Y (16x16 bytes)
// The first step
mov (16) BUF_B(0,0)<1> SRC_YB(0,0)<16;4,1>
mov (16) BUF_B(0,16)<1> SRC_YB(2,0)<16;4,1>
mov (16) BUF_B(1,0)<1> SRC_YB(4,0)<16;4,1>
mov (16) BUF_B(1,16)<1> SRC_YB(6,0)<16;4,1>
mov (16) BUF_B(2,0)<1> SRC_YB(0,4)<16;4,1>
mov (16) BUF_B(2,16)<1> SRC_YB(2,4)<16;4,1>
mov (16) BUF_B(3,0)<1> SRC_YB(4,4)<16;4,1>
mov (16) BUF_B(3,16)<1> SRC_YB(6,4)<16;4,1>
mov (16) BUF_B(4,0)<1> SRC_YB(0,8)<16;4,1>
mov (16) BUF_B(4,16)<1> SRC_YB(2,8)<16;4,1>
mov (16) BUF_B(5,0)<1> SRC_YB(4,8)<16;4,1>
mov (16) BUF_B(5,16)<1> SRC_YB(6,8)<16;4,1>
mov (16) BUF_B(6,0)<1> SRC_YB(0,12)<16;4,1>
mov (16) BUF_B(6,16)<1> SRC_YB(2,12)<16;4,1>
mov (16) BUF_B(7,0)<1> SRC_YB(4,12)<16;4,1>
mov (16) BUF_B(7,16)<1> SRC_YB(6,12)<16;4,1>
// The second step
mov (16) SRC_YB(0,0)<1> BUF_B(0,0)<32;8,4>
mov (16) SRC_YB(0,16)<1> BUF_B(0,1)<32;8,4>
mov (16) SRC_YB(1,0)<1> BUF_B(0,2)<32;8,4>
mov (16) SRC_YB(1,16)<1> BUF_B(0,3)<32;8,4>
mov (16) SRC_YB(2,0)<1> BUF_B(2,0)<32;8,4>
mov (16) SRC_YB(2,16)<1> BUF_B(2,1)<32;8,4>
mov (16) SRC_YB(3,0)<1> BUF_B(2,2)<32;8,4>
mov (16) SRC_YB(3,16)<1> BUF_B(2,3)<32;8,4>
mov (16) SRC_YB(4,0)<1> BUF_B(4,0)<32;8,4>
mov (16) SRC_YB(4,16)<1> BUF_B(4,1)<32;8,4>
mov (16) SRC_YB(5,0)<1> BUF_B(4,2)<32;8,4>
mov (16) SRC_YB(5,16)<1> BUF_B(4,3)<32;8,4>
mov (16) SRC_YB(6,0)<1> BUF_B(6,0)<32;8,4>
mov (16) SRC_YB(6,16)<1> BUF_B(6,1)<32;8,4>
mov (16) SRC_YB(7,0)<1> BUF_B(6,2)<32;8,4>
mov (16) SRC_YB(7,16)<1> BUF_B(6,3)<32;8,4>
// Y is transposed.
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.
// Transpose by treating UV pair as a word.
// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// First step (16) <1>:w <==== <8;4,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Transpose UV (8x8 words), The first step
mov (16) BUF_W(0,0)<1> SRC_UW(0,0)<8;4,1>
mov (16) BUF_W(1,0)<1> SRC_UW(2,0)<8;4,1>
mov (16) BUF_W(2,0)<1> SRC_UW(0,4)<8;4,1>
mov (16) BUF_W(3,0)<1> SRC_UW(2,4)<8;4,1>
// Second step (8) <1>:w <=== <16;4,4>:w (two 8-word moves fill each 16-word row shown below)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Transpose UV (8x8 words), The second step
mov (8) SRC_UW(0,0)<1> BUF_W(0,0)<16;4,4>
mov (8) SRC_UW(0,8)<1> BUF_W(0,1)<16;4,4>
mov (8) SRC_UW(1,0)<1> BUF_W(0,2)<16;4,4>
mov (8) SRC_UW(1,8)<1> BUF_W(0,3)<16;4,4>
mov (8) SRC_UW(2,0)<1> BUF_W(2,0)<16;4,4>
mov (8) SRC_UW(2,8)<1> BUF_W(2,1)<16;4,4>
mov (8) SRC_UW(3,0)<1> BUF_W(2,2)<16;4,4>
mov (8) SRC_UW(3,8)<1> BUF_W(2,3)<16;4,4>
// U and V are now transposed and separated.

View File

@@ -0,0 +1,124 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: TransposeNV12_4x16.asm
//
// Transpose a 4x16 internal planar to 16x4 internal planar block
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// SRC_UW: SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDDB:w
#endif
// Transpose Y (4x16) right most 4 columns
// The first step
mov (16) BUF_B(0,0)<1> SRC_YB(0,0)<16;4,1> // Read 2 rows, write 1 row
mov (16) BUF_B(0,16)<1> SRC_YB(2,0)<16;4,1>
mov (16) BUF_B(1,0)<1> SRC_YB(4,0)<16;4,1>
mov (16) BUF_B(1,16)<1> SRC_YB(6,0)<16;4,1>
// The second step
mov (16) BUF_B(2,0)<1> BUF_B(0,0)<32;8,4> // Read 2 rows, write 1 row
mov (16) BUF_B(2,16)<1> BUF_B(0,1)<32;8,4>
mov (16) BUF_B(3,0)<1> BUF_B(0,2)<32;8,4>
mov (16) BUF_B(3,16)<1> BUF_B(0,3)<32;8,4>
// Y is now transposed. the result is in BUF_B(2) and BUF_B(3).
// Transpose UV (4x8), right most 2 columns in word
// Use BUF_W(0) as temp buf
// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// First step (8) <1>:w <==== <8;2,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) BUF_W(0,0)<1> SRC_UW(0,0)<8;2,1>
mov (8) BUF_W(0,8)<1> SRC_UW(2,0)<8;2,1>
// Second step (16) <1>:w <==== <1;8,2>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (16) BUF_W(1,0)<1> BUF_W(0,0)<1;8,2>
// UV are now transposed. the result is in BUF_W(1).
//The first step
//mov (16) BUF_B(0,0)<1> SRC_UW(0,0)<8;2,1> // Read 2 rows, write 1 row
// The second step
//mov (8) SRC_UB(4,0)<1> BUF_B(0,0)<16;8,2> // Read 1 row, write 1 row
//mov (8) SRC_UB(4,8)<1> BUF_B(0,1)<16;8,2> // Read 1 row, write 1 row
// Transpose V (8x8), right most 2 columns
// The first step
//mov (16) BUF_B(0,0)<1> SRC_VB(0,1)<8;2,1> // Read 2 rows, write 1 row
// The second step
//mov (8) SRC_UB(4,16)<1> BUF_B(0,0)<16;8,2> // Read 1 row, write 1 row
//mov (8) SRC_UB(4,24)<1> BUF_B(0,1)<16;8,2> // Read 1 row, write 1 row
// U and V are now transposed. the result is in BUF_B(4).

View File

@@ -0,0 +1,86 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: Transpose_UV_2x8.asm
//
// Transpose UV 2x8 to 8x2 block (2x8U + 2x8V in NV12)
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_UW: SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
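// Temp buffer:
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
// NOTE: the instructions in this module reference LEFT_TEMP_W, not BUF_W, as the
// temp buffer, so LEFT_TEMP_W must be defined by the including code.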
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDDB:w
#endif
// Transpose UV (2x8 words): columns 6 and 7 of every row, each UV pair handled as one word
// Use LEFT_TEMP_W(0) as temp buf
// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// First step (8) <1>:w <==== <8;2,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 67 67 66 66 57 57 56 56 47 47 46 46 37 37 36 36 27 27 26 26 17 17 16 16 07 07 06 06|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) LEFT_TEMP_W(0,0)<1> SRC_UW(0,6)<8;2,1> { NoDDClr }
mov (8) LEFT_TEMP_W(0,8)<1> SRC_UW(2,6)<8;2,1> { NoDDChk }
// Second step (16) <1>:w <==== <1;8,2>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (16) LEFT_TEMP_W(1,0)<1> LEFT_TEMP_W(0,0)<1;8,2>
// UV are now transposed. The result is in LEFT_TEMP_W(1).

View File

@@ -0,0 +1,115 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: Transpose_UV_8x8.asm
//
// Transpose a 8x8 UV block. (8x8U + 8x8V) The output is also in NV12
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_UW: SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
//
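// Temp buffer:
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw // 4 GRFs
// NOTE: the instructions in this module reference CUR_TEMP_W, not BUF_W, as the
// temp buffer, so CUR_TEMP_W must be defined by the including code.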
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDDA:w
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.
// Transpose by treating UV pair as a word.
// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// First step (16) <1>:w <==== <8;4,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Transpose UV (8x8 words), The first step
mov (16) CUR_TEMP_W(0,0)<1> SRC_UW(0,0)<8;4,1>
mov (16) CUR_TEMP_W(1,0)<1> SRC_UW(2,0)<8;4,1>
mov (16) CUR_TEMP_W(2,0)<1> SRC_UW(0,4)<8;4,1>
mov (16) CUR_TEMP_W(3,0)<1> SRC_UW(2,4)<8;4,1>
// Second step (8) <1>:w <=== <16;4,4>:w (two 8-word moves fill each 16-word row shown below)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Transpose UV (8x8 words), The second step
mov (8) SRC_UW(0,0)<1> CUR_TEMP_W(0,0)<16;4,4> { NoDDClr }
mov (8) SRC_UW(0,8)<1> CUR_TEMP_W(0,1)<16;4,4> { NoDDChk }
mov (8) SRC_UW(1,0)<1> CUR_TEMP_W(0,2)<16;4,4> { NoDDClr }
mov (8) SRC_UW(1,8)<1> CUR_TEMP_W(0,3)<16;4,4> { NoDDChk }
mov (8) SRC_UW(2,0)<1> CUR_TEMP_W(2,0)<16;4,4> { NoDDClr }
mov (8) SRC_UW(2,8)<1> CUR_TEMP_W(2,1)<16;4,4> { NoDDChk }
mov (8) SRC_UW(3,0)<1> CUR_TEMP_W(2,2)<16;4,4> { NoDDClr }
mov (8) SRC_UW(3,8)<1> CUR_TEMP_W(2,3)<16;4,4> { NoDDChk }
// U and V are now transposed and separated.

View File

@@ -0,0 +1,55 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Transpose Cur MB Right Most 2x8 to 8x2
// Assume source is LEFT_TEMP_W(0), and destination is LEFT_TEMP_W(1)
// NOTE(review): each doubled entry in the diagrams below (e.g. "10 10") appears to
// stand for one word-sized element -- confirm.
// Input from dport for transpose:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Output of transpose: <1> <=== <16;8,2>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 11 11 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Earlier two-instruction form (one SIMD8 move per output half), kept for reference:
// mov (8) LEFT_TEMP_W(1,0)<1> LEFT_TEMP_W(0,0)<16;8,2> { NoDDClr }
// mov (8) LEFT_TEMP_W(1,8)<1> LEFT_TEMP_W(0,1)<16;8,2> { NoDDChk }
// Active form: a single SIMD16 move. With source region <1;8,2>
// (<VertStride;Width,HorzStride>) each row reads 8 words at a stride of 2 and the
// second row starts 1 word after the first, so destination words 0-7 receive the
// even source words and destination words 8-15 receive the odd source words.
mov (16) LEFT_TEMP_W(1,0)<1> LEFT_TEMP_W(0,0)<1;8,2>

View File

@@ -0,0 +1,104 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: Transpose_Y_16x16.asm
//
// Transpose Y 16x16 block.
//
// The transpose is done in place: SRC_YB is first regrouped into CUR_TEMP_B and
// then written back to SRC_YB in transposed order.
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
//
// Temp buffer:
// CUR_TEMP_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write a marker word into EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDDA:w
#endif
// Transpose Y (16x16 bytes)
// The first step
// Source region <16;4,1> (<VertStride;Width,HorzStride>) reads 4 consecutive bytes
// from each of 4 rows spaced 16 bytes apart, so every move gathers a 4-column slice
// of 4 rows. CUR_TEMP_B(0..1) collect columns 0-3 of all 16 rows, (2..3) columns
// 4-7, (4..5) columns 8-11 and (6..7) columns 12-15.
// Each NoDDClr / NoDDChk pair writes the two 16-byte halves (offsets 0 and 16) of
// the same destination register.
// NOTE(review): the hints presumably let the second write skip the destination
// dependency check against the first -- confirm against the GEN ISA documentation.
mov (16) CUR_TEMP_B(0,0)<1> SRC_YB(0,0)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(0,16)<1> SRC_YB(2,0)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(1,0)<1> SRC_YB(4,0)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(1,16)<1> SRC_YB(6,0)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(2,0)<1> SRC_YB(0,4)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(2,16)<1> SRC_YB(2,4)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(3,0)<1> SRC_YB(4,4)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(3,16)<1> SRC_YB(6,4)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(4,0)<1> SRC_YB(0,8)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(4,16)<1> SRC_YB(2,8)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(5,0)<1> SRC_YB(4,8)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(5,16)<1> SRC_YB(6,8)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(6,0)<1> SRC_YB(0,12)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(6,16)<1> SRC_YB(2,12)<16;4,1> { NoDDChk }
mov (16) CUR_TEMP_B(7,0)<1> SRC_YB(4,12)<16;4,1> { NoDDClr }
mov (16) CUR_TEMP_B(7,16)<1> SRC_YB(6,12)<16;4,1> { NoDDChk }
// The second step
// Source region <32;8,4> reads every 4th byte, 8 per register, across two
// consecutive temp registers, i.e. one source column over all 16 rows. The
// starting byte offset (0..3) selects the column inside the 4-column slice.
// SRC_YB is overwritten: SRC_YB(r,0) and SRC_YB(r,16) receive source columns
// 2r and 2r+1 respectively.
mov (16) SRC_YB(0,0)<1> CUR_TEMP_B(0,0)<32;8,4> { NoDDClr }
mov (16) SRC_YB(0,16)<1> CUR_TEMP_B(0,1)<32;8,4> { NoDDChk }
mov (16) SRC_YB(1,0)<1> CUR_TEMP_B(0,2)<32;8,4> { NoDDClr }
mov (16) SRC_YB(1,16)<1> CUR_TEMP_B(0,3)<32;8,4> { NoDDChk }
mov (16) SRC_YB(2,0)<1> CUR_TEMP_B(2,0)<32;8,4> { NoDDClr }
mov (16) SRC_YB(2,16)<1> CUR_TEMP_B(2,1)<32;8,4> { NoDDChk }
mov (16) SRC_YB(3,0)<1> CUR_TEMP_B(2,2)<32;8,4> { NoDDClr }
mov (16) SRC_YB(3,16)<1> CUR_TEMP_B(2,3)<32;8,4> { NoDDChk }
mov (16) SRC_YB(4,0)<1> CUR_TEMP_B(4,0)<32;8,4> { NoDDClr }
mov (16) SRC_YB(4,16)<1> CUR_TEMP_B(4,1)<32;8,4> { NoDDChk }
mov (16) SRC_YB(5,0)<1> CUR_TEMP_B(4,2)<32;8,4> { NoDDClr }
mov (16) SRC_YB(5,16)<1> CUR_TEMP_B(4,3)<32;8,4> { NoDDChk }
mov (16) SRC_YB(6,0)<1> CUR_TEMP_B(6,0)<32;8,4> { NoDDClr }
mov (16) SRC_YB(6,16)<1> CUR_TEMP_B(6,1)<32;8,4> { NoDDChk }
mov (16) SRC_YB(7,0)<1> CUR_TEMP_B(6,2)<32;8,4> { NoDDClr }
mov (16) SRC_YB(7,16)<1> CUR_TEMP_B(6,3)<32;8,4> { NoDDChk }
// Y is transposed.

View File

@@ -0,0 +1,105 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////
// Module name: Transpose_Y_4x16.asm
//
// Transpose a 4x16 internal planar to 16x4 internal planar block.
// The src block is 16x16. Right most 4 columns are transposed.
//
//----------------------------------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region is :ub
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// NOTE(review): the instructions below use LEFT_TEMP_B (4 registers) as the
// temp/result buffer, not BUF_B -- the entry above may be stale; confirm.
//
//////////////////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
// Debug builds only: write a marker word into EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDDB:w
#endif
// Transpose Y (4x16) right most 4 columns
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |9f 9e 9d 9c 9b 9a 99 98 97 96 95 94 93 92 91 90 8f 8e 8d 8c 8b 8a 89 88 87 86 85 84 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |bf be bd bc bb ba b9 b8 b7 b6 b5 b4 b3 b2 b1 b0 af ae ad ac ab aa a9 a8 a7 a6 a5 a4 a3 a2 a1 a0|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |df de dd dc db da d9 d8 d7 d6 d5 d4 d3 d2 d1 d0 cf ce cd cc cb ca c9 c8 c7 c6 c5 c4 c3 c2 c1 c0|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |ff fe fd fc fb fa f9 f8 f7 f6 f5 f4 f3 f2 f1 f0 ef ee ed ec eb ea e9 e8 e7 e6 e5 e4 e3 e2 e1 e0|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The first step
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |7f 7e 7d 7c 6f 6e 6d 6c 5f 5e 5d 5c 4f 4e 4d 4c 3f 3e 3d 3c 2f 2e 2d 2c 1f 1e 1d 1c 0f 0e 0d 0c|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |ff fe fd fc ef ee ed ec df de dd dc cf ce cd cc bf be bd bc af ae ad ac 9f 9e 9d 9c 8f 8e 8d 8c|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The second step
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |fd ed dd cd bd ad 9d 8d 7d 6d 5d 4d 3d 2d 1d 0d fc ec dc cc bc ac 9c 8c 7c 6c 5c 4c 3c 2c 1c 0c|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |ff ef df cf bf af 9f 8f 7f 6f 5f 4f 3f 2f 1f 0f fe ee de ce be ae 9e 8e 7e 6e 5e 4e 3e 2e 1e 0e|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The first step (code): region <16;4,1> starting at byte offset 12 reads bytes
// 12-15 of 4 rows spaced 16 bytes apart, so the four moves gather the right-most
// 4 columns of all 16 rows into LEFT_TEMP_B(0) and LEFT_TEMP_B(1).
// Each NoDDClr / NoDDChk pair writes the two 16-byte halves of one destination register.
mov (16) LEFT_TEMP_B(0,0)<1> SRC_YB(0,12)<16;4,1> { NoDDClr }
mov (16) LEFT_TEMP_B(0,16)<1> SRC_YB(2,12)<16;4,1> { NoDDChk }
mov (16) LEFT_TEMP_B(1,0)<1> SRC_YB(4,12)<16;4,1> { NoDDClr }
mov (16) LEFT_TEMP_B(1,16)<1> SRC_YB(6,12)<16;4,1> { NoDDChk }
// The second step
// Region <32;8,4> picks every 4th byte across LEFT_TEMP_B(0) and LEFT_TEMP_B(1);
// the starting byte offset 0..3 selects which of the 4 gathered columns is written
// out as one 16-byte row.
mov (16) LEFT_TEMP_B(2,0)<1> LEFT_TEMP_B(0,0)<32;8,4> { NoDDClr }
mov (16) LEFT_TEMP_B(2,16)<1> LEFT_TEMP_B(0,1)<32;8,4> { NoDDChk }
mov (16) LEFT_TEMP_B(3,0)<1> LEFT_TEMP_B(0,2)<32;8,4> { NoDDClr }
mov (16) LEFT_TEMP_B(3,16)<1> LEFT_TEMP_B(0,3)<32;8,4> { NoDDChk }
// Y is now transposed. the result is in LEFT_TEMP_B(2) and LEFT_TEMP_B(3).

View File

@@ -0,0 +1,61 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Transpose cur Y right most 4x16 to 16x4
// Assume source is LEFT_TEMP_B(0), and destination is LEFT_TEMP_B(2)
// Input received from dport:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Output of transpose: <1> <= <32;8,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Transpose the data, also occupy 2 GRFs
// Region <32;8,4> (<VertStride;Width,HorzStride>) reads every 4th byte, 8 per
// register, across LEFT_TEMP_B(0) and LEFT_TEMP_B(1); the starting byte offset
// 0..3 selects which input column becomes the 16-byte output row.
// Each NoDDClr / NoDDChk pair writes the two 16-byte halves of one destination register.
mov (16) LEFT_TEMP_B(2)<1> LEFT_TEMP_B(0, 0)<32;8,4> { NoDDClr }
mov (16) LEFT_TEMP_B(2, 16)<1> LEFT_TEMP_B(0, 1)<32;8,4> { NoDDChk }
mov (16) LEFT_TEMP_B(3)<1> LEFT_TEMP_B(0, 2)<32;8,4> { NoDDClr }
mov (16) LEFT_TEMP_B(3, 16)<1> LEFT_TEMP_B(0, 3)<32;8,4> { NoDDChk }

View File

@@ -0,0 +1,58 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Transpose left MB 2x8 to 8x2
// Assume source is LEFT_TEMP_W, and destination is PREV_MB_UW
// NOTE(review): each doubled entry in the diagrams below (e.g. "10 10") appears to
// stand for one word-sized element -- confirm.
// Input from dport for transpose:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Output of transpose: <1> <=== <16;8,2>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Earlier two-instruction forms (one SIMD8 move per output half), kept for reference:
// mov (8) PREV_MB_UW(0,0)<1> BUF_W(0,0)<16;8,2> { NoDDClr }
// mov (8) PREV_MB_UW(0,8)<1> BUF_W(0,1)<16;8,2> { NoDDChk }
// mov (8) PREV_MB_UW(0,0)<1> LEFT_TEMP_W(0,0)<16;8,2> { NoDDClr }
// mov (8) PREV_MB_UW(0,8)<1> LEFT_TEMP_W(0,1)<16;8,2> { NoDDChk }
// Active form: a single SIMD16 move. With source region <1;8,2>
// (<VertStride;Width,HorzStride>) each row reads 8 words at a stride of 2 and the
// second row starts 1 word after the first, so destination words 0-7 receive the
// even source words and destination words 8-15 receive the odd source words.
mov (16) PREV_MB_UW(0,0)<1> LEFT_TEMP_W(0,0)<1;8,2>

View File

@@ -0,0 +1,61 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Transpose left MB 4x16 to 16x4
// Assume source is LEFT_TEMP_B, and destination is PREV_MB_YB
// Input received from dport:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Output of transpose: <1> <= <32;8,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Transpose the data, also occupy 2 GRFs
// Region <32;8,4> (<VertStride;Width,HorzStride>) reads every 4th byte, 8 per
// register, across LEFT_TEMP_B(0) and LEFT_TEMP_B(1); the starting byte offset
// 0..3 selects which input column becomes the 16-byte output row.
// Each NoDDClr / NoDDChk pair writes the two 16-byte halves of one destination register.
mov (16) PREV_MB_YB(0)<1> LEFT_TEMP_B(0, 0)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(0, 16)<1> LEFT_TEMP_B(0, 1)<32;8,4> { NoDDChk }
mov (16) PREV_MB_YB(1)<1> LEFT_TEMP_B(0, 2)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(1, 16)<1> LEFT_TEMP_B(0, 3)<32;8,4> { NoDDChk }

View File

@@ -0,0 +1,83 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: loadNV12_16x16T.asm
//
// Load and transpose NV12 16x16 block
//
// Issues two block reads (Y into SRC_YD, interleaved U+V into SRC_UD) and then
// includes TransposeNV12_16x16.asm to transpose the loaded data.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud (U+V for NV12) // 4 GRFs
//
// Source region is :ub. The same region as :ud region
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 8 GRFs
// SRC_UB: SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 2 GRFs
// SRC_VB: SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 2 GRFs
//
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
// BI_SRC_UV: Binding table index of UV surface (NV12)
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write a marker word into EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD1:w
#endif
// Read Y
// MSGSRC.0 / MSGSRC.1 receive the two words starting at ORIX_CUR (block origin).
// MSGSRC.2 holds the block size; judging by the constants used in this file it is
// encoded as (height-1) in the upper word and (width-1) in the lower word.
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud // Block width and height (16x16)
send (8) SRC_YD(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(8)+DWBRMSGDSC_RC+BI_SRC_Y // Read 8 GRFs
// Read U+V
// The Y origin already in MSGSRC.1 is halved in place; the x origin in MSGSRC.0 is reused.
asr (1) MSGSRC.1:ud MSGSRC.1:ud 1:w // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2<1>:ud 0x0007000F:ud // NV12 U+V block width and height (16x8)
send (8) SRC_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(4)+DWBRMSGDSC_RC+BI_SRC_UV // Read 4 GRFs
// Transpose the loaded block. The two separate includes below are commented out
// in favour of the combined TransposeNV12_16x16.asm.
#include "TransposeNV12_16x16.asm"
// #include "Transpose_Y_16x16.asm"
// #include "Transpose_NV12_UV_16x8.asm"
// End of loadNV12_16x16T

View File

@@ -0,0 +1,84 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: loadNV12_16x4.asm
//
// Load NV12 16x4 block
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 3 GRFs (2 for Y, 1 for U+V)
//
// Source region is :ub. The same region as :ud region
// SRC_YB: SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 2 GRFs
// SRC_UB: SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 0.5 GRF
// SRC_VB: SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub // 0.5 GRF
//
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
// BI_SRC_UV: Binding table index of UV surface (NV12)
//
// Temp buffer:
// BUF_D: BUF_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//
// NOTE(review): the code below writes Y to PREV_MB_YD and takes the block origin
// from ORIX; none of the SRC_* symbols listed above is referenced by an active
// instruction, so that part of the list may be stale -- confirm.
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write a marker word into EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD2:w
#endif
// Read Y
// MSGSRC.0 / MSGSRC.1 receive the two words starting at ORIX (block origin).
// MSGSRC.2 holds the block size; judging by the constants used in this file it is
// encoded as (height-1) in the upper word and (width-1) in the lower word.
mov (2) MSGSRC.0<1>:ud ORIX<2;2,1>:w // Block origin
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud // Block width and height (16x4)
send (8) PREV_MB_YD(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_SRC_Y // Read 2 GRFs
// Read U+V
// The Y origin already in MSGSRC.1 is halved in place; the x origin in MSGSRC.0 is reused.
asr (1) MSGSRC.1:ud MSGSRC.1:ud 1:w // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2<1>:ud 0x0001000F:ud // NV12 U+V block width and height (16x2)
// Load NV12 U+V to a temp buf
send (8) BUF_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_SRC_UV // Read 1 GRF
// Convert NV12 U+V to internal planar U and V and place them right after Y.
// (The conversion moves are commented out, so this module leaves the interleaved
// U+V data in BUF_D.)
// mov (16) SRC_UB(0,0)<1> BUF_B(0,0)<32;16,2>
// mov (16) SRC_VB(0,0)<1> BUF_B(0,1)<32;16,2>
// End of loadNV12_16x4.asm

View File

@@ -0,0 +1,95 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Cur_UV_8x8T.asm
//
// Load the current MB's UV 8x8 block (NV12: 8x8U and 8x8V mixed, i.e. a
// 16-byte-wide by 8-row read) into SRC_UD with a single send.
// The transpose is NOT performed by this module: the include of
// Transpose_Cur_UV_8x8.asm at the bottom is commented out.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud (U+V for NV12) // 4 GRFs
//
// Binding table index:
// BI_SRC_UV: Binding table index of UV surface (NV12)
//
// Preprocessor switches:
// _PROGRESSIVE / _FIELD: select how MSGSRC and MSGDSC are set up
// _DEBUG: writes an entry marker
// NOTE(review): if neither _PROGRESSIVE nor _FIELD is defined, nothing in this
// module writes MSGSRC or MSGDSC before the send -- confirm that every includer
// defines one of them.
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD1 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD1:w
#endif
// Read U+V blk
#if defined(_PROGRESSIVE)
// Message payload: MSGSRC.0 = x origin, MSGSRC.1 = y origin, MSGSRC.2 = block size.
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
mov (1) MSGSRC.0:ud ORIX_CUR:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8 bytes)
//send (8) SRC_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DWBRMSGDSC_SC+0x00040000+BI_SRC_UV
// Progressive: a single descriptor, response length 4 GRFs, surface BI_SRC_UV.
mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud
#endif
#if defined(_FIELD)
// cmp.z.f0.0 (1) NULLREGW PicTypeC:w 0:w // Get pic type flag
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero, i.e. bottom field.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// they are used later in this file
// Same payload setup as the progressive path above.
mov (1) MSGSRC.0:ud ORIX_CUR:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8 bytes)
// Set message descriptor
// Frame picture
// (f0.0) mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV
// (f0.0) jmpi load_UV_8x8T
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
(f0.1) mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV top field
//load_UV_8x8T:
#endif
// Issue the read: payload MSGSRC, descriptor MSGDSC, result written to SRC_UD(0).
send (8) SRC_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// #include "Transpose_Cur_UV_8x8.asm"
// End of load_UV_8x8T

View File

@@ -0,0 +1,92 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Cur_UV_8x8T.asm
// NOTE(review): this header carries the same module name as the
// _PROGRESSIVE/_FIELD variant; the code below handles the MBAFF case
// (per-MB frame/field selection), so this is presumably the Mbaff variant
// of the file -- confirm the file name.
//
// Load the current MB's UV 8x8 block (NV12: 8x8U and 8x8V mixed, i.e. a
// 16-byte-wide by 8-row read) into SRC_UD with a single send.
// The transpose is NOT performed by this module: the include of
// Transpose_Cur_UV_8x8.asm at the bottom is commented out.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud (U+V for NV12) // 4 GRFs
//
// Binding table index:
// BI_SRC_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD1 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD1:w
#endif
// FieldModeCurrentMbFlag selects frame vs. field access for this read.
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read U+V
// Message payload: MSGSRC.0 = x origin, MSGSRC.1 = y origin, MSGSRC.2 = block size.
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
mov (1) MSGSRC.0:ud ORIX_CUR:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8 bytes)
// Set message descriptor
(f0.0) if (1) ILDB_LABEL(ELSE_UV_8X8T)
// Frame picture
mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_UV_8X8T):
else (1) ILDB_LABEL(ENDIF_UV_8X8T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
(f0.1) mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud // Read 4 GRFs from SRC_UV top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ILDB_LABEL(ENDIF_UV_8X8T):
// Issue the read: payload MSGSRC, descriptor MSGDSC, result written to SRC_UD(0).
send (8) SRC_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// #include "Transpose_Cur_UV_8x8.asm"
// End of load_UV_8x8T

View File

@@ -0,0 +1,91 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Cur_UV_Right_Most_2X8.Asm
//
// Load the right-most 4-byte-wide by 8-row strip of the current MB's
// interleaved NV12 U+V block into LEFT_TEMP_D (1 GRF). The read goes through
// BI_DEST_UV, not BI_SRC_UV.
//
// Symbols referenced by this module: ORIX_CUR, ORIY_CUR, MSGSRC, MSGHDRU,
// LEFT_TEMP_D, BI_DEST_UV; the _FIELD/_MBAFF path additionally uses MSGDSC,
// BitFlags, BitFields, ECM_AddrReg, FieldModeCurrentMbFlag and BotFieldFlag.
#if defined(_DEBUG)
// Debug marker: store 0xDDD0 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
#if defined(_PROGRESSIVE)
// Read U+V, (UV MB size = 16x8)
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
add (1) MSGSRC.0:ud ORIX_CUR:w 12:w { NoDDClr } // Block origin x, moved right 12 bytes (last 4 bytes of the 16-byte UV row)
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// Progressive: the descriptor is passed directly to the send; 1 GRF is returned.
send (8) LEFT_TEMP_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV
#endif
#if defined(_FIELD) || defined(_MBAFF)
// FieldModeCurrentMbFlag selects frame vs. field access for this read.
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read U+V
add (1) MSGSRC.0:ud ORIX_CUR:w 12:w { NoDDClr } // Block origin x, moved right 12 bytes (last 4 bytes of the 16-byte UV row)
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// Load NV12 U+V
// Set message descriptor
// (The ILDB_LABEL names below say "Y" although this module reads U+V.)
(f0.0) if (1) ILDB_LABEL(ELSE_Y_2x8T)
// Frame picture
mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud // Read 1 GRF from DEST_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_2x8T):
else (1) ILDB_LABEL(ENDIF_Y_2x8T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
// MSGSRC.1 (y) is left unchanged in this branch.
(f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV top field
endif
ILDB_LABEL(ENDIF_Y_2x8T):
// Read 1 GRF from DEST surface as the above MB has been deblocked.
// send (8) BUF_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud MSGDSC
send (8) LEFT_TEMP_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
#endif

View File

@@ -0,0 +1,93 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Y_16x16T.asm
//
// Load the current MB's Y 16x16 block into SRC_YD with a single send.
// The transpose is NOT performed by this module: the include of
// Transpose_Cur_Y_16x16.asm at the bottom is commented out.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
//
// Preprocessor switches:
// _PROGRESSIVE / _FIELD: select how MSGSRC and MSGDSC are set up
// _DEBUG: writes an entry marker
// NOTE(review): if neither _PROGRESSIVE nor _FIELD is defined, nothing in this
// module writes MSGSRC or MSGDSC before the send -- confirm that every includer
// defines one of them.
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD1 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD1:w
#endif
// Read Y
#if defined(_PROGRESSIVE)
// The 2-wide mov copies the word at ORIX_CUR and the word that follows it into
// MSGSRC.0 and MSGSRC.1 (presumably ORIY_CUR is laid out right after ORIX_CUR --
// TODO confirm against the register map).
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16)
//send (8) SRC_YD(0)<1> MSGHDRC MSGSRC<8;8,1>:ud DWBRMSGDSC_SMPLR+0x00080000+BI_SRC_Y
// Progressive: a single descriptor, response length 8 GRFs, surface BI_SRC_Y.
mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud
#endif
#if defined(_FIELD)
// cmp.z.f0.0 (1) NULLREGW PicTypeC:w 0:w // Get pic type flag
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero, i.e. bottom field.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// they are used later in this file
// Same payload setup as the progressive path above.
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16)
// Set message descriptor
// Frame picture
// (f0.0) mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y
// (f0.0) jmpi load_Y_16x16T
// Non frame picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
(f0.1) mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y top field
//load_Y_16x16T:
#endif
// Issue the read: payload MSGSRC, descriptor MSGDSC, result written to SRC_YD(0).
send (8) SRC_YD(0)<1> MSGHDRC MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// #include "Transpose_Cur_Y_16x16.asm"
// End of load_Y_16x16T

View File

@@ -0,0 +1,92 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Y_16x16T.asm
// NOTE(review): this header carries the same module name as the
// _PROGRESSIVE/_FIELD variant; the code below handles the MBAFF case
// (per-MB frame/field selection), so this is presumably the Mbaff variant
// of the file -- confirm the file name.
//
// Load the current MB's Y 16x16 block into SRC_YD with a single send.
// The transpose is NOT performed by this module: the include of
// Transpose_Cur_Y_16x16.asm at the bottom is commented out.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD1 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD1:w
#endif
// FieldModeCurrentMbFlag selects frame vs. field access for this read.
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read Y
// The 2-wide mov copies the word at ORIX_CUR and the word that follows it into
// MSGSRC.0 and MSGSRC.1 (presumably ORIY_CUR is laid out right after ORIX_CUR --
// TODO confirm against the register map).
mov (2) MSGSRC.0<1>:d ORIX_CUR<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16)
// Set message descriptor, etc.
(f0.0) if (1) ILDB_LABEL(ELSE_Y_16x16T)
// Frame picture
mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_16x16T):
else (1) ILDB_LABEL(ENDIF_Y_16x16T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
(f0.1) mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud // Read 8 GRFs from SRC_Y top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ILDB_LABEL(ENDIF_Y_16x16T):
// Issue the read: payload MSGSRC, descriptor MSGDSC, result written to SRC_YD(0).
send (8) SRC_YD(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// #include "Transpose_Cur_Y_16x16.asm"
// End of load_Y_16x16T

View File

@@ -0,0 +1,115 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Cur_Y_Right_Most_4x16.asm
//
// Load luma cur MB right most 4x16 into LEFT_TEMP_B
// (the send below addresses the destination as LEFT_TEMP_D; 2 GRFs are returned).
// The read goes through BI_DEST_Y. The transpose described at the end of this
// module is documentation only: its mov instructions are commented out.
#if defined(_DEBUG)
// Debug marker: store 0xDDD0 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
#if defined(_PROGRESSIVE)
// Read Y
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
add (1) MSGSRC.0<1>:ud ORIX_CUR:w 12:w { NoDDClr } // Block origin, move right 12 bytes
mov (1) MSGSRC.1<1>:ud ORIY_CUR:w { NoDDClr, NoDDChk } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // Block width and height (4x16)
// Progressive: the descriptor is passed directly to the send; 2 GRFs are returned.
send (8) LEFT_TEMP_D(0)<1> MSGHDRL MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y
#endif
#if defined(_FIELD) || defined(_MBAFF)
// FieldModeCurrentMbFlag selects frame vs. field access for this read.
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read Y
add (1) MSGSRC.0<1>:ud ORIX_CUR:w 12:w { NoDDClr } // Block origin, move right 12 bytes
mov (1) MSGSRC.1<1>:ud ORIY_CUR:w { NoDDClr, NoDDChk } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // Block width and height (4x16)
// Set message descriptor, etc.
(f0.0) if (1) ILDB_LABEL(ELSE_Y_4x16T)
// Frame picture
mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_4x16T):
else (1) ILDB_LABEL(ENDIF_Y_4x16T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
// MSGSRC.1 (y) is left unchanged in this branch.
(f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y top field
endif
ILDB_LABEL(ENDIF_Y_4x16T):
// send (8) BUF_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud MSGDSC
send (8) LEFT_TEMP_D(0)<1> MSGHDRL MSGSRC<8;8,1>:ud DAPREAD MSGDSC
#endif
// Transpose 4x16 to 16x4 (reference only -- the instructions are disabled below)
// Input received from dport:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Output of transpose: <1> <= <32;8,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
/*
// Transpose the data, also occupy 2 GRFs
mov (16) PREV_MB_YB(0)<1> BUF_B(0, 0)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(0, 16)<1> BUF_B(0, 1)<32;8,4> { NoDDChk }
mov (16) PREV_MB_YB(1)<1> BUF_B(0, 2)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(1, 16)<1> BUF_B(0, 3)<32;8,4> { NoDDChk }
*/
// End of load_Cur_Y_Right_Most_4x16

View File

@@ -0,0 +1,106 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Left_UV_2X8T.Asm
//
// Load a 4-byte-wide by 8-row strip of interleaved NV12 U+V at the left MB
// origin (ORIX_LEFT, ORIY_LEFT/2) into LEFT_TEMP_D (1 GRF).
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Destination region:
// LEFT_TEMP_D: receives the 1 GRF returned by the send
// (BUF_D, named by the original header, appears only in a commented-out send.)
// Binding Table Index:
// BI_DEST_UV: binding table index used by every active read in this module
// (the original header named BI_SRC_UV, which the code does not reference)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD0 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
#if defined(_PROGRESSIVE)
// Read U+V
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
mov (1) MSGSRC.0:ud ORIX_LEFT:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_LEFT:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// Progressive: the descriptor is passed directly to the send; 1 GRF is returned.
send (8) LEFT_TEMP_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV
#endif
#if defined(_FIELD) || defined(_MBAFF)
// FieldModeCurrentMbFlag determines how to access left MB
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read U+V
mov (1) MSGSRC.0:ud ORIX_LEFT:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_LEFT:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// Load NV12 U+V
// Set message descriptor
// (The ILDB_LABEL names below say "Y" although this module reads U+V.)
(f0.0) if (1) ILDB_LABEL(ELSE_Y_2x8T)
// Frame picture
mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud // Read 1 GRF from DEST_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_2x8T):
else (1) ILDB_LABEL(ENDIF_Y_2x8T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
// MSGSRC.1 (y) is left unchanged in this branch.
(f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV top field
endif
ILDB_LABEL(ENDIF_Y_2x8T):
// Read 1 GRF from DEST surface as the above MB has been deblocked.
// send (8) BUF_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud MSGDSC
send (8) LEFT_TEMP_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
#endif
// End of load_Left_UV_2x8T.asm

View File

@@ -0,0 +1,109 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Left_UV_2X8T.Asm
// NOTE(review): this header carries the same module name as the
// _PROGRESSIVE/_FIELD/_MBAFF-guarded variant; this copy has no such guards and
// halves y in the field branch, so it is presumably the Mbaff variant of the
// file -- confirm the file name.
//
// Load a 4-byte-wide by 8-row strip of interleaved NV12 U+V at the left MB
// origin (ORIX_LEFT, ORIY_LEFT/2) into LEFT_TEMP_D (1 GRF).
// The transpose described at the end of this module is documentation only:
// its mov instructions are commented out.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Destination region:
// LEFT_TEMP_D: receives the 1 GRF returned by the send
// (BUF_D, named by the original header, appears only in a commented-out send.)
// Binding Table Index:
// BI_DEST_UV: binding table index used by every active read in this module
// (the original header named BI_SRC_UV, which the code does not reference)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug marker: store 0xDDD0 in EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
// FieldModeCurrentMbFlag determines how to access left MB
// f0.0 is set when the FieldModeCurrentMbFlag bit of BitFlags is clear (frame MB).
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
// Read U+V
// NOTE(review): the NoDDClr/NoDDChk options appear to chain these three partial
// writes to MSGSRC; keep them adjacent and in this order unless verified
// against the ISA documentation.
mov (1) MSGSRC.0:ud ORIX_LEFT:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_LEFT:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// Load NV12 U+V
// Set message descriptor
// (The ILDB_LABEL names below say "Y" although this module reads U+V.)
(f0.0) if (1) ILDB_LABEL(ELSE_Y_2x8T)
// Frame picture
mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud // Read 1 GRF from DEST_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_2x8T):
else (1) ILDB_LABEL(ENDIF_Y_2x8T)
// Field picture
// Exactly one of the two predicated moves below executes, selected by f0.1.
(f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ILDB_LABEL(ENDIF_Y_2x8T):
// Read 1 GRF from DEST surface as the above MB has been deblocked.
// send (8) BUF_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud MSGDSC
send (8) LEFT_TEMP_D(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// Input from dport for transpose (reference only -- the instructions are disabled below):
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// Output of transpose: <1> <=== <16;8,2>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
/*
mov (8) PREV_MB_UW(0,0)<1> BUF_W(0,0)<16;8,2> { NoDDClr }
mov (8) PREV_MB_UW(0,8)<1> BUF_W(0,1)<16;8,2> { NoDDChk }
*/
// End of load_Left_UV_2x8T.asm

View File

@@ -0,0 +1,126 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Y_4x16T.asm
//
// Load luma left MB 4x16 and transpose 4x16 to 16x4.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// PREV_MB_YD: PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 2 GRFs
//
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
//
//----------------------------------------------------------------
// Purpose: read the 4x16 luma block at ORIX_LEFT into LEFT_TEMP_D (2 GRFs)
// with a data-port read (DAPREAD) on the BI_DEST_Y surface.
// Debug builds only: record marker 0xDDD0 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
// Progressive build: the message descriptor is a constant, so it is passed
// straight to send instead of going through MSGDSC.
#if defined(_PROGRESSIVE)
// Read Y
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // Block width and height (4x16)
// mov (1) MSGDSC DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
send (8) LEFT_TEMP_D(0)<1> MSGHDRL MSGSRC<8;8,1>:ud DAPREAD RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y
#endif
// Field / MBAFF build: the descriptor depends on run-time flags, so it is
// built in MSGDSC first and the send at the end of this section uses it.
#if defined(_FIELD) || defined(_MBAFF)
// FieldModeCurrentMbFlag determines how to access left MB
// f0.0 is set when the FieldModeCurrentMbFlag bit is clear (and.z), i.e. frame access.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag (f0.1 set when BotFieldFlag bit is set)
// Read Y
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // Block width and height (4x16)
// Set message descriptor, etc.
// NOTE(review): ILDB_LABEL(...) is defined elsewhere; presumably it decorates
// the label name so this module can be included more than once -- confirm.
(f0.0) if (1) ILDB_LABEL(ELSE_Y_4x16T)
// Frame picture
mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ILDB_LABEL(ELSE_Y_4x16T):
else (1) ILDB_LABEL(ENDIF_Y_4x16T)
// Field picture: pick the bottom-field or top-field descriptor from f0.1
(f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y top field
endif
ILDB_LABEL(ENDIF_Y_4x16T):
// send (8) BUF_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud MSGDSC
// Issue the read with the descriptor selected above.
send (8) LEFT_TEMP_D(0)<1> MSGHDRL MSGSRC<8;8,1>:ud DAPREAD MSGDSC
#endif
// Transpose 4x16 to 16x4
// Input received from dport:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Output of transpose: <1> <= <32;8,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
/*
// Transpose the data, also occupy 2 GRFs
mov (16) PREV_MB_YB(0)<1> BUF_B(0, 0)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(0, 16)<1> BUF_B(0, 1)<32;8,4> { NoDDChk }
mov (16) PREV_MB_YB(1)<1> BUF_B(0, 2)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(1, 16)<1> BUF_B(0, 3)<32;8,4> { NoDDChk }
*/
// End of load_Y_4x16T

View File

@@ -0,0 +1,114 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: load_Y_4x16T.asm
//
// Load luma left MB 4x16 and transpose 4x16 to 16x4.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// PREV_MB_YD: PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 2 GRFs
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface (the surface this module reads from)
//
//----------------------------------------------------------------
// Purpose: read the 4x16 luma block at ORIX_LEFT into LEFT_TEMP_D (2 GRFs)
// with a data-port read (DAPREAD) on the BI_DEST_Y surface. Frame or field
// access is chosen at run time from FieldModeCurrentMbFlag.
// Debug builds only: record marker 0xDDD0 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD0:w
#endif
// FieldModeCurrentMbFlag determines how to access left MB
// f0.0 is set when the FieldModeCurrentMbFlag bit is clear (and.z), i.e. frame access.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag (f0.1 set when BotFieldFlag bit is set)
// Read Y
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // Block width and height (4x16)
// Set message descriptor, etc.
(f0.0) if (1) ELSE_Y_4x16T
// Frame picture
mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ELSE_Y_4x16T:
else (1) ENDIF_Y_4x16T
// Field picture: pick the bottom-field or top-field descriptor from f0.1
(f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ENDIF_Y_4x16T:
// send (8) BUF_D(0)<1> MSGHDRY MSGSRC<8;8,1>:ud MSGDSC
// Issue the read with the descriptor and origin selected above.
send (8) LEFT_TEMP_D(0)<1> MSGHDRL MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// Transpose 4x16 to 16x4
// Input received from dport:
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// Output of transpose: <1> <= <32;8,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
/*
// Transpose the data, also occupy 2 GRFs
mov (16) PREV_MB_YB(0)<1> BUF_B(0, 0)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(0, 16)<1> BUF_B(0, 1)<32;8,4> { NoDDChk }
mov (16) PREV_MB_YB(1)<1> BUF_B(0, 2)<32;8,4> { NoDDClr }
mov (16) PREV_MB_YB(1, 16)<1> BUF_B(0, 3)<32;8,4> { NoDDChk }
*/
// End of load_Y_4x16T

View File

@@ -0,0 +1,100 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Top_UV_8X2.Asm
//
// Load UV 8X2 Block
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source Region Is :UB
// BUF_D: BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
// Binding Table Index:
// BI_DEST_UV: Binding Table Index Of UV Surface (NV12); this is the surface the module reads from
//
//----------------------------------------------------------------
// Purpose: read the 16x2 NV12 U+V block at (ORIX_TOP, ORIY_TOP/2) into
// TOP_MB_UD (1 GRF) with a data-port read (DAPREAD) on the BI_DEST_UV surface.
// Each build variant below fills MSGSRC and MSGDSC; the send after the last
// #endif is shared by both variants.
// Debug builds only: record marker 0xDDD2 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD2:w
#endif
#if defined(_PROGRESSIVE)
// Read U+V
mov (1) MSGSRC.0:ud ORIX_TOP:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_TOP:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0001000F:ud { NoDDChk } // NV12 U+V block width and height (16x2)
// Read 1 GRF from DEST surface as the above MB has been deblocked.
//send (8) TOP_MB_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV
mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud
#endif
#if defined(_FIELD)
// cmp.z.f0.0 (1) NULLREGW PicTypeC:w 0:w // Get pic type flag
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag (f0.1 set when BotFieldFlag bit is set)
// f0.1 selects the bottom-field or top-field descriptor below
// Read U+V
mov (1) MSGSRC.0:ud ORIX_TOP:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_TOP:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0001000F:ud { NoDDChk } // NV12 U+V block width and height (16x2)
// Load NV12 U+V
// Set message descriptor
// Frame picture (disabled: only the field descriptors are used in this build)
// (f0.0) mov (1) MSGDSC DWBRMSGDSC_RC+0x00010000+BI_DEST_UV:ud // Read 1 GRF from SRC_UV
// (f0.0) jmpi Load_Top_UV_8x2
// Field picture
(f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV top field
//Load_Top_UV_8x2:
// Read 1 GRF from DEST surface as the above MB has been deblocked.
// send (8) PREV_MB_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud MSGDSC
#endif
// Shared by both variants: MSGDSC is only written inside the sections above,
// so one of _PROGRESSIVE or _FIELD must be defined for it to be valid here.
send (8) TOP_MB_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// End of load_Top_UV_8x2.asm

View File

@@ -0,0 +1,109 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Top_UV_8X2.Asm
//
// Load UV 8X2 Block
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source Region Is :UB
// BUF_D: BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
// Binding Table Index:
// BI_DEST_UV: Binding Table Index Of UV Surface (NV12); this is the surface the module reads from
//
//----------------------------------------------------------------
// Purpose: read the NV12 U+V rows of the above MB into PREV_MB_UD with a
// data-port read (DAPREAD) on the BI_DEST_UV surface. Frame or field access
// is chosen at run time; in the frame path with DualFieldMode set, a 16x4
// block (2 GRFs) is read instead of 16x2 (1 GRF).
// Debug builds only: record marker 0xDDD2 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD2:w
#endif
// FieldModeCurrentMbFlag determines how to access above MB
// f0.0 is set when the FieldModeCurrentMbFlag bit is clear (and.z), i.e. frame access.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // f0.1 set when BotFieldFlag bit is set
// Read U+V
mov (1) MSGSRC.0:ud ORIX_TOP:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:d ORIY_TOP:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0001000F:ud { NoDDChk } // NV12 U+V block width and height (16x2)
// Load NV12 U+V
// Set message descriptor
(f0.0) if (1) ELSE_UV_8X2
// Frame picture
mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud // Read 1 GRF from DEST_UV
// Add vertical offset 8 for bot MB in MBAFF mode
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w
// Dual field mode setup
// f0.1 is reused here: it is now set when bit 0 of DualFieldMode is clear,
// in which case the 16x2 setup above is kept as is.
and.z.f0.1 (1) NULLREGW DualFieldMode:w 1:w
(f0.1) jmpi NOT_DUAL_FIELD_UV
add (1) MSGSRC.1:d MSGSRC.1:d -2:w { NoDDClr } // Load 4 lines instead of 2: start 2 rows higher
mov (1) MSGSRC.2:ud 0x0003000F:ud { NoDDChk } // New block width and height (16x4)
add (1) MSGDSC MSGDSC RESP_LEN(1):ud // 1 more GRF to receive
NOT_DUAL_FIELD_UV:
ELSE_UV_8X2:
else (1) ENDIF_UV_8X2
// Field picture
asr (1) MSGSRC.1:d ORIY_CUR:w 2:w // asr 1: NV12 U+V block origin y = half of Y comp
// asr 1: Reduce y by half in field access mode
(f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud // Read 1 GRF from DEST_UV top field
add (1) MSGSRC.1:d MSGSRC.1:d -2:w // for last 2 rows of above MB
endif
ENDIF_UV_8X2:
// Read from DEST surface as the above MB has been deblocked
// (1 GRF, or 2 GRFs when the dual-field path above extended the descriptor).
send (8) PREV_MB_UD(0)<1> MSGHDRU MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// End of load_Top_UV_8x2.asm

View File

@@ -0,0 +1,100 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Y_16X4.asm
//
// Load Y 16X4 Block to TOP_MB_YD
//
//----------------------------------------------------------------
// Symbols Need To Be Defined Before Including This Module
//
// Source Region In :Ud
// Src_YD: Src_Yd Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud // 3 Grfs (2 For Y, 1 For U+V)
//
// Source Region Is :Ub. The Same Region As :Ud Region
// Src_YB: Src_Yb Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub // 2 Grfs
//
// Binding Table Index:
// BI_DEST_Y: Binding Table Index Of Y Surface (the surface this module reads from)
//
// Temp Buffer:
// Buf_D: Buf_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud
// Buf_B: Buf_B Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub
//
//----------------------------------------------------------------
// Purpose: read the 16x4 luma block at ORIX_TOP into TOP_MB_YD (2 GRFs) with
// a data-port read (DAPREAD) on the BI_DEST_Y surface. Each build variant
// below fills MSGSRC and MSGDSC; the send after the last #endif is shared.
// Debug builds only: record marker 0xDDD2 in EntrySignatureC.
// NOTE(review): load_Top_UV_8x2.asm writes the same 0xDDD2 marker -- confirm
// whether this module was meant to have its own value.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD2:w
#endif
#if defined(_PROGRESSIVE)
// Read Y
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud { NoDDChk } // Block width and height (16x4)
mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
#endif
#if defined(_FIELD)
// cmp.z.f0.0 (1) NULLREGW PicTypeC:w 0:w // Get pic type flag
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag (f0.1 set when BotFieldFlag bit is set)
// f0.1 selects the bottom-field or top-field descriptor below
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud { NoDDChk } // Block width and height (16x4)
// Set message descriptor
// Frame picture (disabled: only the field descriptors are used in this build)
// (f0.0) mov (1) MSGDSC DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud // Read 2 GRFs from SRC_Y
// (f0.0) jmpi load_Y_16x4
// Field picture
(f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y top field
//load_Y_16x4:
// Read 2 GRFs from DEST surface, as the above MB has been deblocked
// send (8) PREV_MB_YD(0)<1> MSGHDRY MSGSRC<8;8,1>:ud MSGDSC
#endif
// Shared by both variants: MSGDSC is only written inside the sections above,
// so one of _PROGRESSIVE or _FIELD must be defined for it to be valid here.
send (8) TOP_MB_YD(0)<1> MSGHDRT MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// End of load_Y_16x4.asm

View File

@@ -0,0 +1,111 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module Name: Load_Y_16X4.asm
//
// Load Y 16X4 Block to PREV_MB_YD
//
//----------------------------------------------------------------
// Symbols Need To Be Defined Before Including This Module
//
// Source Region In :Ud
// Src_YD: Src_Yd Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud // 3 Grfs (2 For Y, 1 For U+V)
//
// Source Region Is :Ub. The Same Region As :Ud Region
// Src_YB: Src_Yb Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub // 2 Grfs
//
// Binding Table Index:
// BI_DEST_Y: Binding Table Index Of Y Surface (the surface this module reads from)
//
// Temp Buffer:
// Buf_D: Buf_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud
// Buf_B: Buf_B Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub
//
//----------------------------------------------------------------
// Purpose: read the luma rows of the above MB into PREV_MB_YD with a
// data-port read (DAPREAD) on the BI_DEST_Y surface. Frame or field access
// is chosen at run time; in the frame path with DualFieldMode set, a 16x8
// block (4 GRFs) is read instead of 16x4 (2 GRFs).
// Debug builds only: record marker 0xDDD2 in EntrySignatureC.
// NOTE(review): load_Top_UV_8x2.asm writes the same 0xDDD2 marker -- confirm
// whether this module was meant to have its own value.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD2:w
#endif
// FieldModeCurrentMbFlag determines how to access above MB
// f0.0 is set when the FieldModeCurrentMbFlag bit is clear (and.z), i.e. frame access.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // f0.1 set when BotFieldFlag bit is set
// Read Y
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w { NoDDClr } // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud { NoDDChk } // Block width and height (16x4)
// Set message descriptor
(f0.0) if (1) ELSE_Y_16x4
// Frame picture
mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y
// Add vertical offset 16 for bot MB in MBAFF mode
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w
// Dual field mode setup
// f0.1 is reused here: it is now set when bit 0 of DualFieldMode is clear,
// in which case the 16x4 setup above is kept as is.
and.z.f0.1 (1) NULLREGW DualFieldMode:w 1:w
(f0.1) jmpi NOT_DUAL_FIELD
add (1) MSGSRC.1:d MSGSRC.1:d -4:w { NoDDClr } // Load 8 lines in above MB: start 4 rows higher
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // New block width and height (16x8)
add (1) MSGDSC MSGDSC RESP_LEN(2):ud // 2 more GRF to receive
NOT_DUAL_FIELD:
ELSE_Y_16x4:
else (1) ENDIF_Y_16x4
asr (1) MSGSRC.1:d ORIY_CUR:w 1:w // Reduce y by half in field access mode
// Field picture: pick the bottom-field or top-field descriptor from f0.1
(f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y bottom field
(-f0.1) mov (1) MSGDSC RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud // Read 2 GRFs from DEST_Y top field
add (1) MSGSRC.1:d MSGSRC.1:d -4:w // for last 4 rows of above MB
endif
ENDIF_Y_16x4:
// Read from DEST surface, as the above MB has been deblocked
// (2 GRFs, or 4 GRFs when the dual-field path above extended the descriptor).
send (8) PREV_MB_YD(0)<1> MSGHDRY MSGSRC<8;8,1>:ud DAPREAD MSGDSC
// End of load_Y_16x4.asm

View File

@@ -0,0 +1,83 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: saveNV12_16x16.asm
//
// Save a NV12 16x16 block
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 4 GRF
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
// Purpose: write a 16x16 NV12 macroblock back to memory with two data-port
// writes (DAPWRITE): 8 GRFs of luma from SRC_YD to BI_DEST_Y, then 4 GRFs of
// interleaved U+V from SRC_UD to BI_DEST_UV.
// Debug builds only: record marker 0xDDD4 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD4:w
#endif
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud // Block width and height (16x16)
// Pack Y: each mov (16) of dwords copies 2 GRFs, so four of them fill payload GRFs 0..7
mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_YD(2)
mov (16) MSGPAYLOADD(4)<1> SRC_YD(4)
mov (16) MSGPAYLOADD(6)<1> SRC_YD(6)
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y // Write 8 GRFs
// U+V pass: same x origin, y halved, block shrunk to 16x8
asr (1) MSGSRC.1:ud MSGSRC.1:ud 1:w // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2<1>:ud 0x0007000F:ud // NV12 U+V block width and height (16x8)
// Pack U+V into payload GRFs 0..3, overwriting the luma payload sent above
mov (16) MSGPAYLOADD(0)<1> SRC_UD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_UD(2)
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV // Write 4 GRFs
// End of saveNV12_16x16.asm

View File

@@ -0,0 +1,80 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: saveNV12_16x4.asm
//
// Save a NV12 16x4 block
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 2 GRFs
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 1 GRF
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
// Purpose: write a 16x4 NV12 block at ORIX_TOP back to memory with two
// data-port writes (DAPWRITE): 2 GRFs of luma from SRC_YD to BI_DEST_Y, then
// 1 GRF of interleaved U+V from SRC_UD to BI_DEST_UV.
// Debug builds only: record marker 0xDDD5 in EntrySignatureC.
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD5:w
#endif
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w // Block origin (two words -> MSGSRC.0 and MSGSRC.1)
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud // Block width and height (16x4)
// Pack Y: one mov (16) of dwords copies the 2 luma GRFs
mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y // Write 2 GRFs
// U+V pass: same x origin, y halved, block shrunk to 16x2
asr (1) MSGSRC.1:ud MSGSRC.1:ud 1:w // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2<1>:ud 0x0001000F:ud // NV12 U+V block width and height (16x2)
// Pack U and V
// (disabled byte-wise interleave; the active mov below copies SRC_UD as one GRF of dwords)
// mov (16) MSGPAYLOADB(0,0)<2> SRC_UB(0,0)
// mov (16) MSGPAYLOADB(0,1)<2> SRC_VB(0,0)
mov (8) MSGPAYLOADD(0,0)<1> SRC_UD(0)
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV // Write 1 GRF
// End of saveNV12_16x4.asm

View File

@@ -0,0 +1,143 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: saveNV12_16x4T.asm
//
// Transpose the left-MB 16x4 Y block to 4x16 and the matching 16x2 UV block to
// 4x8 (NV12), then write both blocks to memory with two dataport writes.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Left MB region:
// PREV_MB_YB: Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
// PREV_MB_UW: Base=ryy ElementSize=2 SrcRegion=REGION(8,1) Type=uw
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw
//
// Other symbols referenced below: ORIX_LEFT, MSGSRC, MSGHDR, MSGPAYLOADB,
// MSGPAYLOADW, NULLREG, DAPWRITE, MSG_LEN, DWBWMSGDSC (and EntrySignatureC
// when _DEBUG is defined).
//
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD6 to EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD6:w
#endif
// MSGSRC.0/.1 <- the two consecutive words starting at ORIX_LEFT (block origin x, y).
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w // Block origin
// MSGSRC.2 reads as ((height-1) << 16) | (width-1); 0x000F0003 is the 4x16 block.
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud // 4x16
// Transpose Y, save them to MRFs
// 16x4 Y src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (16) <1> <=== <16;4,1>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The first step
// Each mov gathers a 4-byte-wide column group from all four source rows
// (source offsets 0, 4, 8, 12) into one 16-byte half of BUF_B.
mov (16) BUF_B(0,0)<1> PREV_MB_YB(0,0)<16;4,1>
mov (16) BUF_B(0,16)<1> PREV_MB_YB(0,4)<16;4,1>
mov (16) BUF_B(1,0)<1> PREV_MB_YB(0,8)<16;4,1>
mov (16) BUF_B(1,16)<1> PREV_MB_YB(0,12)<16;4,1>
//
// Second step (16) <1> <=== <1;4,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// The second step
// Disabled alternative kept from the original source (reads 2 rows per mov):
// mov (16) MSGPAYLOADB(0,0)<1> BUF_B(0,0)<32;8,4> // Read 2 rows, write 1 row
// mov (16) MSGPAYLOADB(0,16)<1> BUF_B(0,1)<32;8,4>
// mov (16) MSGPAYLOADB(1,0)<1> BUF_B(0,2)<32;8,4>
// mov (16) MSGPAYLOADB(1,16)<1> BUF_B(0,3)<32;8,4>
// Active version: within each 16-byte half, a stride-4 gather completes the 4x4 transposes.
mov (16) MSGPAYLOADB(0,0)<1> BUF_B(0,0)<1;4,4>
mov (16) MSGPAYLOADB(0,16)<1> BUF_B(0,16)<1;4,4>
mov (16) MSGPAYLOADB(1,0)<1> BUF_B(1,0)<1;4,4>
mov (16) MSGPAYLOADB(1,16)<1> BUF_B(1,16)<1;4,4>
// Transposed Y in 4x16 is ready for writing to dataport.
//
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y // Write 2 GRFs
/////////////////////////////////////////////////////////////////////////////////////////////////////
// Transpose U/V, save them to MRFs in NV12 format
// MSGSRC.0 (origin x) is reused from the Y write; only y and the block size change.
asr (1) MSGSRC.1:ud MSGSRC.1:ud 1:w // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2<1>:ud 0x00070003:ud // NV12 U+V block width and height (4x8)
// 16x2 UV src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (8) <1> <=== <8;4,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) BUF_W(0,0)<1> PREV_MB_UW(0,0)<8;4,1>
mov (8) BUF_W(0,8)<1> PREV_MB_UW(0,4)<8;4,1>
// Second step (8) <1> <=== <1;2,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) MSGPAYLOADW(0,0)<1> BUF_W(0,0)<1;2,4>
mov (8) MSGPAYLOADW(0,8)<1> BUF_W(0,8)<1;2,4>
// Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV // Write 1 GRF

View File

@@ -0,0 +1,83 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Cur_UV_8x8.asm
//
// Save UV 8x8 block (8x8U + 8x8V in NV12)
//
// Build-time switches used below:
//   _PROGRESSIVE - use the plain (frame) write descriptor.
//   _FIELD       - pick the bottom- or top-field descriptor from BotFieldFlag.
// NOTE(review): MSGDSC is only written under one of these switches; confirm
// the includer always defines _PROGRESSIVE or _FIELD.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 4 GRF
//
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD4 to EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD4:w
#endif
#if defined(_FIELD)
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero; it predicates the
// descriptor selection further down.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
// Build MSGSRC.0..2: origin x, origin y (halved for chroma), block size.
// NOTE(review): NoDDClr/NoDDChk look like dependency-control hints chaining
// these three writes to MSGSRC; keep the order intact.
mov (1) MSGSRC.0:ud ORIX_CUR:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
// 0x0007000F reads as ((height-1) << 16) | (width-1), i.e. 16x8.
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8)
// Copy the 4 source GRFs into the message payload, two registers per mov (16).
mov (16) MSGPAYLOADD(0)<1> SRC_UD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_UD(2)
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00400000+BI_DEST_UV
#endif
#if defined(_FIELD)
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud // Write 4 GRFs to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud // Write 4 GRFs to DEST_UV top field
#endif
// Issue the write using the descriptor chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Cur_UV_8x8.asm

View File

@@ -0,0 +1,92 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Cur_UV_8x8.asm
// NOTE(review): this copy handles MBAFF (see the frame/field branch below) but
// carries the same module name as the non-MBAFF file - confirm the real file name.
//
// Save UV 8x8 block (8x8U + 8x8V in NV12)
//
// The write descriptor and the y origin are chosen at run time:
//   frame MB : plain descriptor; y += 8 when the bottom flag (f0.1) is set.
//   field MB : bottom/top field descriptor by f0.1; y is halved.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 4 GRF
//
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD4 to EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD4:w
#endif
// f0.0 is set when FieldModeCurrentMbFlag is clear in the byte at
// r[ECM_AddrReg, BitFlags], i.e. the frame case taken by the `if` below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
// Build MSGSRC.0..2: origin x, origin y (halved for chroma), block size.
// NOTE(review): NoDDClr/NoDDChk look like dependency-control hints chaining
// these three writes to MSGSRC; keep the order intact.
mov (1) MSGSRC.0:ud ORIX_CUR:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_CUR:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
// 0x0007000F reads as ((height-1) << 16) | (width-1), i.e. 16x8.
mov (1) MSGSRC.2:ud 0x0007000F:ud { NoDDChk } // NV12 U+V block width and height (16x8)
// Copy the 4 source GRFs into the message payload, two registers per mov (16).
mov (16) MSGPAYLOADD(0)<1> SRC_UD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_UD(2)
// Set message descriptor
(f0.0) if (1) ELSE_UV_8X8
// Frame picture
mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud // Write 4 GRFs to DEST_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ELSE_UV_8X8:
else (1) ENDIF_UV_8X8
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud // Write 4 GRFs to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud // Write 4 GRFs to DEST_UV top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ENDIF_UV_8X8:
// Issue the write using the descriptor and origin chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Cur_UV_8x8.asm

View File

@@ -0,0 +1,86 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Cur_Y_16x16.asm
//
// Save a Y 16x16 block
//
// Build-time switches used below:
//   _PROGRESSIVE - use the plain (frame) write descriptor.
//   _FIELD       - pick the bottom- or top-field descriptor from BotFieldFlag.
// NOTE(review): MSGDSC is only written under one of these switches; confirm
// the includer always defines _PROGRESSIVE or _FIELD.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD4 to EntrySignatureC.
// NOTE(review): save_Cur_UV_8x8.asm writes the same 0xDDD4 value - confirm
// whether the two modules are meant to share a signature.
mov (1) EntrySignatureC:w 0xDDD4:w
#endif
#if defined(_FIELD)
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero; it predicates the
// descriptor selection further down.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
// MSGSRC.0/.1 <- the two consecutive words starting at ORIX_CUR (block origin x, y).
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w { NoDDClr } // Block origin
// 0x000F000F reads as ((height-1) << 16) | (width-1), i.e. 16x16.
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16)
// Pack Y
// Copy the 8 source GRFs into the message payload, two registers per mov (16).
mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_YD(2)
mov (16) MSGPAYLOADD(4)<1> SRC_YD(4)
mov (16) MSGPAYLOADD(6)<1> SRC_YD(6)
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00800000+BI_DEST_Y
#endif
#if defined(_FIELD)
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud // Write 8 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud // Write 8 GRFs to DEST_Y top field
#endif
// Issue the write using the descriptor chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Cur_Y_16x16.asm

View File

@@ -0,0 +1,94 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Cur_Y_16x16.asm
// NOTE(review): this copy handles MBAFF (see the frame/field branch below) but
// carries the same module name as the non-MBAFF file - confirm the real file name.
//
// Save a Y 16x16 block
//
// The write descriptor and the y origin are chosen at run time:
//   frame MB : plain descriptor; y += 16 when the bottom flag (f0.1) is set.
//   field MB : bottom/top field descriptor by f0.1; y is halved.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 8 GRFs
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD4 to EntrySignatureC.
// NOTE(review): save_Cur_UV_8x8.asm writes the same 0xDDD4 value - confirm
// whether the two modules are meant to share a signature.
mov (1) EntrySignatureC:w 0xDDD4:w
#endif
// f0.0 is set when FieldModeCurrentMbFlag is clear in the byte at
// r[ECM_AddrReg, BitFlags], i.e. the frame case taken by the `if` below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
// MSGSRC.0/.1 <- the two consecutive words starting at ORIX_CUR (block origin x, y).
mov (2) MSGSRC.0<1>:ud ORIX_CUR<2;2,1>:w { NoDDClr } // Block origin
// 0x000F000F reads as ((height-1) << 16) | (width-1), i.e. 16x16.
mov (1) MSGSRC.2<1>:ud 0x000F000F:ud { NoDDChk } // Block width and height (16x16)
// Pack Y
// Copy the 8 source GRFs into the message payload, two registers per mov (16).
mov (16) MSGPAYLOADD(0)<1> SRC_YD(0) // Compressed inst
mov (16) MSGPAYLOADD(2)<1> SRC_YD(2)
mov (16) MSGPAYLOADD(4)<1> SRC_YD(4)
mov (16) MSGPAYLOADD(6)<1> SRC_YD(6)
// Set message descriptor
(f0.0) if (1) ELSE_Y_16x16
// Frame picture
mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud // Write 8 GRFs to DEST_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ELSE_Y_16x16:
else (1) ENDIF_Y_16x16
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud // Write 8 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud // Write 8 GRFs to DEST_Y top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ENDIF_Y_16x16:
// Issue the write using the descriptor and origin chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Cur_Y_16x16.asm

View File

@@ -0,0 +1,102 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Left_UV_8x2T.asm
//
// Transpose 8x2 to 2x8 UV data and write to memory
// (one UV pair per word: the source is 16x2 bytes, the written block is 4x8 bytes)
//
// Build-time switches used below:
//   _PROGRESSIVE - use the plain (frame) write descriptor.
//   _FIELD       - pick the bottom- or top-field descriptor from BotFieldFlag.
// NOTE(review): MSGDSC is only written under one of these switches; confirm
// the includer always defines _PROGRESSIVE or _FIELD.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Left MB region:
// PREV_MB_UW: Base=ryy ElementSize=2 SrcRegion=REGION(8,1) Type=uw
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
// Temp buffer:
// LEFT_TEMP_W: word-typed scratch register used between the two transpose steps
//              (presumably ElementSize=2 SrcRegion=REGION(8,1) Type=uw - confirm
//              against its declaration)
//
//
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD6 to EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD6:w
#endif
#if defined(_FIELD)
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero; it predicates the
// descriptor selection further down.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
// Transpose U/V, save them to MRFs in NV12 format
// Build MSGSRC.0..2: origin x, origin y (halved for chroma), block size.
// NOTE(review): NoDDClr/NoDDChk look like dependency-control hints chaining
// consecutive partial writes to one register; keep the order intact.
mov (1) MSGSRC.0:ud ORIX_LEFT:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_LEFT:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
// 0x00070003 reads as ((height-1) << 16) | (width-1), i.e. 4x8.
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// 16x2 UV src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (8) <1> <=== <8;4,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) LEFT_TEMP_W(0,0)<1> PREV_MB_UW(0,0)<8;4,1> { NoDDClr }
mov (8) LEFT_TEMP_W(0,8)<1> PREV_MB_UW(0,4)<8;4,1> { NoDDChk }
// Second step (8) <1> <=== <1;2,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) MSGPAYLOADW(0,0)<1> LEFT_TEMP_W(0,0)<1;2,4>
mov (8) MSGPAYLOADW(0,8)<1> LEFT_TEMP_W(0,8)<1;2,4>
// Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00100000+BI_DEST_UV
#endif
#if defined(_FIELD)
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV top field
#endif
// Issue the write using the descriptor chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC

View File

@@ -0,0 +1,112 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Left_UV_8x2T.asm
// NOTE(review): this copy handles MBAFF (see the frame/field branch below) but
// carries the same module name as the non-MBAFF file - confirm the real file name.
//
// Transpose 8x2 to 2x8 UV data and write to memory
// (one UV pair per word: the source is 16x2 bytes, the written block is 4x8 bytes)
//
// The write descriptor and the y origin are chosen at run time:
//   frame MB : plain descriptor; y += 8 when the bottom flag (f0.1) is set.
//   field MB : bottom/top field descriptor by f0.1; y is halved.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Left MB region:
// PREV_MB_UW: Base=ryy ElementSize=2 SrcRegion=REGION(8,1) Type=uw
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
// Temp buffer:
// BUF_W: BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw
//
//
#if defined(_DEBUG)
// Debug builds only: write the marker 0xDDD6 to EntrySignatureC.
mov (1) EntrySignatureC:w 0xDDD6:w
#endif
// f0.0 is set when FieldModeCurrentMbFlag is clear in the byte at
// r[ECM_AddrReg, BitFlags], i.e. the frame case taken by the `if` below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when (BitFields & BotFieldFlag) is non-zero.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
// Transpose U/V, save them to MRFs in NV12 format
// Build MSGSRC.0..2: origin x, origin y (halved for chroma), block size.
// NOTE(review): NoDDClr/NoDDChk look like dependency-control hints chaining
// consecutive partial writes to one register; keep the order intact.
mov (1) MSGSRC.0:ud ORIX_LEFT:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_LEFT:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
// 0x00070003 reads as ((height-1) << 16) | (width-1), i.e. 4x8.
mov (1) MSGSRC.2:ud 0x00070003:ud { NoDDChk } // NV12 U+V block width and height (4x8)
// 16x2 UV src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (8) <1> <=== <8;4,1>:w
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) BUF_W(0,0)<1> PREV_MB_UW(0,0)<8;4,1> { NoDDClr }
mov (8) BUF_W(0,8)<1> PREV_MB_UW(0,4)<8;4,1> { NoDDChk }
// Second step (8) <1> <=== <1;2,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
mov (8) MSGPAYLOADW(0,0)<1> BUF_W(0,0)<1;2,4>
mov (8) MSGPAYLOADW(0,8)<1> BUF_W(0,8)<1;2,4>
// Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
// Set message descriptor
(f0.0) if (1) ELSE_UV_8X2T
// Frame picture
mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud // Write 1 GRF to DEST_UV
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w // Add vertical offset 8 for bot MB in MBAFF mode
ELSE_UV_8X2T:
else (1) ENDIF_UV_8X2T
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ENDIF_UV_8X2T:
// Issue the write using the descriptor and origin chosen above.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC

View File

@@ -0,0 +1,119 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Left_Y_16x4T.asm
//
// Transpose 16x4 to 4x16 Y data and write to memory
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Left MB region:
// PREV_MB_YB: Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
// Binding table index:
// BI_SRC_Y: Binding table index of Y surface
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//
//
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xDDD6:w
#endif
#if defined(_FIELD)
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // 4x16
// Transpose Y, save them to MRFs
// 16x4 Y src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (16) <1> <=== <16;4,1>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The first step
mov (16) LEFT_TEMP_B(0,0)<1> PREV_MB_YB(0,0)<16;4,1> { NoDDClr }
mov (16) LEFT_TEMP_B(0,16)<1> PREV_MB_YB(0,4)<16;4,1> { NoDDChk }
mov (16) LEFT_TEMP_B(1,0)<1> PREV_MB_YB(0,8)<16;4,1> { NoDDClr }
mov (16) LEFT_TEMP_B(1,16)<1> PREV_MB_YB(0,12)<16;4,1> { NoDDChk }
//
// Second step (16) <1> <=== <1;4,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// The second step
mov (16) MSGPAYLOADB(0,0)<1> LEFT_TEMP_B(0,0)<1;4,4>
mov (16) MSGPAYLOADB(0,16)<1> LEFT_TEMP_B(0,16)<1;4,4>
mov (16) MSGPAYLOADB(1,0)<1> LEFT_TEMP_B(1,0)<1;4,4>
mov (16) MSGPAYLOADB(1,16)<1> LEFT_TEMP_B(1,16)<1;4,4>
// Transposed Y in 4x16 is ready for writing to dataport.
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00200000+BI_DEST_Y
#endif
#if defined(_FIELD)
// Field picture
(f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y top field
#endif
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC

View File

@@ -0,0 +1,131 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Left_Y_16x4T.asm
//
// Transpose 16x4 to 4x16 Y data and write to memory
//
// Flow: fill the block-write header fields in MSGSRC, transpose the left MB's
// Y bytes in two region-shuffling passes (PREV_MB_YB -> BUF_B -> MSGPAYLOADB),
// select the message descriptor for frame or field access, then issue one
// data-port write.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Left MB region:
// PREV_MB_YB: Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface (this is the symbol the code below uses)
//
// Temp buffer:
// BUF_B: BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
//
// Also referenced below: ECM_AddrReg, BitFlags, FieldModeCurrentMbFlag,
// BitFields, BotFieldFlag, ORIX_LEFT, MSGSRC, MSGHDR, MSGDSC, MSGPAYLOADB.
//
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0xDDD6:w
#endif
// f0.0 is set when the FieldModeCurrentMbFlag bit is clear in the byte at
// r[ECM_AddrReg, BitFlags]; it selects the frame branch of the if/else below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// f0.1 is set when the BotFieldFlag bit is set in BitFields.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
mov (2) MSGSRC.0<1>:ud ORIX_LEFT<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x000F0003:ud { NoDDChk } // 4x16 (presumably width-1 = 3 in the low word, height-1 = 15 in the high word)
// Transpose Y, save them to MRFs
// 16x4 Y src in GRF (each pix is specified as yx)
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// First step (16) <1> <=== <16;4,1>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// The first step
// Each NoDDClr/NoDDChk pair below writes the two 16-byte halves of one BUF_B register.
mov (16) BUF_B(0,0)<1> PREV_MB_YB(0,0)<16;4,1> { NoDDClr }
mov (16) BUF_B(0,16)<1> PREV_MB_YB(0,4)<16;4,1> { NoDDChk }
mov (16) BUF_B(1,0)<1> PREV_MB_YB(0,8)<16;4,1> { NoDDClr }
mov (16) BUF_B(1,16)<1> PREV_MB_YB(0,12)<16;4,1> { NoDDChk }
//
// Second step (16) <1> <=== <1;4,4>
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
// +-----------------------+-----------------------+-----------------------+-----------------------+
// |f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
// +-----------------------+-----------------------+-----------------------+-----------------------+
//
// The second step
mov (16) MSGPAYLOADB(0,0)<1> BUF_B(0,0)<1;4,4>
mov (16) MSGPAYLOADB(0,16)<1> BUF_B(0,16)<1;4,4>
mov (16) MSGPAYLOADB(1,0)<1> BUF_B(1,0)<1;4,4>
mov (16) MSGPAYLOADB(1,16)<1> BUF_B(1,16)<1;4,4>
// Transposed Y in 4x16 is ready for writing to dataport.
//***** Left MB is loaded the same as indicated by FieldModeCurrentMbFlag.
// Set message descriptor
// f0.0 set (field-mode flag clear) -> frame branch; otherwise -> field branch.
(f0.0) if (1) ELSE_Y_16x4T
// Frame picture
mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w // Add vertical offset 16 for bot MB in MBAFF mode
ELSE_Y_16x4T:
else (1) ENDIF_Y_16x4T
// Field picture
// f0.1 (BotFieldFlag) picks the bottom-field or top-field descriptor.
(f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y top field
asr (1) MSGSRC.1:d MSGSRC.1:d 1:w // Reduce y by half in field access mode
endif
ENDIF_Y_16x4T:
// Issue the write; the result register is null, so no response data is kept.
send (8) null:ud MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC

View File

@@ -0,0 +1,82 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Top_UV_8x2.asm
//
// Save UV 8x2 block (8x2U + 8x2V in NV12)
//
// Flow: fill the block-write header fields in MSGSRC, copy one GRF of chroma
// data into the message payload, choose the message descriptor (progressive,
// or top/bottom field), then issue one data-port write.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 1 GRF
// NOTE(review): the code below reads TOP_MB_UD(0), not SRC_UD - this header entry looks stale; confirm.
//
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
// Build switches: MSGDSC is only assigned under _PROGRESSIVE or _FIELD, so one
// of them is expected to be defined by the including kernel.
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0xDDD5:w
#endif
#if defined(_FIELD)
// f0.1 is set when the BotFieldFlag bit is set in BitFields
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
mov (1) MSGSRC.0:ud ORIX_TOP:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_TOP:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0001000F:ud { NoDDChk } // NV12 U+V block width and height (16x2)
// Copy the 8 dwords (1 GRF) of chroma data into the message payload
mov (8) MSGPAYLOADD(0,0)<1> TOP_MB_UD(0)
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00100000+BI_DEST_UV
#endif
#if defined(_FIELD)
// Field picture
// f0.1 (BotFieldFlag) picks the bottom-field or top-field descriptor.
(f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV top field
#endif
// The send targets WritebackResponse(0) rather than null; presumably the _WC
// descriptor requests a write-commit response - TODO confirm.
send (8) WritebackResponse(0)<1> MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Top_UV_8x2.asm

View File

@@ -0,0 +1,99 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Top_UV_8x2.asm
//
// Save UV 8x2 block (8x2U + 8x2V in NV12)
//
// This variant selects frame/field access at run time from
// FieldModeCurrentMbFlag and picks the payload row from DualFieldMode.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_UD: SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 1 GRF
// NOTE(review): the code below reads PREV_MB_UD(0)/PREV_MB_UD(1), not SRC_UD - this header entry looks stale; confirm.
//
// Binding table index:
// BI_DEST_UV: Binding table index of UV surface (NV12)
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0xDDD5:w
#endif
// f0.1 (8 channels, to predicate the mov (8) below) is set when bit 0 of
// DualFieldMode is clear, i.e. when NOT in dual field mode.
and.z.f0.1 (8) NULLREGW DualFieldMode<0;1,0>:w 1:w
// FieldModeCurrentMbFlag determines how to access above MB
// f0.0 is set when the flag bit is clear; it selects the frame branch below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
// Pack U and V
mov (1) MSGSRC.0:ud ORIX_TOP:w { NoDDClr } // Block origin
asr (1) MSGSRC.1:ud ORIY_TOP:w 1:w { NoDDClr, NoDDChk } // NV12 U+V block origin y = half of Y comp
mov (1) MSGSRC.2:ud 0x0001000F:ud { NoDDChk } // NV12 U+V block width and height (16x2)
// Dual field mode
(f0.1) mov (8) MSGPAYLOADD(0)<1> PREV_MB_UD(0)
(-f0.1) mov (8) MSGPAYLOADD(0)<1> PREV_MB_UD(1) // for dual field mode, write last 2 rows
// Set message descriptor
// f0.1 is reused from here on: it is now set when BotFieldFlag is set in BitFields.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
(f0.0) if (1) ELSE_UV_8X2_SAVE
// Frame picture
mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud // Write 1 GRFs to DEST_UV
// Add vertical offset 8 for bot MB in MBAFF mode
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 8:w
ELSE_UV_8X2_SAVE:
else (1) ENDIF_UV_8X2_SAVE
// Field branch recomputes y from ORIY_CUR, replacing the ORIY_TOP-based value set above.
asr (1) MSGSRC.1:d ORIY_CUR:w 2:w // asr 1: NV12 U+V block origin y = half of Y comp
// asr 1: Reduce y by half in field access mode
// Field picture
// f0.1 (BotFieldFlag) picks the bottom-field or top-field descriptor.
(f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud // Write 1 GRF to DEST_UV top field
add (1) MSGSRC.1:d MSGSRC.1:d -2:w // step back 2 rows into the above MB (original comment said "last 4 rows"; the block height here is 2 - NOTE(review): confirm)
endif
ENDIF_UV_8X2_SAVE:
// The send targets WritebackResponse(0) rather than null; presumably the _WC
// descriptor requests a write-commit response - TODO confirm.
send (8) WritebackResponse(0)<1> MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Top_UV_8x2.asm

View File

@@ -0,0 +1,82 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Top_Y_16x4.asm
//
// Save a Y 16x4 block
//
// Flow: fill the block-write header fields in MSGSRC, copy two GRFs of luma
// data into the message payload, choose the message descriptor (progressive,
// or top/bottom field), then issue one data-port write.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 2 GRFs
// NOTE(review): the code below reads TOP_MB_YD(0), not SRC_YD - this header entry looks stale; confirm.
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
//
// Build switches: MSGDSC is only assigned under _PROGRESSIVE or _FIELD, so one
// of them is expected to be defined by the including kernel.
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0xDDD5:w
#endif
#if defined(_FIELD)
// f0.1 is set when the BotFieldFlag bit is set in BitFields
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w // Get bottom field flag
#endif
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud { NoDDChk } // Block width and height (16x4)
// Pack Y
// 16 dwords = 2 GRFs copied by a single instruction
mov (16) MSGPAYLOADD(0)<1> TOP_MB_YD(0) // Compressed inst
#if defined(_PROGRESSIVE)
mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud
// send (8) NULLREG MSGHDR MSGSRC<8;8,1>:ud DWBWMSGDSC+0x00200000+BI_DEST_Y
#endif
#if defined(_FIELD)
// Field picture
// f0.1 (BotFieldFlag) picks the bottom-field or top-field descriptor.
(f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y top field
#endif
// The send targets WritebackResponse(0) rather than null; presumably the _WC
// descriptor requests a write-commit response - TODO confirm.
send (8) WritebackResponse(0)<1> MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Top_Y_16x4.asm

View File

@@ -0,0 +1,99 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: save_Top_Y_16x4.asm
//
// Save a Y 16x4 block
//
// This variant selects frame/field access at run time from
// FieldModeCurrentMbFlag and picks the payload rows from DualFieldMode.
//
//----------------------------------------------------------------
// Symbols need to be defined before including this module
//
// Source region in :ud
// SRC_YD: SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 2 GRFs
// NOTE(review): the code below reads PREV_MB_YD(0)/PREV_MB_YD(2), not SRC_YD - this header entry looks stale; confirm.
//
// Binding table index:
// BI_DEST_Y: Binding table index of Y surface
//
//----------------------------------------------------------------
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0xDDD5:w
#endif
// f0.1 (16 channels, to predicate the mov (16) below) is set when bit 0 of
// DualFieldMode is clear, i.e. when NOT in dual field mode.
and.z.f0.1 (16) NULLREGW DualFieldMode<0;1,0>:w 1:w
// FieldModeCurrentMbFlag determines how to access above MB
// f0.0 is set when the flag bit is clear; it selects the frame branch below.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FieldModeCurrentMbFlag:w
mov (2) MSGSRC.0<1>:ud ORIX_TOP<2;2,1>:w { NoDDClr } // Block origin
mov (1) MSGSRC.2<1>:ud 0x0003000F:ud { NoDDChk } // Block width and height (16x4)
// Pack Y
// Dual field mode
(f0.1) mov (16) MSGPAYLOADD(0)<1> PREV_MB_YD(0) // Compressed inst
(-f0.1) mov (16) MSGPAYLOADD(0)<1> PREV_MB_YD(2) // for dual field mode, write last 4 rows
// Set message descriptor
// f0.1 is reused from here on: it is now set when BotFieldFlag is set in BitFields.
and.nz.f0.1 (1) NULLREGW BitFields:w BotFieldFlag:w
(f0.0) if (1) ELSE_Y_16x4_SAVE
// Frame picture
mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y
// Add vertical offset 16 for bot MB in MBAFF mode
(f0.1) add (1) MSGSRC.1:d MSGSRC.1:d 16:w
ELSE_Y_16x4_SAVE:
else (1) ENDIF_Y_16x4_SAVE
// Field branch recomputes y from ORIY_CUR, replacing the value copied from the ORIX_TOP pair above.
asr (1) MSGSRC.1:d ORIY_CUR:w 1:w // Reduce y by half in field access mode
// Field picture
// f0.1 (BotFieldFlag) picks the bottom-field or top-field descriptor.
(f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y bottom field
(-f0.1) mov (1) MSGDSC MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud // Write 2 GRFs to DEST_Y top field
add (1) MSGSRC.1:d MSGSRC.1:d -4:w // for last 4 rows of above MB
endif
ENDIF_Y_16x4_SAVE:
// The send targets WritebackResponse(0) rather than null; presumably the _WC
// descriptor requests a write-commit response - TODO confirm.
send (8) WritebackResponse(0)<1> MSGHDR MSGSRC<8;8,1>:ud DAPWRITE MSGDSC
// End of save_Top_Y_16x4.asm

View File

@@ -0,0 +1,68 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: WriteURB.asm
//
// General purpose module to write data to URB using the URB handle/offset in r0
//
// The only live instructions are: store URBOffset/2 into the first word of
// MSGSRC, then issue the URB write with the descriptor in URBWriteMsgDesc.
//
//----------------------------------------------------------------
// Assume:
// - a0.0 and a0.1 is msg desc, has been assigned with URB offset and msg size
// - MRFs are already assigned with data.
// - the rest of MSGSRC already holds a valid header (the r0 copy below is
//   commented out) - TODO confirm against the including kernel.
//----------------------------------------------------------------
//
// 16x16 byte pixel block can be saved using just 1 "send" instruction.
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignature:w 0x3535:w
#endif
// URB write header:
//mov (8) MSGSRC.0:ud r0.0<8;8,1>:ud // Copy parent R0 header
//shr (1) Temp2_W:uw URBOffset:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//add (1) MSGSRC.0:uw r0.0:uw Temp2_W:uw
shr (1) MSGSRC.0:uw URBOffset:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//mov (1) MSGSRC.0:uw URBOffset_2:uw
//mov (1) MSGSRC.1:ud 0:ud // Reset Handle 1
// Result register is null, so no response data is kept.
send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE URBWriteMsgDesc:ud // URB write
//send null:ud MRF0 null:ud URBWriteMsgDesc:ud // URB write

View File

@@ -0,0 +1,69 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: WriteURB_Child.asm
//
// General purpose module to write data to URB using the URB handle/offset in r0
//
// The only live instructions are: store URBOffsetC/2 into the first word of
// MSGSRC, then issue the URB write with an immediate descriptor
// (MSG_LEN(1)+URBWMSGDSC+0x20).
//
//----------------------------------------------------------------
// Assume:
// - a0.0 and a0.1 is msg desc, has been assigned with URB offset and msg size
//   NOTE(review): the send below uses an immediate descriptor, so this
//   assumption may be stale - confirm.
// - MRFs are already assigned with data.
//----------------------------------------------------------------
//
// 16x16 byte pixel block can be saved using just 1 "send" instruction.
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0x3535:w
#endif
// URB write header:
//mov (8) MSGSRC.0:ud r0.0<8;8,1>:ud // Copy parent R0 header
//shr (1) Temp2_W:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//add (1) MSGSRC.0:uw r0.0:uw Temp2_W:uw
shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//mov (1) MSGSRC.1:ud 0:ud // Reset Handle 1
// URB write 1 MRF,
// Current MB offset is in URBOffset, use it as write origin
// Add 2 to offset to store data to be passed to the right MB
// (presumably the +0x20 in the descriptor below encodes that offset of 2 - TODO confirm)
send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE MSG_LEN(1)+URBWMSGDSC+0x20 // URB write

View File

@@ -0,0 +1,70 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Module name: WriteURB_Child.asm
//
// General purpose module to write data to URB using the URB handle/offset in r0
//
// The only live instructions are: store URBOffsetC/2 into the first word of
// MSGSRC, then issue the URB write with an immediate descriptor
// (MSG_LEN(2)+URBWMSGDSC+0x20).
//
//----------------------------------------------------------------
// Assume:
// - a0.0 and a0.1 is msg desc, has been assigned with URB offset and msg size
//   NOTE(review): the send below uses an immediate descriptor, so this
//   assumption may be stale - confirm.
// - MRFs are already assigned with data.
//----------------------------------------------------------------
//
// 16x16 byte pixel block can be saved using just 1 "send" instruction.
#if defined(_DEBUG)
// Debug-only marker written on entry to this module
mov (1) EntrySignatureC:w 0x3535:w
#endif
// URB write header:
//mov (8) MSGSRC.0:ud r0.0<8;8,1>:ud // Copy parent R0 header
//shr (1) Temp2_W:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//add (1) MSGSRC.0:uw r0.0:uw Temp2_W:uw
shr (1) MSGSRC.0:uw URBOffsetC:uw 1:w // divide by 2, because URB entry is counted by 512bits. Offset is counted by 256bits.
//mov (1) MSGSRC.1:ud 0:ud // Reset Handle 1
// URB write 2 MRFs,
// Current MB offset is in URBOffset, use it as write origin
// Add 2 to offset to store data to be passed to the right MB
// (presumably the +0x20 in the descriptor below encodes that offset of 2 - TODO confirm)
//mov (1) URBWriteMsgDesc:ud 0x06300020:ud
send null:uw m0 MSGSRC<8;8,1>:uw URBWRITE MSG_LEN(2)+URBWMSGDSC+0x20 // URB write

View File

@@ -0,0 +1,284 @@
/*
* All inter-prediction macroblock kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Kernel name: AVCMCInter.asm
#ifdef INTERLABEL
#undef INTERLABEL
#endif
#if defined(MBAFF)
// < MBaff_Motion >
#define INTERLABEL(x) x##_##MBF
#elif defined(FIELD)
// < FieldMB_Motion >
#define INTERLABEL(x) x##_##FLD
#else // FRAME
// < FrameMB_Motion >
#define INTERLABEL(x) x##_##FRM
#endif
//
// Decoding an inter-prediction macroblock (conditional compile)
// -DMBAFF : MBAff picture MB
// -DFRAME : Frame picture MB
// -DFIELD : Field picture MB
// -DMBAFF -DMONO : MBAff mono picture MB
// -DFRAME -DMONO : Frame mono picture MB
// -DFIELD -DMONO : Field mono picture MB
//#if !defined(__AVCMCInter__) // Make sure this is only included once
//#define __AVCMCInter__
// TODO: header files need to be in sync with intra prediction
#include "header.inc"
#include "inter_Header.inc"
// TODO: Kernel names for mono cases
#if defined(MBAFF)
.kernel MBAff_Motion
MBAFF_MB:
#elif defined(FIELD)
.kernel FieldMB_Motion
FIELD_MB:
#else // Frame
.kernel FrameMB_Motion
FRAME_MB:
#endif
#ifdef _DEBUG
// WA for FULSIM so we'll know which kernel is being debugged
// A distinct marker value is written to acc0 per variant:
// 0x0aaa55a5 = MBAFF, 0x0baa55a5 = FIELD, 0x0caa55a5 = FRAME.
#if defined(MBAFF)
mov (1) acc0:ud 0x0aaa55a5:ud
#elif defined(FIELD)
mov (1) acc0:ud 0x0baa55a5:ud
#else // Frame
mov (1) acc0:ud 0x0caa55a5:ud
#endif
#endif
// SW scoreboard builds only: call the inter-kernel scoreboard start routine
#ifdef SW_SCOREBOARD
CALL(scoreboard_start_inter,1)
#endif
mov (8) gMSGSRC<1>:ud r0.0<8;8,1>:ud // Initialize message header payload with R0
and (1) gwMBTYPE<1> gMBTYPE:ub nMBTYPE_MASK:w // MB type
shl (2) gX<1>:w gORIX<2;2,1>:ub 4:w // Convert MB origin to pixel unit (<< 4, i.e. x16, applied to 2 elements)
// #include "process_inter16x16.asm" // Handle B_L0_16x16 case with zero MVs and weighted pred off.
// In the case of B_L0_16x16 with zero MVs and weighted pred off, the kernel jumps to INTERLABEL(EXIT_LOOP).
// (The include above is commented out, so that shortcut is currently not assembled.)
INTERLABEL(INIT_MBPARA):
#include "initialize_MBPara.asm"
//========================= BEGIN - LOOP_SUBMB ===========================
// Outer loop over the sub-macroblocks; gLOOP_SUBMB steps by 2 and is also
// used as the shift amount that selects this sub-MB's 2-bit mode/shape field.
mov (1) gLOOP_SUBMB:uw 0:uw // 0, 2, 4, 6
INTERLABEL(LOOP_SUBMB):
//========================== BEGIN - LOOP_DIR ============================
// Prediction flag (gPREDFLAG - 0:Pred_L0, 1:Pred_L1, 2:BiPred)
// gPREDFLAG = (gSUBMB_MODE >> gLOOP_SUBMB) & 0x3
asr (1) gPREDFLAG:w gSUBMB_MODE:ub gLOOP_SUBMB:uw
mov (1) gLOOP_DIR:uw 1:uw // 1, 0
and (1) gPREDFLAG:w gPREDFLAG:w 0x3:w
INTERLABEL(LOOP_DIR):
// A pass is skipped when gLOOP_DIR equals gPREDFLAG. With the encoding above:
// the gLOOP_DIR=1 pass runs for Pred_L0 and BiPred, the gLOOP_DIR=0 pass runs
// for Pred_L1 and BiPred.
cmp.e.f0.0 (1) null:w gLOOP_DIR:w gPREDFLAG:w
(f0.0) jmpi INTERLABEL(LOOP_DIR_CONTINUE)
// Get binding table index
// & reference picture parity (gREFPARITY - 0:top, 0x100:bottom, x:frame)
// & address of interpolation result
// f0.1 <- (gLOOP_DIR == 1): that pass uses nOFFSET_INTP0 and r[pBIDX];
// the other pass uses nOFFSET_INTP1 and r[pBIDX,4].
// Low 7 bits (0x7f) of the byte give gBIDX.
cmp.e.f0.1 (1) null:w gLOOP_DIR:w 1:w
(f0.1) mov (1) gpINTP:ud nOFFSET_INTP0:ud {NoDDClr} //
(f0.1) and (1) gBIDX:w r[pBIDX]:ub 0x7f:w {NoDDChk} //
(-f0.1) mov (1) gpINTP:ud nOFFSET_INTP1:ud {NoDDClr} //
(-f0.1) and (1) gBIDX:w r[pBIDX,4]:ub 0x7f:w {NoDDChk} //
#if defined(MBAFF) || defined(FIELD)
// Bit 7 (0x80) of the same byte is the parity flag; shifting it left by 1
// yields the 0 / 0x100 value described above.
(f0.1) and (1) gREFPARITY:w r[pBIDX]:ub 0x80:w
(-f0.1) and (1) gREFPARITY:w r[pBIDX,4]:ub 0x80:w
shl (1) gREFPARITY:w gREFPARITY<0;1,0>:w 1:w
#endif
// Sub MB shape
// gSHAPETEMP = gSUBMB_SHAPE >> gLOOP_SUBMB (masked with 3 at the test below)
asr (1) gSHAPETEMP:w gSUBMB_SHAPE:ub gLOOP_SUBMB:w
// Chroma MV adjustment & Set message descriptor for frame/field read
#if defined(MBAFF)
#include "chromaMVAdjust.asm"
// f0.0 <- field-MB flag set. Field MB: descriptor = gBIDX + nDWBRMSGDSC_SC_TF
// + gREFPARITY (built via gD0). Otherwise: descriptor = gBIDX + nDWBRMSGDSC_SC.
and.nz.f0.0 (1) null:uw gFIELDMBFLAG:ub nFIELDMB_MASK:uw
(f0.0) add (1) gD0:ud gBIDX:uw nDWBRMSGDSC_SC_TF:ud
(-f0.0) add (1) gMSGDSC_R:ud gBIDX:uw nDWBRMSGDSC_SC:ud
(f0.0) add (1) gMSGDSC_R:ud gD0:ud gREFPARITY:uw
#elif defined(FIELD)
#include "chromaMVAdjust.asm"
// Field picture: descriptor = gBIDX + nDWBRMSGDSC_SC_TF + gREFPARITY
add (1) gMSGDSC_R:ud gBIDX:uw nDWBRMSGDSC_SC_TF:ud
add (1) gMSGDSC_R:ud gMSGDSC_R:ud gREFPARITY:uw
#else // FRAME
// Frame picture: descriptor = gBIDX + nDWBRMSGDSC_SC
add (1) gMSGDSC_R:ud gBIDX:uw nDWBRMSGDSC_SC:ud
#endif
// Non-zero shape bits select the 4x4 path; zero falls through to the 8x8 path
and.nz.f0.1 (1) null:w gSHAPETEMP:w 3:w
(f0.1) jmpi INTERLABEL(PROCESS4x4)
//======================== BEGIN - PROCESS 8x8 ===========================
// 8x8 path: reached by fall-through when the sub-MB shape bits are zero.
// Chroma steps are omitted entirely when MONO is defined.
// Reference block load
#include "loadRef_Y_16x13.asm"
#ifndef MONO
#if defined(MBAFF) || defined(FIELD)
// Field/MBAFF only: add gCHRMVADJ to the word at r[pMV,2] before the chroma load
add (1) r[pMV,2]:w r[pMV,2]:w gCHRMVADJ:w
#endif
#include "loadRef_C_10x5.asm"
#endif
// Interpolation
//CALL_INTER(INTERLABEL(Interpolate_Y_8x8_Func), 1)
#include "interpolate_Y_8x8.asm"
#ifndef MONO
//CALL_INTER(INTERLABEL(Interpolate_C_4x4_Func), 1)
#include "interpolate_C_4x4.asm"
#endif
// Skip the 4x4 path and rejoin at the common chroma round/shift step
jmpi INTERLABEL(ROUND_SHIFT_C)
//========================= END - PROCESS 8x8 ============================
//======================== BEGIN - LOOP_SUBMBPT ==========================
// 4x4 path: iterate over the four partitions of the current sub-MB.
// Chroma steps are omitted entirely when MONO is defined.
INTERLABEL(PROCESS4x4):
mov (1) gLOOP_SUBMBPT:uw 4:uw // 4, 3, 2, 1
INTERLABEL(LOOP_SUBMBPT):
// Reference block load
#include "loadRef_Y_16x9.asm"
#ifndef MONO
#if defined(MBAFF) || defined(FIELD)
// Field/MBAFF only: add gCHRMVADJ to the word at r[pMV,2] before the chroma load
add (1) r[pMV,2]:w r[pMV,2]:w gCHRMVADJ:w
#endif
#include "loadRef_C_6x3.asm"
#endif
// Interpolation
#include "interpolate_Y_4x4.asm"
#ifndef MONO
#include "interpolate_C_2x2.asm"
#endif
// f0.0 <- (counter == 3), sampled before the decrement; it selects the larger
// gpINTP step (24 & 56) for that one iteration, all others step by 8 & 8.
cmp.e.f0.0 (1) null:w gLOOP_SUBMBPT:uw 3:w
// Decrement the counter; f0.1 <- (counter reached zero)
add.z.f0.1 (1) gLOOP_SUBMBPT:uw gLOOP_SUBMBPT:uw -1:w
add (1) pMV:w pMV:w 8:w // Advance pMV by 8 per partition
(-f0.0) add (1) gpINTP:ud gpINTP:ud 0x00080008:ud // 8 & 8
(f0.0) add (1) gpINTP:ud gpINTP:ud 0x00180038:ud // 24 & 56
(-f0.1) jmpi INTERLABEL(LOOP_SUBMBPT) // Loop until the counter hits zero
// After the loop: restore the interpolation-result pointer for the current
// direction (same selection as at the top of LOOP_DIR) and undo the four
// pMV increments (4 * 8 = 32).
// The execution size is written explicitly as (1), matching the identical
// compare at the top of LOOP_DIR, instead of relying on the assembler default.
cmp.e.f0.1 (1) null:w gLOOP_DIR:w 1:w
add (1) pMV:w pMV:w -32:w
(f0.1) mov (1) gpINTP:ud nOFFSET_INTP0:ud
(-f0.1) mov (1) gpINTP:ud nOFFSET_INTP1:ud
mov (1) pRESULT:uw gpINTPC:uw
//========================= END - LOOP_SUBMBPT ===========================
// Common join point of the 8x8 and 4x4 paths
INTERLABEL(ROUND_SHIFT_C):
#ifndef MONO
#include "roundShift_C_4x4.asm"
#endif
// Tail of the direction loop (also the target when a pass is skipped).
// gLOOP_DIR is decremented; f0.1 <- (result != 0). The jump is taken when the
// result is zero, i.e. after the gLOOP_DIR=1 pass. After the gLOOP_DIR=0 pass
// the unsigned word wraps to a non-zero value and the loop exits.
INTERLABEL(LOOP_DIR_CONTINUE):
add.nz.f0.1 (1) gLOOP_DIR:uw gLOOP_DIR:uw -1:w
add (1) pMV:w pMV:w 4:w // Advance pMV by 4 for each direction pass
(-f0.1) jmpi INTERLABEL(LOOP_DIR)
//=========================== END - LOOP_DIR =============================
// Weighted prediction and reconstruction for the current sub-MB, followed by
// the tail of the LOOP_SUBMB loop.
INTERLABEL(Weighted_Prediction):
#include "weightedPred.asm"
// f0.1 <- (bit 1 of gLOOP_SUBMB is clear), i.e. set for gLOOP_SUBMB = 0 and 4
and.z.f0.1 (16) null<1>:w gLOOP_SUBMB<0;1,0>:uw 2:w
#include "recon_Y_8x8.asm"
#ifndef MONO
#include "recon_C_4x4.asm"
// pERRORC += 48 when f0.1 is clear (gLOOP_SUBMB = 2 or 6).
// NOTE(review): this relies on the two recon includes above leaving f0.1
// untouched - confirm against those files.
(-f0.1) add (1) pERRORC:w pERRORC:w 48:w
#endif
// f0.1 <- (gLOOP_SUBMB == 6), sampled before the increment: last iteration
cmp.e.f0.1 (1) null:w gLOOP_SUBMB:uw 6:w
add (1) gLOOP_SUBMB:uw gLOOP_SUBMB:uw 2:w
// NOTE(review): this comment previously read "12 & 1", but the immediate's
// upper word is 0x0010 = 16 - confirm which step is intended.
add (1) pWGT_BIDX:ud pWGT_BIDX:ud 0x00100001:ud // 16 (0x10) & 1
add (1) pMV:w pMV:w gMVSTEP:w
(-f0.1) jmpi INTERLABEL(LOOP_SUBMB)
//========================== END - LOOP_SUBMB ============================
// Write out the reconstructed MB, synchronize, and end the thread.
INTERLABEL(EXIT_LOOP):
#include "writeRecon_YC.asm"
#ifdef SW_SCOREBOARD
wait n0:ud // Now wait for the scoreboard to respond
// NOTE(review): the file name below is spelled "Soreboard" (no 'c'); verify it
// matches the file on disk before enabling SW_SCOREBOARD builds.
#include "Soreboard_update.asm" // scoreboard update function
#else
// Check for write commit first if SW scoreboard is disabled
// (each register is moved onto itself, per the comments to wait for the write commit)
mov (1) gREG_WRITE_COMMIT_Y<1>:ud gREG_WRITE_COMMIT_Y<0;1,0>:ud // Make sure Y write is committed
mov (1) gREG_WRITE_COMMIT_UV<1>:ud gREG_WRITE_COMMIT_UV<0;1,0>:ud // Make sure U/V write is committed
#endif
// Terminate the thread
//
END_THREAD
//#include "Interpolate_Y_8x8_Func.asm"
//#include "Interpolate_C_4x4_Func.asm"
//#include "WeightedPred_Y_Func.asm"
//#include "WeightedPred_C_Func.asm"
.end_code
.end_kernel
//#endif // !defined(__AVCMCInter__)

View File

@@ -0,0 +1,469 @@
/*
* All HWMC kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Kernel name: AllAVC.asm
//
// All HWMC kernels merged into this file
//
// $Revision: 2 $
// $Date: 9/10/06 2:02a $
//
// Note: To enable SW scoreboard for ILK AVC kernels, simply toggle the HW_SCOREBOARD
// and SW_SCOREBOARD definition as described below.
//
// ----------------------------------------------------
// Main: ALLINTRA
// ----------------------------------------------------
// NOTE(review): the banner above says ALLINTRA, but the kernel declared in this
// file is AllAVC - the banner looks stale.
#define COMBINED_KERNEL
#define ENABLE_ILDB
// WA for *Stim tool issue, should be removed later
// INSTFACTOR is the divisor applied to instruction offsets in AllAVC_Export.inc
#ifdef DEV_ILK
#define INSTFACTOR 2 // 128-bit count as 2 instructions
#else
#define INSTFACTOR 1 // 128-bit is 1 instruction
#endif // DEV_ILK
// Exactly one of SW_SCOREBOARD / HW_SCOREBOARD is left defined after this block
#ifdef DEV_CTG
#define SW_SCOREBOARD // SW Scoreboard should be enabled for CTG and earlier
#undef HW_SCOREBOARD // HW Scoreboard should be disabled for CTG and earlier
#else
#define HW_SCOREBOARD // HW Scoreboard should be enabled for ILK and beyond
#undef SW_SCOREBOARD // SW Scoreboard should be disabled for ILK and beyond
#endif // DEV_CTG
// BOOTSTRAP builds define the ILDB instruction-pointer symbols as 0 instead of
// including a generated export file; otherwise the per-device export file is used.
#ifdef BOOTSTRAP
# ifdef ENABLE_ILDB
# define ALL_SPAWNED_UV_ILDB_FRAME_IP 0
# define SLEEP_ENTRY_UV_ILDB_FRAME_IP 0
# define POST_SLEEP_UV_ILDB_FRAME_IP 0
# define ALL_SPAWNED_Y_ILDB_FRAME_IP 0
# define SLEEP_ENTRY_Y_ILDB_FRAME_IP 0
# define POST_SLEEP_Y_ILDB_FRAME_IP 0
# endif
#elif defined(DEV_ILK)
# include "export.inc.gen5"
#elif defined(DEV_CTG)
# include "export.inc"
#endif
// _EXPORT: emit only the export table. _BUILD: pull in the dumped exports, the
// export table and the build include. Neither defined: nothing extra is included.
#if defined(_EXPORT)
#include "AllAVC_Export.inc"
#elif defined(_BUILD)
#include "AllAVC.ich" // ISAasm dumped .exports
#include "AllAVC_Export.inc" // Keep jumping targets aligned, only for CTG and beyond
#include "AllAVC_Build.inc"
#else
#endif
.kernel AllAVC
// Build all intra prediction kernels
//
// Pattern used throughout this file: each included kernel is preceded by two
// optional padding loops, enabled only when <NAME>_PAD_NENOP / <NAME>_PAD_NOP
// are defined. Each loop body emits one nenop / nop.
// NOTE(review): presumably the macro value is the number of padding
// instructions emitted, used to keep kernel entry points aligned - confirm
// against the assembler's $for semantics.
#ifdef INTRA_16x16_PAD_NENOP
$for(0; <INTRA_16x16_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef INTRA_16x16_PAD_NOP
$for(0; <INTRA_16x16_PAD_NOP; 1) {
nop
}
#endif
#include "Intra_16x16.asm"
#ifdef INTRA_8x8_PAD_NENOP
$for(0; <INTRA_8x8_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef INTRA_8x8_PAD_NOP
$for(0; <INTRA_8x8_PAD_NOP; 1) {
nop
}
#endif
#include "Intra_8x8.asm"
#ifdef INTRA_4x4_PAD_NENOP
$for(0; <INTRA_4x4_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef INTRA_4x4_PAD_NOP
$for(0; <INTRA_4x4_PAD_NOP; 1) {
nop
}
#endif
#include "Intra_4x4.asm"
#ifdef INTRA_PCM_PAD_NENOP
$for(0; <INTRA_PCM_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef INTRA_PCM_PAD_NOP
$for(0; <INTRA_PCM_PAD_NOP; 1) {
nop
}
#endif
#include "Intra_PCM.asm"
// The three motion kernels are produced by including AVCMCInter.asm three
// times, each time with a different variant macro defined (and undefined
// again afterwards so it does not leak into the next inclusion).
// Build FrameMB_Motion kernel
//
#define FRAME
#ifdef FRAME_MB_PAD_NENOP
$for(0; <FRAME_MB_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef FRAME_MB_PAD_NOP
$for(0; <FRAME_MB_PAD_NOP; 1) {
nop
}
#endif
#include "AVCMCInter.asm"
#undef FRAME
// Build FieldMB_Motion kernel
//
#define FIELD
#ifdef FIELD_MB_PAD_NENOP
$for(0; <FIELD_MB_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef FIELD_MB_PAD_NOP
$for(0; <FIELD_MB_PAD_NOP; 1) {
nop
}
#endif
#include "AVCMCInter.asm"
#undef FIELD
// Build MBAff_Motion kernel
//
#define MBAFF
#ifdef MBAFF_MB_PAD_NENOP
$for(0; <MBAFF_MB_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef MBAFF_MB_PAD_NOP
$for(0; <MBAFF_MB_PAD_NOP; 1) {
nop
}
#endif
#include "AVCMCInter.asm"
#undef MBAFF
// Scoreboard kernels: the SW pair (scoreboard / scoreboard_MBAFF) or the HW
// pair (SetHWScoreboard / SetHWScoreboard_MBAFF), depending on which of
// SW_SCOREBOARD / HW_SCOREBOARD was selected at the top of this file.
#ifdef SW_SCOREBOARD
// SW scoreboard kernel for non-MBAFF
//
#ifdef SCOREBOARD_PAD_NENOP
$for(0; <SCOREBOARD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef SCOREBOARD_PAD_NOP
$for(0; <SCOREBOARD_PAD_NOP; 1) {
nop
}
#endif
#include "scoreboard.asm"
// SW scoreboard kernel for MBAFF
#ifdef SCOREBOARD_MBAFF_PAD_NENOP
$for(0; <SCOREBOARD_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef SCOREBOARD_MBAFF_PAD_NOP
$for(0; <SCOREBOARD_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "scoreboard_MBAFF.asm"
#elif defined(HW_SCOREBOARD)
// SetHWscoreboard kernel for non-MBAFF
//
#ifdef SETHWSCOREBOARD_PAD_NENOP
$for(0; <SETHWSCOREBOARD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef SETHWSCOREBOARD_PAD_NOP
$for(0; <SETHWSCOREBOARD_PAD_NOP; 1) {
nop
}
#endif
#include "SetHWScoreboard.asm"
// SetHWscoreboard kernel for MBAFF
#ifdef SETHWSCOREBOARD_MBAFF_PAD_NENOP
$for(0; <SETHWSCOREBOARD_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef SETHWSCOREBOARD_MBAFF_PAD_NOP
$for(0; <SETHWSCOREBOARD_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "SetHWScoreboard_MBAFF.asm"
#endif // SW_SCOREBOARD
// BSDReset and DCResetDummy kernels, each with its optional padding.
// These are included unconditionally (not gated on scoreboard or ILDB).
#ifdef BSDRESET_PAD_NENOP
$for(0; <BSDRESET_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef BSDRESET_PAD_NOP
$for(0; <BSDRESET_PAD_NOP; 1) {
nop
}
#endif
#include "BSDReset.asm"
#ifdef DCRESETDUMMY_PAD_NENOP
$for(0; <DCRESETDUMMY_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef DCRESETDUMMY_PAD_NOP
$for(0; <DCRESETDUMMY_PAD_NOP; 1) {
nop
}
#endif
#include "DCResetDummy.asm"
#ifdef ENABLE_ILDB
// Build all ILDB kernels
//
// Undefine some previous defined symbols since they will be re-defined/re-declared in ILDB kernels
#undef A
#undef B
#undef p0
#undef p1
// Map the generic message-payload names onto ILDB-specific names
#define MSGPAYLOADB MSGPAYLOADB_ILDB
#define MSGPAYLOADW MSGPAYLOADW_ILDB
#define MSGPAYLOADD MSGPAYLOADD_ILDB
#define MSGPAYLOADF MSGPAYLOADF_ILDB
// < Frame ILDB >
// ILDB_LABEL(x) appends _ILDB_FRAME to label x for this group of four kernels
// (root/child x Y/UV); both macros are undefined again at the end of the group.
#define _PROGRESSIVE
#define ILDB_LABEL(x) x##_ILDB_FRAME
#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_Y.asm"
#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_Y.asm"
#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_UV.asm"
#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_UV.asm"
#undef ILDB_LABEL
#undef _PROGRESSIVE
// < Field ILDB >
// Same structure as the frame group, using the *_Field_* sources;
// ILDB_LABEL(x) appends _ILDB_FIELD while _FIELD is defined.
#define _FIELD
#define ILDB_LABEL(x) x##_ILDB_FIELD
#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_Field_Y.asm"
#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_Field_Y.asm"
#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_Field_UV.asm"
#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_Field_UV.asm"
#undef ILDB_LABEL
#undef _FIELD
// < MBAFF Frame ILDB >
// Same structure as the frame group, using the *_Mbaff_* sources;
// ILDB_LABEL(x) appends _ILDB_MBAFF while _MBAFF is defined.
#define _MBAFF
#define ILDB_LABEL(x) x##_ILDB_MBAFF
#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP
$for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_Mbaff_Y.asm"
#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP
$for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_Mbaff_Y.asm"
#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP
$for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Root_Mbaff_UV.asm"
#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP; 1) {
nenop
}
#endif
#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP
$for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP; 1) {
nop
}
#endif
#include "AVC_ILDB_Child_Mbaff_UV.asm"
#undef ILDB_LABEL
#undef _MBAFF
#endif // ENABLE_ILDB
// End-of-kernel marker label, placed after the last included kernel.
// NOTE(review): AllAVC_Export.inc uses AllAVC_END_IP for the total instruction
// count - presumably the exported address of this label; confirm.
AllAVC_END:
nop
// End of AllAVC
.end_code
.end_kernel

View File

@@ -0,0 +1,100 @@
/*
* All field picture HWMC kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// 2857702934 // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
// 0 // Offset to Intra_16x16 luma prediction mode 0
// 9 // Offset to Intra_16x16 luma prediction mode 1
// 19 // Offset to Intra_16x16 luma prediction mode 2
// 42 // Offset to Intra_16x16 luma prediction mode 3
// 2857699336 // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
// 0 // Offset to Intra_8x8 luma prediction mode 0
// 5 // Offset to Intra_8x8 luma prediction mode 1
// 10 // Offset to Intra_8x8 luma prediction mode 2
// 26 // Offset to Intra_8x8 luma prediction mode 3
// 36 // Offset to Intra_8x8 luma prediction mode 4
// 50 // Offset to Intra_8x8 luma prediction mode 5
// 68 // Offset to Intra_8x8 luma prediction mode 6
// 85 // Offset to Intra_8x8 luma prediction mode 7
// 95 // Offset to Intra_8x8 luma prediction mode 8
// 2857698308 // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
// 0 // Offset to Intra_4x4 luma prediction mode 0
// 2 // Offset to Intra_4x4 luma prediction mode 1
// 4 // Offset to Intra_4x4 luma prediction mode 2
// 16 // Offset to Intra_4x4 luma prediction mode 3
// 23 // Offset to Intra_4x4 luma prediction mode 4
// 32 // Offset to Intra_4x4 luma prediction mode 5
// 45 // Offset to Intra_4x4 luma prediction mode 6
// 59 // Offset to Intra_4x4 luma prediction mode 7
// 66 // Offset to Intra_4x4 luma prediction mode 8
// 2857700364 // 0xAA550C0C - GUID for intra chroma prediction mode offsets
// 0 // Offset to intra chroma prediction mode 0
// 30 // Offset to intra chroma prediction mode 1
// 36 // Offset to intra chroma prediction mode 2
// 41 // Offset to intra chroma prediction mode 3
// Kernel name: AllAVCField.asm
//
// All field picture HWMC kernels merged into this file
//
// $Revision: 1 $
// $Date: 4/13/06 4:35p $
//
// ----------------------------------------------------
// Main: AllAVCField
// ----------------------------------------------------
#define ALLHWMC
#define COMBINED_KERNEL
.kernel AllAVCField
// Intra kernels and the SW scoreboard kernel are included as-is (no padding)
#include "Intra_PCM.asm"
#include "Intra_16x16.asm"
#include "Intra_8x8.asm"
#include "Intra_4x4.asm"
#include "scoreboard.asm"
// FIELD selects the FieldMB_Motion variant of AVCMCInter.asm
#define FIELD
#include "AVCMCInter.asm"
// End of AllAVCField
.end_kernel

View File

@@ -0,0 +1,99 @@
/*
* All frame picture HWMC kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// 2857702934 // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
// 0 // Offset to Intra_16x16 luma prediction mode 0
// 9 // Offset to Intra_16x16 luma prediction mode 1
// 19 // Offset to Intra_16x16 luma prediction mode 2
// 42 // Offset to Intra_16x16 luma prediction mode 3
// 2857699336 // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
// 0 // Offset to Intra_8x8 luma prediction mode 0
// 5 // Offset to Intra_8x8 luma prediction mode 1
// 10 // Offset to Intra_8x8 luma prediction mode 2
// 26 // Offset to Intra_8x8 luma prediction mode 3
// 36 // Offset to Intra_8x8 luma prediction mode 4
// 50 // Offset to Intra_8x8 luma prediction mode 5
// 68 // Offset to Intra_8x8 luma prediction mode 6
// 85 // Offset to Intra_8x8 luma prediction mode 7
// 95 // Offset to Intra_8x8 luma prediction mode 8
// 2857698308 // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
// 0 // Offset to Intra_4x4 luma prediction mode 0
// 2 // Offset to Intra_4x4 luma prediction mode 1
// 4 // Offset to Intra_4x4 luma prediction mode 2
// 16 // Offset to Intra_4x4 luma prediction mode 3
// 23 // Offset to Intra_4x4 luma prediction mode 4
// 32 // Offset to Intra_4x4 luma prediction mode 5
// 45 // Offset to Intra_4x4 luma prediction mode 6
// 59 // Offset to Intra_4x4 luma prediction mode 7
// 66 // Offset to Intra_4x4 luma prediction mode 8
// 2857700364 // 0xAA550C0C - GUID for intra chroma prediction mode offsets
// 0 // Offset to intra chroma prediction mode 0
// 30 // Offset to intra chroma prediction mode 1
// 36 // Offset to intra chroma prediction mode 2
// 41 // Offset to intra chroma prediction mode 3
// Kernel name: AllAVCFrame.asm
//
// All frame picture HWMC kernels merged into this file
//
// $Revision: 1 $
// $Date: 4/13/06 4:35p $
//
// ----------------------------------------------------
// Main: AllAVCFrame
// ----------------------------------------------------
#define ALLHWMC
#define COMBINED_KERNEL
.kernel AllAVCFrame
// Intra kernels and the SW scoreboard kernel are included as-is (no padding)
#include "Intra_PCM.asm"
#include "Intra_16x16.asm"
#include "Intra_8x8.asm"
#include "Intra_4x4.asm"
#include "scoreboard.asm"
// No variant macro is defined here: AVCMCInter.asm falls back to its frame
// (FrameMB_Motion) branch when neither MBAFF nor FIELD is defined.
#include "AVCMCInter.asm"
// End of AllAVCFrame
.end_kernel

View File

@@ -0,0 +1,100 @@
/*
* All MBAFF frame picture HWMC kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// 2857702934 // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
// 0 // Offset to Intra_16x16 luma prediction mode 0
// 9 // Offset to Intra_16x16 luma prediction mode 1
// 19 // Offset to Intra_16x16 luma prediction mode 2
// 42 // Offset to Intra_16x16 luma prediction mode 3
// 2857699336 // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
// 0 // Offset to Intra_8x8 luma prediction mode 0
// 5 // Offset to Intra_8x8 luma prediction mode 1
// 10 // Offset to Intra_8x8 luma prediction mode 2
// 26 // Offset to Intra_8x8 luma prediction mode 3
// 36 // Offset to Intra_8x8 luma prediction mode 4
// 50 // Offset to Intra_8x8 luma prediction mode 5
// 68 // Offset to Intra_8x8 luma prediction mode 6
// 85 // Offset to Intra_8x8 luma prediction mode 7
// 95 // Offset to Intra_8x8 luma prediction mode 8
// 2857698308 // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
// 0 // Offset to Intra_4x4 luma prediction mode 0
// 2 // Offset to Intra_4x4 luma prediction mode 1
// 4 // Offset to Intra_4x4 luma prediction mode 2
// 16 // Offset to Intra_4x4 luma prediction mode 3
// 23 // Offset to Intra_4x4 luma prediction mode 4
// 32 // Offset to Intra_4x4 luma prediction mode 5
// 45 // Offset to Intra_4x4 luma prediction mode 6
// 59 // Offset to Intra_4x4 luma prediction mode 7
// 66 // Offset to Intra_4x4 luma prediction mode 8
// 2857700364 // 0xAA550C0C - GUID for intra chroma prediction mode offsets
// 0 // Offset to intra chroma prediction mode 0
// 30 // Offset to intra chroma prediction mode 1
// 36 // Offset to intra chroma prediction mode 2
// 41 // Offset to intra chroma prediction mode 3
// Kernel name: AllAVCMBAFF.asm
//
// All MBAFF frame picture HWMC kernels merged into this file
//
// $Revision: 1 $
// $Date: 4/13/06 4:35p $
//
// ----------------------------------------------------
// Main: AllAVCMBAFF
// ----------------------------------------------------
#define ALLHWMC
#define COMBINED_KERNEL
.kernel AllAVCMBAFF
// Intra kernels and the SW scoreboard kernel are included as-is (no padding)
#include "Intra_PCM.asm"
#include "Intra_16x16.asm"
#include "Intra_8x8.asm"
#include "Intra_4x4.asm"
#include "scoreboard.asm"
// MBAFF selects the MBAff_Motion variant of AVCMCInter.asm
#define MBAFF
#include "AVCMCInter.asm"
// End of AllAVCMBAFF
.end_kernel

View File

@@ -0,0 +1,112 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Export table for the combined AllAVC kernel: total instruction count, one
// entry-point offset per kernel (each divided by INSTFACTOR), followed by the
// per-mode offsets inside the intra prediction kernels.
$table {
AllAVC_END_IP/INSTFACTOR // Total instruction count
// The kernel counts below mirror the number of *_ENTRY lines emitted for each
// configuration: 4 intra + 3 motion + 2 reset = 9, plus 2 scoreboard kernels
// and/or 12 ILDB kernels.
// NOTE(review): the final #else branch previously said 11; with neither
// scoreboard nor ILDB only 9 entries are emitted. AllAVC.asm always defines
// one of the scoreboard macros, so that branch is not reached from there.
#if (defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)) && defined(ENABLE_ILDB)
// 23 // Total kernel count
#elif defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)
// 11 // Total kernel count
#elif defined(ENABLE_ILDB)
// 21 // Total kernel count
#else
// 9 // Total kernel count
#endif
INTRA_16x16_ENTRY/INSTFACTOR // Instruction offset to 'Intra_16x16'
INTRA_8x8_ENTRY/INSTFACTOR // Instruction offset to 'Intra_8x8'
INTRA_4x4_ENTRY/INSTFACTOR // Instruction offset to 'Intra_4x4'
INTRA_PCM_ENTRY/INSTFACTOR // Instruction offset to 'Intra_PCM'
FRAME_MB_ENTRY/INSTFACTOR // Instruction offset to 'FrameMB_Motion'
FIELD_MB_ENTRY/INSTFACTOR // Instruction offset to 'FieldMB_Motion'
MBAFF_MB_ENTRY/INSTFACTOR // Instruction offset to 'MBAff_Motion'
#ifdef SW_SCOREBOARD
SCOREBOARD_ENTRY/INSTFACTOR // Instruction offset to 'scoreboard'
SCOREBOARD_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'scoreboard_MBAFF'
#elif defined(HW_SCOREBOARD)
SETHWSCOREBOARD_ENTRY/INSTFACTOR // Instruction offset to 'AVC_SetIntraDepend'
SETHWSCOREBOARD_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_SetIntraDependMBAFF'
#endif // SW_SCOREBOARD
#ifdef ENABLE_ILDB
AVC_ILDB_ROOT_Y_ILDB_FRAME_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_Y'
AVC_ILDB_CHILD_Y_ILDB_FRAME_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_Y'
AVC_ILDB_ROOT_UV_ILDB_FRAME_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_UV'
AVC_ILDB_CHILD_UV_ILDB_FRAME_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_UV'
AVC_ILDB_ROOT_Y_ILDB_FIELD_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_Field_Y'
AVC_ILDB_CHILD_Y_ILDB_FIELD_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_Field_Y'
AVC_ILDB_ROOT_UV_ILDB_FIELD_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_Field_UV'
AVC_ILDB_CHILD_UV_ILDB_FIELD_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_Field_UV'
AVC_ILDB_ROOT_Y_ILDB_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_Mbaff_Y'
AVC_ILDB_CHILD_Y_ILDB_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_Mbaff_Y'
AVC_ILDB_ROOT_UV_ILDB_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Root_Mbaff_UV'
AVC_ILDB_CHILD_UV_ILDB_MBAFF_ENTRY/INSTFACTOR // Instruction offset to 'AVC_ILDB_Child_Mbaff_UV'
#endif // ENABLE_ILDB
BSDRESET_ENTRY/INSTFACTOR // Instruction offset to 'BSDReset'
DCRESETDUMMY_ENTRY/INSTFACTOR // Instruction offset to 'DCResetDummy'
// Per-mode offsets: each entry is the distance from the mode-0 label of its
// kernel (mode 0 itself is the implicit 0 shown in the commented line).
// Unlike the entries above, these are not divided by INSTFACTOR.
// 0 // Instruction offset to Intra_4x4_luma_prediction_mode_0
INTRA_4X4_HORIZONTAL_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_1
INTRA_4X4_DC_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_2
INTRA_4X4_DIAG_DOWN_LEFT_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_3
INTRA_4X4_DIAG_DOWN_RIGHT_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_4
INTRA_4X4_VERT_RIGHT_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_5
INTRA_4X4_HOR_DOWN_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_6
INTRA_4X4_VERT_LEFT_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_7
INTRA_4X4_HOR_UP_IP-INTRA_4X4_VERTICAL_IP // Instruction offset to Intra_4x4_luma_prediction_mode_8
// 0 // Instruction offset to Intra_8x8_luma_prediction_mode_0
INTRA_8X8_HORIZONTAL_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_1
INTRA_8X8_DC_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_2
INTRA_8X8_DIAG_DOWN_LEFT_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_3
INTRA_8X8_DIAG_DOWN_RIGHT_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_4
INTRA_8X8_VERT_RIGHT_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_5
INTRA_8X8_HOR_DOWN_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_6
INTRA_8X8_VERT_LEFT_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_7
INTRA_8X8_HOR_UP_IP-INTRA_8X8_VERTICAL_IP // Instruction offset to Intra_8x8_luma_prediction_mode_8
// 0 // Instruction offset to Intra_16x16_luma_prediction_mode_0
INTRA_16x16_HORIZONTAL_IP-INTRA_16x16_VERTICAL_IP // Instruction offset to Intra_16x16_luma_prediction_mode_1
INTRA_16x16_DC_IP-INTRA_16x16_VERTICAL_IP // Instruction offset to Intra_16x16_luma_prediction_mode_2
INTRA_16x16_PLANE_IP-INTRA_16x16_VERTICAL_IP // Instruction offset to Intra_16x16_luma_prediction_mode_3
// 0 // Instruction offset to intra_chroma_prediction_mode_0
INTRA_CHROMA_HORIZONTAL_IP-INTRA_CHROMA_DC_IP // Instruction offset to intra_chroma_prediction_mode_1
INTRA_CHROMA_VERTICAL_IP-INTRA_CHROMA_DC_IP // Instruction offset to intra_chroma_prediction_mode_2
INTRA_Chroma_PLANE_IP-INTRA_CHROMA_DC_IP // Instruction offset to intra_chroma_prediction_mode_3
// NOTE(review): the expression below looks like it is meant to pack four
// (intra_Pred_4x4_Y_IP - ADD_ERROR_SBn_IP) offsets into one value using *0x100
// shifts. It has no parentheses, so the result depends on how the assembler's
// $table evaluator orders '-', '*' and '+' (with C precedence only the
// ADD_ERROR_SBn_IP terms are scaled) - confirm against the tool before editing.
intra_Pred_4x4_Y_IP-ADD_ERROR_SB3_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB2_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB1_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB0_IP // Instruction offset to intra_4x4_pred_module
}

View File

@@ -0,0 +1,202 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// Kernel entry points exported from this file, one `.export entry_point`
// directive per entry. The scoreboard and ILDB groups below are emitted only
// when the matching build macro is defined.
// NOTE(review): presumably each export is what provides the <NAME>_ENTRY
// symbol used by the instruction-offset tables -- confirm against the assembler.
.export entry_point INTRA_16x16
.export entry_point INTRA_8x8
.export entry_point INTRA_4x4
.export entry_point INTRA_PCM
.export entry_point FRAME_MB
.export entry_point FIELD_MB
.export entry_point MBAFF_MB
// Scoreboard entries: SW_SCOREBOARD selects the SCOREBOARD* pair; otherwise
// HW_SCOREBOARD selects the SETHWSCOREBOARD* pair. If neither macro is
// defined, no scoreboard entry point is exported.
#ifdef SW_SCOREBOARD
.export entry_point SCOREBOARD
.export entry_point SCOREBOARD_MBAFF
#elif defined(HW_SCOREBOARD)
.export entry_point SETHWSCOREBOARD
.export entry_point SETHWSCOREBOARD_MBAFF
#endif // SW_SCOREBOARD / HW_SCOREBOARD
// ILDB entries: ROOT and CHILD variants for Y and UV, each in FRAME, FIELD
// and MBAFF flavours (12 exports), present only when ENABLE_ILDB is defined.
#ifdef ENABLE_ILDB
.export entry_point AVC_ILDB_ROOT_Y_ILDB_FRAME
.export entry_point AVC_ILDB_CHILD_Y_ILDB_FRAME
.export entry_point AVC_ILDB_ROOT_UV_ILDB_FRAME
.export entry_point AVC_ILDB_CHILD_UV_ILDB_FRAME
.export entry_point AVC_ILDB_ROOT_Y_ILDB_FIELD
.export entry_point AVC_ILDB_CHILD_Y_ILDB_FIELD
.export entry_point AVC_ILDB_ROOT_UV_ILDB_FIELD
.export entry_point AVC_ILDB_CHILD_UV_ILDB_FIELD
.export entry_point AVC_ILDB_ROOT_Y_ILDB_MBAFF
.export entry_point AVC_ILDB_CHILD_Y_ILDB_MBAFF
.export entry_point AVC_ILDB_ROOT_UV_ILDB_MBAFF
.export entry_point AVC_ILDB_CHILD_UV_ILDB_MBAFF
#endif // ENABLE_ILDB
// Entries exported unconditionally, regardless of the macros above.
.export entry_point BSDRESET
.export entry_point DCRESETDUMMY
// Code labels exported unconditionally, one `.export label` directive each.
// NOTE(review): presumably each export is what provides the <label>_IP symbol
// that other files subtract to form instruction offsets -- confirm.
//
// Intra 16x16 labels (4).
.export label INTRA_16x16_VERTICAL
.export label INTRA_16x16_HORIZONTAL
.export label INTRA_16x16_DC
.export label INTRA_16x16_PLANE
// Intra 8x8 labels (9). Note the upper-case "8X8" spelling, unlike "16x16".
.export label INTRA_8X8_VERTICAL
.export label INTRA_8X8_HORIZONTAL
.export label INTRA_8X8_DC
.export label INTRA_8X8_DIAG_DOWN_LEFT
.export label INTRA_8X8_DIAG_DOWN_RIGHT
.export label INTRA_8X8_VERT_RIGHT
.export label INTRA_8X8_HOR_DOWN
.export label INTRA_8X8_VERT_LEFT
.export label INTRA_8X8_HOR_UP
// Intra 4x4 labels (9), same set of suffixes as the 8x8 group.
.export label INTRA_4X4_VERTICAL
.export label INTRA_4X4_HORIZONTAL
.export label INTRA_4X4_DC
.export label INTRA_4X4_DIAG_DOWN_LEFT
.export label INTRA_4X4_DIAG_DOWN_RIGHT
.export label INTRA_4X4_VERT_RIGHT
.export label INTRA_4X4_HOR_DOWN
.export label INTRA_4X4_VERT_LEFT
.export label INTRA_4X4_HOR_UP
// Intra chroma labels (4). "INTRA_Chroma_PLANE" is mixed-case on purpose of
// matching the label as spelled in the source; do not normalise the case here.
.export label INTRA_CHROMA_DC
.export label INTRA_CHROMA_HORIZONTAL
.export label INTRA_CHROMA_VERTICAL
.export label INTRA_Chroma_PLANE
// 4x4 prediction label and the four ADD_ERROR_SB0..SB3 labels.
.export label intra_Pred_4x4_Y
.export label ADD_ERROR_SB0
.export label ADD_ERROR_SB1
.export label ADD_ERROR_SB2
.export label ADD_ERROR_SB3
// Label named AllAVC_END. NOTE(review): presumably marks the end of the
// combined code -- confirm where it is placed.
.export label AllAVC_END
// Everything down to the matching #endif applies only when SW_SCOREBOARD is
// defined: ten label exports (five plain, five MBAFF_-prefixed) followed by a
// fallback definition for each label's <label>_IP symbol.
#ifdef SW_SCOREBOARD
.export label MB_Loop
.export label No_Message
.export label Dependency_Check
.export label Notify_MSG
.export label Update_CurMB
.export label MBAFF_MB_Loop
.export label MBAFF_No_Message
.export label MBAFF_Dependency_Check
.export label MBAFF_Notify_MSG
.export label MBAFF_Update_CurMB
// (commented-out placeholder: an ".export label" directive with no label name)
// Definitions for first pass MC kernel building.
// Each guard below defines <label>_IP as 0 only if it is not already defined,
// so an existing definition always wins.
// NOTE(review): presumably the real values are supplied on a later build
// pass once the label addresses are known -- confirm against the build rules.
#ifndef No_Message_IP
#define No_Message_IP 0
#endif
#ifndef Dependency_Check_IP
#define Dependency_Check_IP 0
#endif
#ifndef Notify_MSG_IP
#define Notify_MSG_IP 0
#endif
#ifndef Update_CurMB_IP
#define Update_CurMB_IP 0
#endif
#ifndef MBAFF_No_Message_IP
#define MBAFF_No_Message_IP 0
#endif
#ifndef MBAFF_Dependency_Check_IP
#define MBAFF_Dependency_Check_IP 0
#endif
#ifndef MBAFF_Notify_MSG_IP
#define MBAFF_Notify_MSG_IP 0
#endif
// The two *_MB_Loop_IP fallbacks are provided only when AS_ENABLED is NOT
// defined; with AS_ENABLED set, this file leaves them undefined.
#ifndef AS_ENABLED
#ifndef MBAFF_MB_Loop_IP
#define MBAFF_MB_Loop_IP 0
#endif
#ifndef MB_Loop_IP
#define MB_Loop_IP 0
#endif
#endif // !AS_ENABLED
#ifndef MBAFF_Update_CurMB_IP
#define MBAFF_Update_CurMB_IP 0
#endif
#endif // SW_SCOREBOARD
// Everything down to the matching #endif applies only when ENABLE_ILDB is
// defined: six label exports (ALL_SPAWNED, SLEEP_ENTRY and POST_SLEEP, each
// for UV and Y in the ILDB_FRAME variant) followed by a fallback definition
// for each label's <label>_IP symbol.
#ifdef ENABLE_ILDB
.export label ALL_SPAWNED_UV_ILDB_FRAME
.export label SLEEP_ENTRY_UV_ILDB_FRAME
.export label POST_SLEEP_UV_ILDB_FRAME
.export label ALL_SPAWNED_Y_ILDB_FRAME
.export label SLEEP_ENTRY_Y_ILDB_FRAME
.export label POST_SLEEP_Y_ILDB_FRAME
// Definitions for first pass ILDB kernel building.
// Each guard below defines <label>_IP as 0 only if it is not already defined,
// so an existing definition always wins.
// NOTE(review): presumably the real values are supplied on a later build
// pass once the label addresses are known -- confirm against the build rules.
#ifndef ALL_SPAWNED_UV_ILDB_FRAME_IP
#define ALL_SPAWNED_UV_ILDB_FRAME_IP 0
#endif
#ifndef SLEEP_ENTRY_UV_ILDB_FRAME_IP
#define SLEEP_ENTRY_UV_ILDB_FRAME_IP 0
#endif
#ifndef POST_SLEEP_UV_ILDB_FRAME_IP
#define POST_SLEEP_UV_ILDB_FRAME_IP 0
#endif
#ifndef ALL_SPAWNED_Y_ILDB_FRAME_IP
#define ALL_SPAWNED_Y_ILDB_FRAME_IP 0
#endif
#ifndef SLEEP_ENTRY_Y_ILDB_FRAME_IP
#define SLEEP_ENTRY_Y_ILDB_FRAME_IP 0
#endif
#ifndef POST_SLEEP_Y_ILDB_FRAME_IP
#define POST_SLEEP_Y_ILDB_FRAME_IP 0
#endif
#endif // ENABLE_ILDB

View File

@@ -0,0 +1,98 @@
/*
* All intra-prediction macroblock kernels
* Copyright © <2010>, Intel Corporation.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file was originally licensed under the following license
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// 2857702934 // 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
// 0 // Offset to Intra_16x16 luma prediction mode 0
// 9 // Offset to Intra_16x16 luma prediction mode 1
// 19 // Offset to Intra_16x16 luma prediction mode 2
// 42 // Offset to Intra_16x16 luma prediction mode 3
// 2857699336 // 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
// 0 // Offset to Intra_8x8 luma prediction mode 0
// 5 // Offset to Intra_8x8 luma prediction mode 1
// 10 // Offset to Intra_8x8 luma prediction mode 2
// 26 // Offset to Intra_8x8 luma prediction mode 3
// 36 // Offset to Intra_8x8 luma prediction mode 4
// 50 // Offset to Intra_8x8 luma prediction mode 5
// 68 // Offset to Intra_8x8 luma prediction mode 6
// 85 // Offset to Intra_8x8 luma prediction mode 7
// 95 // Offset to Intra_8x8 luma prediction mode 8
// 2857698308 // 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
// 0 // Offset to Intra_4x4 luma prediction mode 0
// 2 // Offset to Intra_4x4 luma prediction mode 1
// 4 // Offset to Intra_4x4 luma prediction mode 2
// 16 // Offset to Intra_4x4 luma prediction mode 3
// 23 // Offset to Intra_4x4 luma prediction mode 4
// 32 // Offset to Intra_4x4 luma prediction mode 5
// 45 // Offset to Intra_4x4 luma prediction mode 6
// 59 // Offset to Intra_4x4 luma prediction mode 7
// 66 // Offset to Intra_4x4 luma prediction mode 8
// 2857700364 // 0xAA550C0C - GUID for intra chroma prediction mode offsets
// 0 // Offset to intra chroma prediction mode 0
// 30 // Offset to intra chroma prediction mode 1
// 36 // Offset to intra chroma prediction mode 2
// 41 // Offset to intra chroma prediction mode 3
// Kernel name: AllIntra.asm
//
// Combined intra kernel: the Intra_PCM, Intra_16x16, Intra_8x8 and Intra_4x4
// sources are included, in that order, inside a single kernel named ALLINTRA.
//
// $Revision: 1 $
// $Date: 4/13/06 4:35p $
//
// ----------------------------------------------------
// Main: ALLINTRA
// ----------------------------------------------------
// ALLHWMC and COMBINED_KERNEL are defined before the includes so that the
// included sources can test them.
// NOTE(review): presumably they tell each included file that it is being
// built as part of a combined kernel rather than stand-alone -- confirm in
// the included sources.
#define ALLHWMC
#define COMBINED_KERNEL
.kernel ALLINTRA
// Intra kernel sources that make up ALLINTRA.
// NOTE(review): the include order presumably fixes the code layout and hence
// the offsets listed in this file's header comments -- do not reorder without
// regenerating them.
#include "Intra_PCM.asm"
#include "Intra_16x16.asm"
#include "Intra_8x8.asm"
#include "Intra_4x4.asm"
// End of ALLINTRA
.end_kernel

Some files were not shown because too many files have changed in this diff Show More