libva-1.6.2

git-svn-id: svn://kolibrios.org@6146 a494cfbc-eb01-0410-851d-a64ba20cac60
2016-02-05 22:00:38 +00:00
parent 84cfd5cf57
commit a08f61ddb9
1084 changed files with 430297 additions and 0 deletions
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/Makefile.am
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/Makefile.am
@@ -0,0 +1,6 @@
+# Shader directories the recursive targets descend into, in the order listed.
+SUBDIRS = \
+	h264 \
+	mpeg2 \
+	render \
+	post_processing \
+	vme \
+	utils
+
+# gpp.py is listed here so that it is copied into the distribution.
+EXTRA_DIST = gpp.py
+
+# Let maintainer-clean also delete the generated Makefile.in.
+MAINTAINERCLEANFILES = Makefile.in
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/Makefile.in
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/Makefile.in
@@ -0,0 +1,634 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/shaders
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/src/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	distdir
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DRM_CFLAGS = @DRM_CFLAGS@
+DRM_LIBS = @DRM_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIBS = @EGL_LIBS@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GEN4ASM = @GEN4ASM@
+GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
+GEN4ASM_LIBS = @GEN4ASM_LIBS@
+GIT = @GIT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBDRM_VERSION = @LIBDRM_VERSION@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
+LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
+LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
+LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
+LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
+LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
+LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
+LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
+LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
+LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PYTHON2 = @PYTHON2@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
+WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+wayland_protocoldir = @wayland_protocoldir@
+wayland_scanner = @wayland_scanner@
+SUBDIRS = h264 mpeg2 render post_processing vme utils
+EXTRA_DIST = gpp.py
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/shaders/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+	check-am clean clean-generic clean-libtool cscopelist-am ctags \
+	ctags-am distclean distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-am maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/gpp.py
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/gpp.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+#coding=UTF-8
+
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors:
+#    Chen, Yangyang <yangyang.chen@intel.com>
+#    Han, Haofu     <haofu.han@intel.com>
+#
+
+import sys
+
+class Block:
+	"""One node of the tree built from a shader template.
+
+	A node either holds one line of output text (``text`` is a string) or
+	stands for a ``$for(init; cond; step) {`` loop (``text`` is None) whose
+	body is the list of child nodes in ``subblocks``.  The loop parameters
+	(``param_*``) exist only after ``checkfor`` has parsed a loop header.
+	"""
+
+	def __init__(self, ln=0, s=None):
+		"""Create a node for line number ``ln`` with optional text ``s``."""
+		assert type(ln) == int
+		assert type(s) == str or s is None
+		self.lineno = ln
+		self.text = s
+		self.subblocks = []
+
+	def append(self, block):
+		"""Add ``block`` as the last child of this node."""
+		self.subblocks.append(block)
+
+	def checkfor(self, line):
+		"""Parse a ``$for(init; cond; step) {`` header and store its parameters.
+
+		Sets ``param_init``, ``param_num``, ``param_op``, ``param_limit`` and
+		``param_step``.  Raises Exception, with the message prefixed by the
+		line number of this node, when the header is malformed.
+		"""
+		import re
+		p = r'\$\s*for\s*'
+		if re.match(p, line) is None:
+			raise Exception(self.__errmsg('syntax error'))
+		# Both parentheses must be present.  Without these checks a header
+		# such as "$for {" surfaced as an IndexError with no line number.
+		head = line.split('(', 1)
+		if len(head) != 2:
+			raise Exception(self.__errmsg('missing "("'))
+		tail = head[1].rsplit(')', 1)
+		if len(tail) != 2:
+			raise Exception(self.__errmsg('missing ")"'))
+		conds = tail[0].split(';')
+		lb = tail[1]
+		if lb.strip() != '{':
+			raise Exception(self.__errmsg('missing "{"'))
+		if len(conds) != 3:
+			raise Exception(self.__errmsg('syntax error(miss ";"?)'))
+		init = conds[0]
+		cond = conds[1]
+		step = conds[2]
+		# Order matters: __parse_step compares against param_num, which
+		# __parse_init sets.
+		self.__parse_init(init)
+		self.__parse_cond(cond)
+		self.__parse_step(step)
+
+	def __parse_init(self, init):
+		"""Evaluate the comma-separated start values into ``param_init``."""
+		inits = init.split(',')
+		self.param_init = []
+		for ini in inits:
+			# NOTE(review): eval() executes whatever Python expression the
+			# input file contains; do not run this script on untrusted input.
+			try:
+				val = eval(ini)
+				self.param_init.append(val)
+			except Exception:
+				raise Exception(self.__errmsg('non an exp: %s'%ini))
+		self.param_num = len(inits)
+
+	def __parse_cond(self, cond):
+		"""Parse the loop condition, e.g. ``<8`` or ``>=0``.
+
+		The condition names no variable: it is one of ``<``, ``>``, ``<=``,
+		``>=`` followed by an expression.  The operator is stored in
+		``param_op`` and the evaluated expression in ``param_limit``.
+		"""
+		cond = cond.strip()
+		# Slicing (rather than indexing) keeps an empty or one-character
+		# condition from raising IndexError; it is reported below instead.
+		if cond[:1] not in ['<', '>']:
+			raise Exception(self.__errmsg('syntax error'))
+		if cond[1:2] == '=':
+			self.param_op = cond[:2]
+			limit = cond[2:]
+		else:
+			self.param_op = cond[0]
+			limit = cond[1:]
+		try:
+			self.param_limit = eval(limit)
+		except Exception:
+			raise Exception(self.__errmsg('non an exp: %s'%limit))
+
+	def __parse_step(self, step):
+		"""Evaluate the comma-separated increments into ``param_step``.
+
+		There must be exactly one increment per start value.
+		"""
+		steps = step.split(',')
+		if len(steps) != self.param_num:
+			raise Exception(self.__errmsg('params number no match'))
+		self.param_step = []
+		for st in steps:
+			try:
+				val = eval(st)
+				self.param_step.append(val)
+			except Exception:
+				raise Exception(self.__errmsg('non an exp: %s'%st))
+
+	def __errmsg(self, msg=''):
+		"""Return ``msg`` prefixed with this node's line number."""
+		return '%d: %s' % (self.lineno, msg)
+
+def readlines(f):
+	"""Read all lines of ``f`` and split each at embedded backslash-n pairs.
+
+	The separator is the two-character text backslash followed by "n", not
+	a newline character; the separator itself is dropped.  Real line endings
+	are left in place.
+	"""
+	pieces = []
+	for raw in f.readlines():
+		# str.split returns the whole string as a single item when the
+		# separator is absent, so no membership test is needed first.
+		pieces.extend(raw.split('\\n'))
+	return pieces
+
+def parselines(lines):
+	"""Build the block tree for ``lines`` and return its root Block.
+
+	After stripping surrounding whitespace, a line starting with ``$`` opens
+	a loop block, a line starting with ``}`` closes the innermost open loop,
+	empty lines and lines starting with ``#`` are dropped, and every other
+	line becomes a text block inside the innermost open loop (or the root).
+
+	Line numbers in error messages count the entries of ``lines``, starting
+	at 1.  A loop that is still open when the input ends is not diagnosed;
+	it simply ends with the input.
+
+	Raises Exception for a malformed loop header or for a ``}`` that has no
+	loop to close.
+	"""
+	root = Block(0)
+	stack = [root]
+	for lineno, line in enumerate(lines, 1):
+		line = line.strip()
+		if line.startswith('$'):
+			block = Block(lineno)
+			block.checkfor(line)
+			stack[-1].append(block)
+			stack.append(block)
+		elif line.startswith('}'):
+			# Only the root is left, so this brace closes nothing.  Popping
+			# the root would make the next line fail with an IndexError
+			# that carries no line number.
+			if len(stack) == 1:
+				raise Exception('%d: unmatched "}"' % lineno)
+			stack.pop()
+		elif line and not line.startswith('#'):
+			stack[-1].append(Block(lineno, line))
+	return root
+
+def writeblocks(outfile, blocks):
+	"""Expand the block tree ``blocks`` and write the result to ``outfile``.
+
+	Text blocks produce one output line each.  Every ``%N`` in the text is
+	replaced with the current value of the N-th parameter (counting from 1)
+	of the loop that directly contains the line; parameters of outer loops
+	are not visible.  A ``;`` is appended unless the line starts with ``.``
+	or already ends with ``:`` or ``;``.
+
+	Loop blocks repeat their children for as long as the first loop
+	parameter satisfies the loop condition, then add the step to every
+	parameter.
+
+	All lines are collected first and written in one go, followed by a
+	newline.  Raises Exception when a line refers to a parameter that the
+	enclosing loop does not have.
+	"""
+	import operator
+	import re
+
+	buf = []
+	param_re = re.compile(r'\%(\d+)')
+	compare = {
+		'<': operator.lt,
+		'>': operator.gt,
+		'<=': operator.le,
+		'>=': operator.ge,
+	}
+
+	def check_cond(op, cur, lim):
+		"""Return whether ``cur op lim`` holds for one of the four operators."""
+		assert op in compare
+		assert type(cur) == int
+		assert type(lim) == int
+		return compare[op](cur, lim)
+
+	def do_writeblock(block, curs):
+		"""Emit ``block``; ``curs`` holds the enclosing loop's current values."""
+		if block.text is not None:
+			def substitute(match):
+				index = int(match.group(1)) - 1
+				# Parameters are numbered from 1; "%0" would otherwise wrap
+				# around to the last parameter through negative indexing.
+				if index < 0:
+					raise Exception('%d: invalid param(%%%d)'%(block.lineno, index+1))
+				if index >= len(curs):
+					raise Exception('%d: too many param(%%%d)'%(block.lineno, index+1))
+				return str(curs[index])
+			# Replace every reference in a single pass so that "%1" cannot
+			# eat the front of "%10".
+			newline = param_re.sub(substitute, block.text)
+			if newline and \
+					not newline.startswith('.') and \
+					not newline.endswith(':') and \
+					not newline.endswith(';'):
+				newline += ';'
+			buf.append(newline)
+		else:
+			# Work on a copy: the values are advanced in place below, and a
+			# loop nested inside another loop must start from its initial
+			# values again on every pass of the outer loop.
+			for_curs = list(block.param_init)
+			while check_cond(block.param_op, for_curs[0], block.param_limit):
+				for sblock in block.subblocks:
+					do_writeblock(sblock, for_curs)
+				for i in range(0, block.param_num):
+					for_curs[i] += block.param_step[i]
+
+	for block in blocks.subblocks:
+		do_writeblock(block, [])
+	outfile.write('\n'.join(buf))
+	outfile.write('\n')
+
+if __name__ == '__main__':
+	# Usage: gpp.py INPUT [OUTPUT].  Without OUTPUT the result goes to stdout.
+	# Messages are written with sys.stderr.write so that they work on both
+	# Python 2 and Python 3.
+	argc = len(sys.argv)
+	if argc == 1:
+		sys.stderr.write('no input file\n')
+		# A missing argument is an error; report it through the exit status
+		# so that a calling build does not carry on as if output existed.
+		sys.exit(1)
+
+	try:
+		infile = open(sys.argv[1], 'r')
+	except IOError:
+		sys.stderr.write('can not open %s\n' % sys.argv[1])
+		sys.exit(1)
+
+	try:
+		lines = readlines(infile)
+	finally:
+		infile.close()
+
+	# Parse before touching the output, so that a syntax error in the input
+	# does not leave a freshly truncated output file behind.
+	blocks = parselines(lines)
+
+	if argc == 2:
+		outfile = sys.stdout
+	else:
+		try:
+			outfile = open(sys.argv[2], 'w')
+		except IOError:
+			sys.stderr.write('can not write to %s\n' % sys.argv[2])
+			sys.exit(1)
+
+	try:
+		writeblocks(outfile, blocks)
+	finally:
+		# Close (and thereby flush) a file this script opened; stdout is
+		# left alone.
+		if outfile is not sys.stdout:
+			outfile.close()
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/Makefile.am
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/Makefile.am
@@ -0,0 +1,4 @@
+# Subdirectories handled recursively from this directory, in the order listed.
+SUBDIRS = \
+	ildb \
+	mc
+
+# Let maintainer-clean also delete Makefile.in.
+MAINTAINERCLEANFILES = Makefile.in
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/Makefile.in
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/Makefile.in
@@ -0,0 +1,633 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+# Shell test that succeeds when both $(MAKEFILE_LIST) and $(MAKELEVEL)
+# expand to non-empty strings; as the name says, this is taken to mean that
+# GNU make is running.
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+# Shell fragment whose exit status tells whether the single-character option
+# named by the shell variable $target_option is among make's flags.
+#  - target_option must be exactly one character, otherwise the fragment
+#    prints an internal error to stderr and exits 1.
+#  - The flags are taken from $MFLAGS when $(am__is_gnu_make) succeeds, else
+#    from $MAKEFLAGS with backslash-escaped whitespace sequences removed.
+#  - Words containing '=' or starting with '--' are ignored.  For -I, -O and
+#    -l the attached argument is stripped (or the following word skipped),
+#    and the word after a lone -d, -E, -D, -m, -J or -T is skipped, so that
+#    option arguments are not mistaken for option letters.
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+# Sub-shell wrappers around the fragment above: they succeed when the make
+# flags contain the letter 'n' (dry run) or 'k' (keep going) respectively.
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/shaders/h264
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/src/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	distdir
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+# Shell fragment.  Input: the shell variables dir1 and dir2.  Output: reldir,
+# i.e. dir2 rewritten so that it can be used from inside dir1 (the distdir
+# rule uses it for paths handed to a make that runs after "cd $$subdir").
+# dir1 is consumed one path component at a time:
+#   "."   is ignored;
+#   ".."  moves the last component of the tracked directory (dir0, initially
+#         `pwd`) onto the front of dir2;
+#   any other component is removed from the front of dir2 when dir2 starts
+#   with that same component, otherwise "../" is prepended to dir2.
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DRM_CFLAGS = @DRM_CFLAGS@
+DRM_LIBS = @DRM_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIBS = @EGL_LIBS@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GEN4ASM = @GEN4ASM@
+GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
+GEN4ASM_LIBS = @GEN4ASM_LIBS@
+GIT = @GIT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBDRM_VERSION = @LIBDRM_VERSION@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
+LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
+LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
+LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
+LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
+LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
+LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
+LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
+LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
+LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PYTHON2 = @PYTHON2@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
+WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+wayland_protocoldir = @wayland_protocoldir@
+wayland_scanner = @wayland_scanner@
+SUBDIRS = ildb mc
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
+all: all-recursive
+
+.SUFFIXES:
+# Regenerate Makefile.in.  If an out-of-date prerequisite ($?) matches
+# $(am__configure_deps), the top-level "am--refresh" target is run instead:
+# the recipe stops with success when that leaves Makefile.in in place, and
+# with failure when the refresh itself fails.  In every other case automake
+# is run from $(top_srcdir) for this directory's Makefile only.
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/h264/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/shaders/h264/Makefile
+# Regenerate this directory's Makefile.  When config.status is among the
+# newer prerequisites, the whole tree is refreshed through the top-level
+# "am--refresh" target; otherwise config.status is re-run for this one file.
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+# Shared recipe for every "<target>-recursive" goal: for each directory in
+# $(SUBDIRS) (or $(DIST_SUBDIRS) for distclean-* and maintainer-clean-*),
+# run <target> there; a "." entry maps to the local "<target>-am".  When "."
+# was not listed, "<target>-am" is run here after the loop.  Under make -k a
+# failing sub-make is only recorded and the recipe fails at the very end;
+# otherwise the first failure aborts the loop.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+# Run $(ETAGS) for this directory.  Each sub-directory that already has a
+# TAGS file is pulled in through --etags-include (or --include when etags
+# does not accept the former), followed by the de-duplicated list of
+# $(am__tagged_files).  Nothing is run when ETAGS_ARGS, the include list and
+# the file list are all empty.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Populate $(distdir) with this directory's share of the distribution.
+# First recipe: normalise $(DISTFILES) (a leading $(srcdir)/ is dropped, a
+# leading $(top_srcdir)/ becomes $(top_builddir)/), create the needed
+# sub-directories, then copy every entry -- directories with cp -fpR, plain
+# files with cp -p unless already present -- taking it from the current
+# directory when it exists there and from $(srcdir) otherwise.
+# Second recipe: run "distdir" in each $(DIST_SUBDIRS) entry other than ".",
+# passing distdir/top_distdir rewritten by $(am__relativize) so that they
+# stay valid after the cd into the sub-directory.
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+# Run "install" with INSTALL_PROGRAM and install_sh_PROGRAM replaced by
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s.  When $(STRIP) is
+# non-empty it is additionally handed over as STRIPPROG through
+# INSTALL_PROGRAM_ENV.
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+	check-am clean clean-generic clean-libtool cscopelist-am ctags \
+	ctags-am distclean distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	installdirs-am maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB.inc
@@ -0,0 +1,748 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#if !defined(__AVC_ILDB_HEADER__)	// Make sure this file is only included once
+#define __AVC_ILDB_HEADER__
+
+// Module name: AVC_ILDB.inc
+
+#undef ORIX
+#undef ORIY
+
+//========== Root thread input parameters ==================================================
+#define RootParam				r1		// :w
+#define	MBsCntX					r1.0	// :w, MB count per row
+#define	MBsCntY					r1.1	// :w, MB count per col
+//#define PicType					r1.2	// :w, Picture type
+#define	MaxThreads				r1.3	// :w, Max Thread limit
+#define EntrySignature			r1.4	// :w, Debug flag
+#define BitFields				r1.5	// :uw
+#define 	MbaffFlag			BIT0	// :w, mbaff flag, bit 0 in BitFields
+#define 	BotFieldFlag		BIT1	// :w, bottom field flag, bit 1 in BitFields
+#define 	CntlDataExpFlag		BIT2	// :w, Control Data Expansion Flag, bit 2 in BitFields
+#define RampConst				r1.12	// 8 :ub, Ramp constant, r1.12 - r1.19:ub
+#define StepToNextMB			r1.20	// :b, 2 bytes
+#define Minus2Minus1			r1.22	// :b, 2 bytes
+// next one starts at r1.12:w (r1.11:w is bytes 22-23, already used by Minus2Minus1 above) - TODO confirm
+
+#define 	TopFieldFlag		0xFFFD	// :w, top field flag, used to set bit1 to 0.
+
+
+//========== Root Locals =============================================================
+
+// Variables in root kernel for launching child thread
+#define ChildParam				r2.0	// :w
+//Not used  #define	URBOffset				r2.3	// :w, Each row occupies 4 URB entries.  All children in the same row use the same set of URB entries
+#define	CurCol					r2.10	// :w, current col
+#define	CurColB					r2.20	// :b, current col
+#define	CurRow					r2.11	// :w, current row
+#define	CurRowB					r2.22	// :b, current row
+#define	LastCol					r2.12	// :w, last col
+#define	LastRow					r2.13	// :w, last row
+
+// Root local constants during spawning process
+#define	Col_Boundary			r3.0	// :w, 
+#define	Row_Boundary			r3.1	// :w, 
+//#define	TotalBlocks				r3.2	// :w, Total blocks in the frame 
+#define	URB_EntriesPerMB_2		r3.3	// :w, = URB entries per MB, but in a different form
+#define	URBOffsetUVBase			r3.4	// :w, UV Base offset in URB
+
+#define	Temp1_D					r3.6	// :d:
+#define	Temp1_W					r3.12	// :w, Temp1
+#define	Temp1_B					r3.24	// :b, = Temp1_W
+#define	Temp2_W					r3.13	// :w, Temp2
+#define	Temp2_B					r3.26	// :b, = Temp2_W
+
+// Root local variables
+#define JumpTable				r4		// :d, jump table
+#define JUMPTABLE_BASE			4*32	// presumably the byte offset of r4 (JumpTable) from r0.0 - TODO confirm
+#define JumpAddr				a0.7
+
+#define TopRowForScan			r5.0 	// :w, tracks the top row for scan. All rows above this row are deblocked already. 
+
+
+// Child Thread R0 Header Field
+#define MRF0					m0		
+#define CT_R0Hdr				m1
+
+/*
+.declare GatewayAperture	Base=r50.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud	
+#define GatewayApertureB	1600	// r50 byte offset from r0.0
+
+// Chroma root thread updates luma root's ThreadLimit at r10.0:w via gateway
+#define ThreadLimit				r62.0	// :w, thread limit //r56.0
+#define THREAD_LIMIT_OFFSET		0x01800000	// Offset from r50 to r56 = 12*32 = 384 = 0x0180.  0x180 << 16 = 0x01800000
+	//#define THREAD_LIMIT_OFFSET		0x00C00000	// Offset from r50 to r56 = 6*32 = 192 = 0x00C0.  0xC0 << 16 = 0x00C00000
+*/
+
+// Gateway size is 16 GRFs.  68 rows of MBs take 9 GRFs (r6 - r14)
+// For CTG: Expanded to support 1280 rows of pixels (80 rows of MBs).  It requires 10 GRFs (r6 - r15)
+.declare GatewayAperture	Base=r6.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud	
+#define GatewayApertureB	192	// byte offset of r6 (the GatewayAperture base) from r0.0: 6*32 = 192
+
+// Chroma root thread updates luma root's ThreadLimit via gateway
+// NOTE(review): this comment used to give the location as r10.0:w, but ThreadLimit is defined as r18.0 below - confirm
+#define ThreadLimit				r18.0	// :w, thread limit 
+#define THREAD_LIMIT_OFFSET		0x01800000	// Offset from r6 (GatewayAperture base) to r18 (ThreadLimit) = 12*32 = 384 = 0x0180.  0x180 << 16 = 0x01800000
+#define	TotalBlocks				r18.1	// :w, Total blocks in the frame 
+
+// Root local variables
+#define	ChildThreadsID			r19.0	// :w, Child thread ID, unique to each child
+#define	OutstandingThreads		r20.0	// :w, Outstanding threads 
+#define ProcessedMBs			r20.1	// :w, # of MBs processed 
+
+#define	URBOffset				r21.0	// :w, Each row occupies 4 URB entries.  All children in the same row use the same set of URB entries
+
+//=================================================================================
+
+#define ScoreBd_Size			128 //96 // size of Status[] or ProcCol[]
+
+#define ScoreBd_Idx				2		
+//#define Saved_Col				0
+
+#define StatusAddr				a0.4	// :w, point to r50
+//=================================================================================
+
+
+// Gateway payload
+#define GatewayPayload			r48.0	// :ud
+#define GatewayPayloadKey		r48.8	// :uw
+#define DispatchID				r48.20	// :ub
+#define RegBase_GatewaySize 	r48.5	// :ud, used in open a gateway
+#define Offset_Length			r48.5	// :ud, used in forwardmsg back to root
+#define EUID_TID				r48.9	// :uw, used in forwardmsg back to root
+
+// Gateway response
+#define GatewayResponse 		r49.0	// :ud, one GRF
+
+#define URBWriteMsgDesc			a0.0	// Used in URB write, :ud
+#define URBWriteMsgDescLow		a0.0	// Used in URB write, :uw
+#define URBWriteMsgDescHigh		a0.1	// Used in URB write, :uw
+
+.declare WritebackResponse		Base=r50 ElementSize=4 SrcRegion=REGION(8,1) Type=ud	// 1 GRF for write backs
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+// IDesc Order					Offset
+//
+// 0) luma root					0 from luma root
+// 1) luma child				16 from luma root
+// 2) chroma root				32 from luma root
+// 3) chroma child				16 from chroma root
+
+// 4) luma field root			0 from luma field root
+// 5) luma field child			16 from luma field root
+// 6) chroma field root			32 from luma field root
+// 7) chroma field child		16 from chroma field root
+
+// 8) luma Mbaff root			0 from luma Mbaff root
+// 9) luma Mbaff child 			16 from luma Mbaff root
+// 10) chroma Mbaff root		32 from luma Mbaff root
+// 11) chroma Mbaff child		16 from chroma Mbaff root
+
+// IDesc offset within non-mbaff or mbaff mode
+#define CHROMA_ROOT_OFFSET 		32		// Offset from luma root to chroma root
+#define CHILD_OFFSET	  		16		// Offset from luma root to luma child, 
+										// and from chroma root to chroma child
+/////////////////////////////////////////////////////////////////////////////////////////////
+
+
+//========== End of Root Variables ======================================================
+
+
+//========== Child thread input parameters ==============================================
+//#define	MBsCntX					r1.0	// :w, MB count per row (same as root)
+//#define	MBsCntY					r1.1	// :w, MB count per col (same as root)
+//#define PicTypeC				r1.2	// :w, Picture type		same as root thread (same as root)
+#define	URBOffsetC				r1.3	// :w, 
+#define EntrySignatureC			r1.4	// :w, Debug field (same as root)
+//#define BitFields				r1.5	// :w (same as root)
+//#define 	MbaffFlag			BIT0	// :w, mbaff flag, bit 0 in BitFields
+//#define 	BotFieldFlag		BIT1	// :w, bottom field flag, bit 1 in BitFields
+//#define 	CntlDataExpFlag		BIT2	// :w, Control Data Expansion Flag, bit 2 in BitFields
+#define RampConstC				r1.12	// 8 :ub, Ramp constant, r1.12 - r1.19:ub.
+#define	ORIX					r1.10	// :w, carry over from root r1 in MB count
+#define	ORIY					r1.11	// :w, carry over from root r1 in MB count
+#define	LastColC				r1.12	// :w, last col
+#define	LastRowC				r1.13	// :w, last row
+
+.declare GatewayApertureC		Base=r1.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud	
+#define GatewayApertureCBase	32		// r1 byte offset from r0.0
+
+
+//========== Child Variables ============================================================
+
+// Mbaff Alpha, Beta, Tc0 vectors for an edge
+.declare Mbaff_ALPHA 	Base=r14.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// r14
+.declare Mbaff_BETA 	Base=r15.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// r15
+.declare Mbaff_TC0 		Base=r16.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// r16
+.declare RRampW	   		Base=r17.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w		// r17
+
+.declare Mbaff_ALPHA2	Base=r45.0  ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// alpha2 = (alpha >> 2) + 2
+
+
+#define	ORIX_CUR				r46.0	// :w, current block origin X in bytes
+#define	ORIY_CUR				r46.1	// :w, current block origin Y in bytes
+#define	ORIX_LEFT				r46.2	// :w, left block origin X in bytes
+#define	ORIY_LEFT				r46.3	// :w, left block origin Y in bytes
+#define	ORIX_TOP				r46.4	// :w, top block origin X in bytes
+#define	ORIY_TOP				r46.5	// :w, top block origin Y in bytes
+//#define FilterSampleFlag		r46.6	// :uw,
+#define	CTemp0_W				r46.7	// :w, child Temp0
+
+#define alpha					r46.8	// :w, Scalar version for non Mbaff
+#define beta					r46.9	// :w, Scalar version for non Mbaff
+#define tc0						r46.20	// 4 :ub, r46.20 ~ r46.23, Scalar version for non Mbaff
+#define MaskA					r46.12	// :uw
+#define MaskB					r46.13	// :uw
+
+// Child control flags
+#define DualFieldMode			r47.0	// Cur MB is frame based, above MB is field based in mbaff mode
+										// :uw, 0 = not in dual field mode,  1 = in dual field mode, filter both top and bot fields
+#define	GateWayOffsetC			r47.1	// :w, Gateway offset for child writing into root space
+#define CntrlDataOffsetY		r47.1	// :ud, MB control data offset
+										// NOTE(review): same "r47.1" text as GateWayOffsetC, but as :ud it presumably addresses bytes 4-7 (the :w form bytes 2-3) - confirm
+#define alpha2					r47.4	// :uw, 	alpha2 = (alpha >> 2) + 2
+
+#define VertEdgePattern			r47.5	// :uw, 
+
+#define	CTemp1_W				r47.6	// :w, child Temp1
+#define	CTemp1_B				r47.12	// :b, = child Temp1_W
+#define	CTemp2_W				r47.7	// :w, child Temp2
+#define	CTemp2_B				r47.14	// :b, = child Temp2_W
+
+// Used in child
+#define ECM_AddrReg				a0.4	// Edge Control Map register
+#define P_AddrReg				a0.6	// point to P samples in left or top MB
+#define Q_AddrReg				a0.7	// point to Q samples in cur MB
+
+
+.declare 	RTempD	   Base=r26.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d	// r26-27
+.declare 	RTempB	   Base=r26.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub	// r26-27
+.declare 	RTempW	   Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r26-27
+#define 	LEFT_TEMP_D		RTempD
+#define 	LEFT_TEMP_B		RTempB
+#define 	LEFT_TEMP_W		RTempW
+
+.declare 	TempRow0   Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r26, same base as RTempD/RTempB/RTempW
+.declare 	TempRow0B  Base=r26.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub	// ub declaration over the same base as TempRow0
+.declare 	TempRow1   Base=r27.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r27
+.declare 	TempRow1B  Base=r27.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub	// ub declaration over the same base as TempRow1
+
+.declare 	CUR_TEMP_D	Base=r28.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d	// 8 GRFs (r28 - r35); the declarations below through aq (r35) share this range
+.declare 	CUR_TEMP_B	Base=r28.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub
+.declare 	CUR_TEMP_W	Base=r28.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
+
+#define FilterSampleFlag		r28.0	// :uw, same register as the base of CUR_TEMP_* and A
+
+.declare 	A   		Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w	// r28
+.declare 	BB   		Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w	// r29
+
+.declare 	TempRow3   Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r30, same base as tc0_exp and tc8
+.declare 	TempRow3B  Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub	// ub declaration over the same base as TempRow3
+
+.declare 	tc0_exp		Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r30, same base as TempRow3
+.declare 	tc8			Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r30, identical declaration to tc0_exp
+
+.declare 	tc_exp		Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r31
+.declare	tx_exp_8	Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r31, identical declaration to tc_exp
+
+.declare	q0_p0		Base=r32.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r32
+.declare	ABS_q0_p0	Base=r33.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r33
+
+.declare	ap			Base=r34.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r34
+.declare	aq			Base=r35.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r35
+
+// These buffers have the src data for each edge to be deblocked.
+// They have modified pixels from previous edges.
+//
+//	Y:
+//	+----+----+----+----+----+----+----+----+
+//	| p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |
+//	+----+----+----+----+----+----+----+----+
+//
+//	p3 = r[P_AddrReg, 0]<16;16,1>  
+//	p2 = r[P_AddrReg, 16]<16;16,1> 
+//	p1 = r[P_AddrReg, 32]<16;16,1> 
+//	p0 = r[P_AddrReg, 48]<16;16,1> 
+// 	q0 = r[Q_AddrReg, 0]<16;16,1>  
+//	q1 = r[Q_AddrReg, 16]<16;16,1> 
+//	q2 = r[Q_AddrReg, 32]<16;16,1> 
+//	q3 = r[Q_AddrReg, 48]<16;16,1> 
+
+.declare	p0123_W		Base=r36.0  ElementSize=2 SrcRegion=REGION(16,1) Type=uw		// r36, r37: word view over p3..p0
+.declare	q0123_W		Base=r38.0  ElementSize=2 SrcRegion=REGION(16,1) Type=uw		// r38, r39: word view over q0..q3
+.declare	p3			Base=r36.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r36, starting at byte 0
+.declare	p2			Base=r36.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r36, starting at byte 16
+.declare	p1			Base=r37.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r37, starting at byte 0
+.declare	p0			Base=r37.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r37, starting at byte 16
+.declare	q0			Base=r38.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r38, starting at byte 0
+.declare	q1			Base=r38.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r38, starting at byte 16
+.declare	q2			Base=r39.0  ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r39, starting at byte 0
+.declare	q3			Base=r39.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub	// r39, starting at byte 16
+
+.declare 	TempRow2   Base=r38.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w	// r38, same base as q0123_W / q0
+
+// Temp space for mbaff dual field mode
+#define		ABOVE_CUR_MB_BASE	40*GRFWIB											// Byte offset to r40
+.declare    ABOVE_CUR_MB_YW  Base=r40 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw	// word view starting at r40
+.declare    ABOVE_CUR_MB_UW  Base=r40 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw	// identical declaration to ABOVE_CUR_MB_YW (same base r40)
+
+.declare 	P0_plus_P1   Base=r41.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
+.declare 	Q0_plus_Q1   Base=r42.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
+
+.declare 	P2_plus_P3   Base=r43.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
+.declare 	Q2_plus_Q3   Base=r44.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// MB control data reference
+
+// Expanded control data is in r18 - r25
+.declare    CNTRL_DATA_D Base=r18 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud		// For read, 8 GRFs
+#define		CNTRL_DATA_BASE	18*GRFWIB												// Base offset to r18
+
+//  Bit masks for extracting individual flags (NOTE(review): presumably applied to the BitFlags byte - confirm)
+#define	MbaffFrameFlag					0x01	// bit 0
+#define	FieldModeCurrentMbFlag			0x02	// bit 1
+#define FieldModeLeftMbFlag				0x04	// bit 2
+#define FieldModeAboveMbFlag			0x08	// bit 3
+#define FilterInternal8x8EdgesFlag		0x10	// bit 4
+#define FilterInternal4x4EdgesFlag		0x20	// bit 5
+#define FilterLeftMbEdgeFlag			0x40	// bit 6
+#define FilterTopMbEdgeFlag				0x80	// bit 7
+
+#define	DISABLE_ILDB_FLAG				0x01	// bit 0; NOTE(review): presumably tested against ExtBitFlags rather than BitFlags - confirm
+
+//  Exact bit pattern for left and cur MB coding mode (frame vs. field)
+#define LEFT_FRAME_CUR_FRAME			0x00	// neither field-mode bit set
+#define LEFT_FRAME_CUR_FIELD			0x02	// = FieldModeCurrentMbFlag
+#define LEFT_FIELD_CUR_FRAME			0x04	// = FieldModeLeftMbFlag
+#define LEFT_FIELD_CUR_FIELD			0x06	// = FieldModeLeftMbFlag | FieldModeCurrentMbFlag
+
+//  Exact bit pattern for above and cur MB coding mode (frame vs. field)
+#define ABOVE_FRAME_CUR_FRAME			0x00	// neither field-mode bit set
+#define ABOVE_FRAME_CUR_FIELD			0x02	// = FieldModeCurrentMbFlag
+#define ABOVE_FIELD_CUR_FRAME			0x08	// = FieldModeAboveMbFlag
+#define ABOVE_FIELD_CUR_FIELD			0x0A	// = FieldModeAboveMbFlag | FieldModeCurrentMbFlag
+
+
+
+//========== MB control data field offset in byte ==========
+
+#if !defined(_APPLE)
+
+// GRF0 - GRF1 holds original control data
+
+// GRF0
+#define HorizOrigin						0
+#define VertOrigin						1
+#define BitFlags						2		// Bit flags
+
+#define bbSinternalLeftVert				4		// Internal left vertical bS, 2 bits per bS for 4 Y pixels and 2 U/V pixels 	
+#define bbSinternalMidVert				5		// Internal mid vertical bS
+#define bbSinternalRightVert			6		// Internal right vertical bS
+#define bbSinternalTopHorz				7		// Internal top horizontal bS
+
+#define	bbSinternalMidHorz				8		// Internal mid horizontal bS
+#define	bbSinternalBotHorz				9		// Internal bottom horizontal bS
+#define	wbSLeft0						10		// External left vertical bS (0), 4 bits per bS for 4 Y pixels and 2 U/V pixels (bytes 10 - 11)
+
+#define	wbSLeft1						12		// External left vertical bS (1) (bytes 12 - 13)
+#define	wbSTop0							14		// External top horizontal bS (0) (bytes 14 - 15)
+
+#define	wbSTop1							16		// External top horizontal bS (1) (bytes 16 - 17)
+#define	bIndexAinternal_Y				18		// Internal index A for Y
+#define	bIndexBinternal_Y				19		// Internal index B for Y
+
+#define	bIndexAleft0_Y					20		// Left index A for Y (0)
+#define	bIndexBleft0_Y					21		// Left index B for Y (0)
+#define	bIndexAleft1_Y					22		// Left index A for Y (1)
+#define	bIndexBleft1_Y					23		// Left index B for Y (1)
+
+#define	bIndexAtop0_Y					24		// Top index A for Y (0)
+#define	bIndexBtop0_Y					25		// Top index B for Y (0)
+#define	bIndexAtop1_Y					26		// Top index A for Y (1)
+#define	bIndexBtop1_Y					27		// Top index B for Y (1)
+
+#define	bIndexAinternal_Cb				28		// Internal index A for Cb
+#define	bIndexBinternal_Cb				29		// Internal index B for Cb
+#define	bIndexAleft0_Cb					30		// Left index A for Cb (0)
+#define	bIndexBleft0_Cb					31		// Left index B for Cb (0)
+
+// GRF1
+#define	bIndexAleft1_Cb					32		// Left index A for Cb (1)
+#define	bIndexBleft1_Cb					33		// Left index B for Cb (1)
+#define	bIndexAtop0_Cb					34		// Top index A for Cb (0)
+#define	bIndexBtop0_Cb					35		// Top index B for Cb (0)
+
+#define	bIndexAtop1_Cb					36		// Top index A for Cb (1)
+#define	bIndexBtop1_Cb					37		// Top index B for Cb (1)
+#define	bIndexAinternal_Cr				38		// Internal index A for Cr
+#define	bIndexBinternal_Cr				39		// Internal index B for Cr
+
+#define	bIndexAleft0_Cr					40		// Left index A for Cr (0)
+#define bIndexBleft0_Cr					41		// Left index B for Cr (0)
+#define	bIndexAleft1_Cr					42		// Left index A for Cr (1)
+#define	bIndexBleft1_Cr					43		// Left index B for Cr (1)
+
+#define	bIndexAtop0_Cr					44		// Top index A for Cr (0)
+#define	bIndexBtop0_Cr					45		// Top index B for Cr (0)
+#define	bIndexAtop1_Cr					46		// Top index A for Cr (1)
+#define	bIndexBtop1_Cr					47		// Top index B for Cr (1)
+
+#define	ExtBitFlags						48		// Extended bit flags, such as disable ILDB bits
+
+// Offset 49 - 63 not used
+
+
+//===== GRF2 - GRF7 hold expanded control data =====
+
+// GRF2
+#define	wEdgeCntlMap_IntLeftVert		64		// Derived from bbSinternalLeftVert, 1 bit per pixel
+#define	wEdgeCntlMap_IntMidVert			66		// Derived from bbSinternalMidVert (NOTE(review): comment originally named bbSinternalLeftVert, likely a copy-paste slip - confirm)
+		
+#define	wEdgeCntlMap_IntRightVert		68		// Derived from bbSinternalRightVert
+#define	wEdgeCntlMap_IntTopHorz			70		// Derived from bbSinternalTopHorz, 1 bit per pixel
+		
+#define	wEdgeCntlMap_IntMidHorz			72		// Derived from bbSinternalMidHorz
+#define	wEdgeCntlMap_IntBotHorz			74		// Derived from bbSinternalBotHorz
+
+// Offset 76 - 79 not used
+
+#define	wEdgeCntlMapA_ExtLeftVert0		80		// Derived from wbSLeft0, 1bit per pixel
+#define	wEdgeCntlMapB_ExtLeftVert0		82		// Derived from wbSLeft0
+
+#define	wEdgeCntlMapA_ExtTopHorz0		84		// Derived from wbSTop0, 1bit per pixel
+#define	wEdgeCntlMapB_ExtTopHorz0		86		// Derived from wbSTop0
+
+#define	wEdgeCntlMapA_ExtLeftVert1		88		// Derived from wbSLeft1, 1bit per pixel
+#define	wEdgeCntlMapB_ExtLeftVert1		90		// Derived from wbSLeft1
+
+#define	wEdgeCntlMapA_ExtTopHorz1		92		// Derived from wbSTop1, 1bit per pixel
+#define	wEdgeCntlMapB_ExtTopHorz1		94		// Derived from wbSTop1
+
+
+// GRF3
+#define	bTc0_v00_0_Y					96		// Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
+#define	bTc0_v10_0_Y					97		// Derived from bSv10_0 and bIndexAleft0_Y
+#define	bTc0_v20_0_Y					98		// Derived from bSv20_0 and bIndexAleft0_Y
+#define	bTc0_v30_0_Y					99		// Derived from bSv30_0 and bIndexAleft0_Y
+
+#define	bTc0_v01_Y						100		// Derived from bSv01 and bIndexAinternal_Y
+#define	bTc0_v11_Y						101		// Derived from bSv11 and bIndexAinternal_Y
+#define	bTc0_v21_Y						102		// Derived from bSv21 and bIndexAinternal_Y
+#define	bTc0_v31_Y						103		// Derived from bSv31 and bIndexAinternal_Y
+
+#define	bTc0_v02_Y						104		// Derived from bSv02 and bIndexAinternal_Y
+#define	bTc0_v12_Y						105		// Derived from bSv12 and bIndexAinternal_Y
+#define	bTc0_v22_Y						106		// Derived from bSv22 and bIndexAinternal_Y
+#define	bTc0_v32_Y						107		// Derived from bSv32 and bIndexAinternal_Y
+
+#define	bTc0_v03_Y						108		// Derived from bSv03 and bIndexAinternal_Y
+#define	bTc0_v13_Y						109		// Derived from bSv13 and bIndexAinternal_Y
+#define	bTc0_v23_Y						110		// Derived from bSv23 and bIndexAinternal_Y
+#define	bTc0_v33_Y						111		// Derived from bSv33 and bIndexAinternal_Y
+
+#define	bTc0_h00_0_Y					112		// Derived from bSh00_0 and bIndexAleft0_Y (NOTE(review): horizontal/top edge - presumably bIndexAtop0_Y is meant for h00..h03; confirm)
+#define	bTc0_h01_0_Y					113		// Derived from bSh01_0 and bIndexAleft0_Y
+#define	bTc0_h02_0_Y					114		// Derived from bSh02_0 and bIndexAleft0_Y
+#define	bTc0_h03_0_Y					115		// Derived from bSh03_0 and bIndexAleft0_Y
+
+#define	bTc0_h10_Y						116		// Derived from bSh10 and bIndexAinternal_Y
+#define	bTc0_h11_Y						117		// Derived from bSh11 and bIndexAinternal_Y
+#define	bTc0_h12_Y						118		// Derived from bSh12 and bIndexAinternal_Y
+#define	bTc0_h13_Y						119		// Derived from bSh13 and bIndexAinternal_Y
+		
+#define	bTc0_h20_Y						120		// Derived from bSh20 and bIndexAinternal_Y
+#define	bTc0_h21_Y						121		// Derived from bSh21 and bIndexAinternal_Y
+#define	bTc0_h22_Y						122		// Derived from bSh22 and bIndexAinternal_Y
+#define	bTc0_h23_Y						123		// Derived from bSh23 and bIndexAinternal_Y
+
+#define	bTc0_h30_Y						124		// Derived from bSh30 and bIndexAinternal_Y
+#define	bTc0_h31_Y						125		// Derived from bSh31 and bIndexAinternal_Y
+#define	bTc0_h32_Y						126		// Derived from bSh32 and bIndexAinternal_Y
+#define	bTc0_h33_Y						127		// Derived from bSh33 and bIndexAinternal_Y
+
+// GRF4
+#define	bAlphaLeft0_Y					128		// Derived from bIndexAleft0_Y	
+#define	bBetaLeft0_Y					129		// Derived from bIndexBleft0_Y	
+#define	bAlphaTop0_Y					130		// Derived from bIndexAtop0_Y	
+#define	bBetaTop0_Y						131		// Derived from bIndexBtop0_Y	
+
+#define	bAlphaInternal_Y				132		// Derived from bIndexAinternal_Y	
+#define	bBetaInternal_Y					133		// Derived from bIndexBinternal_Y	
+// Offset 134 - 135 not used
+
+// Offset 136 - 143 not used
+#define	bAlphaLeft1_Y					144		// Derived from bIndexAleft1_Y	Used in Mbaff mode only
+#define	bBetaLeft1_Y					145		// Derived from bIndexBleft1_Y	Used in Mbaff mode only
+#define	bAlphaTop1_Y					146		// Derived from bIndexAtop1_Y	Used in Mbaff mode only
+#define	bBetaTop1_Y						147		// Derived from bIndexBtop1_Y	Used in Mbaff mode only
+
+// Offset 148 - 151 not used
+#define	bTc0_v00_1_Y					152		// Derived from bSv00_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_v10_1_Y					153		// Derived from bSv10_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_v20_1_Y					154		// Derived from bSv20_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_v30_1_Y					155		// Derived from bSv30_1 and bIndexAleft1_Y	Used in Mbaff mode only
+
+#define	bTc0_h00_1_Y					156		// Derived from bSh00_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_h01_1_Y					157		// Derived from bSh01_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_h02_1_Y					158		// Derived from bSh02_1 and bIndexAleft1_Y	Used in Mbaff mode only
+#define	bTc0_h03_1_Y					159		// Derived from bSh03_1 and bIndexAleft1_Y	Used in Mbaff mode only
+
+
+// GRF5
+#define	bTc0_v00_0_Cb					160		// Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0	Left0
+#define	bTc0_v10_0_Cb					161		// Derived from bSv10_0 and bIndexAleft0_Cb	
+#define	bTc0_v20_0_Cb					162		// Derived from bSv20_0 and bIndexAleft0_Cb	
+#define	bTc0_v30_0_Cb					163		// Derived from bSv30_0 and bIndexAleft0_Cb	
+
+#define	bTc0_v02_Cb						164		// Derived from bSv02 and bIndexAinternal_Cb	MidVert
+#define	bTc0_v12_Cb						165		// Derived from bSv12 and bIndexAinternal_Cb	
+#define	bTc0_v22_Cb						166		// Derived from bSv22 and bIndexAinternal_Cb	
+#define	bTc0_v32_Cb						167		// Derived from bSv32 and bIndexAinternal_Cb	
+
+#define	bTc0_h00_0_Cb					168		// Derived from bSh00_0 and bIndexAleft0_Cb	Top0 (NOTE(review): tagged Top0 but names the left index - presumably bIndexAtop0_Cb for h00..h03; confirm)
+#define	bTc0_h01_0_Cb					169		// Derived from bSh01_0 and bIndexAleft0_Cb
+#define	bTc0_h02_0_Cb					170		// Derived from bSh02_0 and bIndexAleft0_Cb
+#define	bTc0_h03_0_Cb					171		// Derived from bSh03_0 and bIndexAleft0_Cb
+
+#define	bTc0_h20_Cb						172		// Derived from bSh20 and bIndexAinternal_Cb	MidHorz
+#define	bTc0_h21_Cb						173		// Derived from bSh21 and bIndexAinternal_Cb	
+#define	bTc0_h22_Cb						174		// Derived from bSh22 and bIndexAinternal_Cb	
+#define	bTc0_h23_Cb						175		// Derived from bSh23 and bIndexAinternal_Cb	
+
+#define	bTc0_v00_0_Cr					176		// Derived from bSv00_0 and bIndexAleft0_Cr, 2 pixels per tc0	Left0
+#define	bTc0_v10_0_Cr					177		// Derived from bSv10_0 and bIndexAleft0_Cr	
+#define	bTc0_v20_0_Cr					178		// Derived from bSv20_0 and bIndexAleft0_Cr	
+#define	bTc0_v30_0_Cr					179		// Derived from bSv30_0 and bIndexAleft0_Cr	
+
+#define	bTc0_v02_Cr						180		// Derived from bSv02 and bIndexAinternal_Cr	Mid Vert
+#define	bTc0_v12_Cr						181		// Derived from bSv12 and bIndexAinternal_Cr	
+#define	bTc0_v22_Cr						182		// Derived from bSv22 and bIndexAinternal_Cr	
+#define	bTc0_v32_Cr						183		// Derived from bSv32 and bIndexAinternal_Cr	
+
+#define	bTc0_h00_0_Cr					184		// Derived from bSh00_0 and bIndexAleft0_Cr, 2 pixels per tc0	Top0 (NOTE(review): tagged Top0 but names the left index - presumably bIndexAtop0_Cr for h00..h03; confirm)
+#define	bTc0_h01_0_Cr					185		// Derived from bSh01_0 and bIndexAleft0_Cr
+#define	bTc0_h02_0_Cr					186		// Derived from bSh02_0 and bIndexAleft0_Cr
+#define	bTc0_h03_0_Cr					187		// Derived from bSh03_0 and bIndexAleft0_Cr
+
+#define	bTc0_h20_Cr						188		// Derived from bSh20 and bIndexAinternal_Cr	Mid  Horz
+#define	bTc0_h21_Cr						189		// Derived from bSh21 and bIndexAinternal_Cr	
+#define	bTc0_h22_Cr						190		// Derived from bSh22 and bIndexAinternal_Cr	
+#define	bTc0_h23_Cr						191		// Derived from bSh23 and bIndexAinternal_Cr	
+
+// GRF6
+#define	bAlphaLeft0_Cb					192		// Derived from bIndexAleft0_Cb
+#define	bBetaLeft0_Cb					193		// Derived from bIndexBleft0_Cb
+#define	bAlphaTop0_Cb					194		// Derived from bIndexAtop0_Cb
+#define	bBetaTop0_Cb					195		// Derived from bIndexBtop0_Cb
+
+#define	bAlphaInternal_Cb				196		// Derived from bIndexAinternal_Cb
+#define	bBetaInternal_Cb				197		// Derived from bIndexBinternal_Cb
+// Offset 198 - 199 not used		
+
+#define	bAlphaLeft0_Cr					200		// Derived from bIndexAleft0_Cr
+#define	bBetaLeft0_Cr					201		// Derived from bIndexBleft0_Cr
+#define	bAlphaTop0_Cr					202		// Derived from bIndexAtop0_Cr
+#define	bBetaTop0_Cr					203		// Derived from bIndexBtop0_Cr
+
+#define	bAlphaInternal_Cr				204		// Derived from bIndexAinternal_Cr
+#define	bBetaInternal_Cr				205		// Derived from bIndexBinternal_Cr
+// Offset 206 - 223 not used		
+
+// GRF7
+#define	bAlphaLeft1_Cb					224		// Derived from bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bBetaLeft1_Cb					225		// Derived from bIndexBleft1_Cb	Used in Mbaff mode only
+#define	bAlphaTop1_Cb					226		// Derived from bIndexAtop1_Cb	Used in Mbaff mode only
+#define	bBetaTop1_Cb					227		// Derived from bIndexBtop1_Cb	Used in Mbaff mode only
+
+// Offset 228 - 231 not used
+
+#define	bTc0_v00_1_Cb					232		// Derived from bSv00_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_v10_1_Cb					233		// Derived from bSv10_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_v20_1_Cb					234		// Derived from bSv20_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_v30_1_Cb					235		// Derived from bSv30_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+
+#define	bTc0_h00_1_Cb					236		// Derived from bSh00_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_h01_1_Cb					237		// Derived from bSh01_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_h02_1_Cb					238		// Derived from bSh02_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+#define	bTc0_h03_1_Cb					239		// Derived from bSh03_1 and bIndexAleft1_Cb	Used in Mbaff mode only
+
+#define	bAlphaLeft1_Cr					240		// Derived from bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bBetaLeft1_Cr					241		// Derived from bIndexBleft1_Cr	Used in Mbaff mode only
+#define	bAlphaTop1_Cr					242		// Derived from bIndexAtop1_Cr	Used in Mbaff mode only
+#define	bBetaTop1_Cr					243		// Derived from bIndexBtop1_Cr	Used in Mbaff mode only
+
+// Offset 244 - 247 not used		
+
+#define	bTc0_v00_1_Cr					248		// Derived from bSv00_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_v10_1_Cr					249		// Derived from bSv10_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_v20_1_Cr					250		// Derived from bSv20_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_v30_1_Cr					251		// Derived from bSv30_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+
+#define	bTc0_h00_1_Cr					252		// Derived from bSh00_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_h01_1_Cr					253		// Derived from bSh01_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_h02_1_Cr					254		// Derived from bSh02_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+#define	bTc0_h03_1_Cr					255		// Derived from bSh03_1 and bIndexAleft1_Cr	Used in Mbaff mode only
+
+
+#else		// _APPLE is defined
+
+//******** Crestline for Apple, progressive only, 88 bytes **********
+
+// GRF0
+#define HorizOrigin						0
+#define VertOrigin						1
+#define BitFlags						2		// Bit flags
+
+#define	wEdgeCntlMap_IntLeftVert		4		// Derived from bbSinternalLeftVert, 1 bit per pixel
+#define	wEdgeCntlMap_IntMidVert			6		// Derived from bbSinternalMidVert (NOTE(review): comment originally named bbSinternalLeftVert, likely a copy-paste slip - confirm)
+#define	wEdgeCntlMap_IntRightVert		8		// Derived from bbSinternalRightVert
+#define	wEdgeCntlMap_IntTopHorz			10		// Derived from bbSinternalTopHorz, 1 bit per pixel
+#define	wEdgeCntlMap_IntMidHorz			12		// Derived from bbSinternalMidHorz
+#define	wEdgeCntlMap_IntBotHorz			14		// Derived from bbSinternalBotHorz
+#define	wEdgeCntlMapA_ExtLeftVert0		16		// Derived from wbSLeft0, 1bit per pixel
+#define	wEdgeCntlMapB_ExtLeftVert0		18		// Derived from wbSLeft0
+#define	wEdgeCntlMapA_ExtTopHorz0		20		// Derived from wbSTop0, 1bit per pixel
+#define	wEdgeCntlMapB_ExtTopHorz0		22		// Derived from wbSTop0
+
+#define	bAlphaLeft0_Y					24		// Derived from bIndexAleft0_Y	
+#define	bBetaLeft0_Y					25		// Derived from bIndexBleft0_Y	
+#define	bAlphaTop0_Y					26		// Derived from bIndexAtop0_Y	
+#define	bBetaTop0_Y						27		// Derived from bIndexBtop0_Y	
+#define	bAlphaInternal_Y				28		// Derived from bIndexAinternal_Y	
+#define	bBetaInternal_Y					29		// Derived from bIndexBinternal_Y	
+
+// GRF1
+#define	bTc0_v00_0_Y					32		// Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
+#define	bTc0_v10_0_Y					33		// Derived from bSv10_0 and bIndexAleft0_Y
+#define	bTc0_v20_0_Y					34		// Derived from bSv20_0 and bIndexAleft0_Y
+#define	bTc0_v30_0_Y					35		// Derived from bSv30_0 and bIndexAleft0_Y
+#define	bTc0_v01_Y						36		// Derived from bSv01 and bIndexAinternal_Y
+#define	bTc0_v11_Y						37		// Derived from bSv11 and bIndexAinternal_Y
+#define	bTc0_v21_Y						38		// Derived from bSv21 and bIndexAinternal_Y
+#define	bTc0_v31_Y						39		// Derived from bSv31 and bIndexAinternal_Y
+#define	bTc0_v02_Y						40		// Derived from bSv02 and bIndexAinternal_Y
+#define	bTc0_v12_Y						41		// Derived from bSv12 and bIndexAinternal_Y
+#define	bTc0_v22_Y						42		// Derived from bSv22 and bIndexAinternal_Y
+#define	bTc0_v32_Y						43		// Derived from bSv32 and bIndexAinternal_Y
+#define	bTc0_v03_Y						44		// Derived from bSv03 and bIndexAinternal_Y
+#define	bTc0_v13_Y						45		// Derived from bSv13 and bIndexAinternal_Y
+#define	bTc0_v23_Y						46		// Derived from bSv23 and bIndexAinternal_Y
+#define	bTc0_v33_Y						47		// Derived from bSv33 and bIndexAinternal_Y
+
+#define	bTc0_h00_0_Y					48		// Derived from bSh00_0 and bIndexAleft0_Y
+#define	bTc0_h01_0_Y					49		// Derived from bSh01_0 and bIndexAleft0_Y
+#define	bTc0_h02_0_Y					50		// Derived from bSh02_0 and bIndexAleft0_Y
+#define	bTc0_h03_0_Y					51		// Derived from bSh03_0 and bIndexAleft0_Y
+#define	bTc0_h10_Y						52		// Derived from bSh10 and bIndexAinternal_Y
+#define	bTc0_h11_Y						53		// Derived from bSh11 and bIndexAinternal_Y
+#define	bTc0_h12_Y						54		// Derived from bSh12 and bIndexAinternal_Y
+#define	bTc0_h13_Y						55		// Derived from bSh13 and bIndexAinternal_Y
+#define	bTc0_h20_Y						56		// Derived from bSh20 and bIndexAinternal_Y
+#define	bTc0_h21_Y						57		// Derived from bSh21 and bIndexAinternal_Y
+#define	bTc0_h22_Y						58		// Derived from bSh22 and bIndexAinternal_Y
+#define	bTc0_h23_Y						59		// Derived from bSh23 and bIndexAinternal_Y
+#define	bTc0_h30_Y						60		// Derived from bSh30 and bIndexAinternal_Y
+#define	bTc0_h31_Y						61		// Derived from bSh31 and bIndexAinternal_Y
+#define	bTc0_h32_Y						62		// Derived from bSh32 and bIndexAinternal_Y
+#define	bTc0_h33_Y						63		// Derived from bSh33 and bIndexAinternal_Y
+
+// GRF2, 
+#define	bTc0_v00_0_Cb					64		// Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0	Left0
+#define	bTc0_v10_0_Cb					65		// Derived from bSv10_0 and bIndexAleft0_Cb	
+#define	bTc0_v20_0_Cb					66		// Derived from bSv20_0 and bIndexAleft0_Cb	
+#define	bTc0_v30_0_Cb					67		// Derived from bSv30_0 and bIndexAleft0_Cb	
+#define	bTc0_v02_Cb						68		// Derived from bSv02 and bIndexAinternal_Cb	MidVert
+#define	bTc0_v12_Cb						69		// Derived from bSv12 and bIndexAinternal_Cb	
+#define	bTc0_v22_Cb						70		// Derived from bSv22 and bIndexAinternal_Cb	
+#define	bTc0_v32_Cb						71		// Derived from bSv32 and bIndexAinternal_Cb	
+#define	bTc0_h00_0_Cb					72		// Derived from bSh00_0 and bIndexAleft0_Cb	Top0
+#define	bTc0_h01_0_Cb					73		// Derived from bSh01_0 and bIndexAleft0_Cb	
+#define	bTc0_h02_0_Cb					74		// Derived from bSh02_0 and bIndexAleft0_Cb	
+#define	bTc0_h03_0_Cb					75		// Derived from bSh03_0 and bIndexAleft0_Cb	
+#define	bTc0_h20_Cb						76		// Derived from bSh20 and bIndexAinternal_Cb	MidHorz
+#define	bTc0_h21_Cb						77		// Derived from bSh21 and bIndexAinternal_Cb	
+#define	bTc0_h22_Cb						78		// Derived from bSh22 and bIndexAinternal_Cb	
+#define	bTc0_h23_Cb						79		// Derived from bSh23 and bIndexAinternal_Cb	
+
+#define	bAlphaLeft0_Cb					80		// Derived from bIndexAleft0_Cb
+#define	bBetaLeft0_Cb					81		// Derived from bIndexBleft0_Cb
+#define	bAlphaTop0_Cb					82		// Derived from bIndexAtop0_Cb
+#define	bBetaTop0_Cb					83		// Derived from bIndexBtop0_Cb
+#define	bAlphaInternal_Cb				84		// Derived from bIndexAinternal_Cb
+#define	bBetaInternal_Cb				85		// Derived from bIndexBinternal_Cb
+
+#define	ExtBitFlags						86		// Extended bit flags, such as disable ILDB bits
+
+// Shared between Cb and Cr
+#define	bTc0_v00_0_Cr			bTc0_v00_0_Cb	
+#define	bTc0_v10_0_Cr		    bTc0_v10_0_Cb	
+#define	bTc0_v20_0_Cr		    bTc0_v20_0_Cb	
+#define	bTc0_v30_0_Cr		    bTc0_v30_0_Cb	
+#define	bTc0_v02_Cr			    bTc0_v02_Cb		
+#define	bTc0_v12_Cr			    bTc0_v12_Cb		
+#define	bTc0_v22_Cr			    bTc0_v22_Cb		
+#define	bTc0_v32_Cr			    bTc0_v32_Cb		
+#define	bTc0_h00_0_Cr		    bTc0_h00_0_Cb	
+#define	bTc0_h01_0_Cr		    bTc0_h01_0_Cb	
+#define	bTc0_h02_0_Cr		    bTc0_h02_0_Cb	
+#define	bTc0_h03_0_Cr		    bTc0_h03_0_Cb	
+#define	bTc0_h20_Cr			    bTc0_h20_Cb		
+#define	bTc0_h21_Cr			    bTc0_h21_Cb		
+#define	bTc0_h22_Cr			    bTc0_h22_Cb		
+#define	bTc0_h23_Cr			    bTc0_h23_Cb		
+                                
+#define	bAlphaLeft0_Cr		    bAlphaLeft0_Cb		   
+#define	bBetaLeft0_Cr		    bBetaLeft0_Cb		   
+#define	bAlphaTop0_Cr		    bAlphaTop0_Cb		   
+#define	bBetaTop0_Cr		    bBetaTop0_Cb		   
+#define	bAlphaInternal_Cr	    bAlphaInternal_Cb	   
+#define	bBetaInternal_Cr	    bBetaInternal_Cb	   
+
+
+#endif
+
+
+//========== End of Child Variables ===============================================================
+
+#if !defined(COMBINED_KERNEL)
+#define ILDB_LABEL(x)	x		// No symbol extension for standalone kernels
+#endif
+
+#endif	// !defined(__AVC_ILDB_HEADER__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Field_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Field_UV.asm
@@ -0,0 +1,39 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#include "AVC_ILDB_Child_UV.asm"
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Field_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Field_Y.asm
@@ -0,0 +1,39 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#include "AVC_ILDB_Child_Y.asm"
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_UV.asm
@@ -0,0 +1,203 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
+//
+// First de-block vertical edges from left to right.
+// Second de-block horizontal edge from top to bottom.
+// 
+// For 4:2:0, chroma is always de-blocked at 8x8.
+// The NV12 format allows U and V to be filtered together.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define AVC_ILDB
+
+.kernel AVC_ILDB_CHILD_MBAFF_UV
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_CHILD_UV):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xE997:w
+#endif
+
+	// Setup temp buf used by load and save code
+	#define BUF_B		RTempB
+	#define BUF_W		RTempW
+	#define BUF_D		RTempD
+
+	// Init local variables
+	mul (4)		ORIX_CUR<2>:w		ORIX<0;1,0>:w		16:w	{ NoDDClr }		// Expand X addr to bytes, repeat 4 times
+	mul (4)		ORIY_CUR<2>:w		ORIY<0;1,0>:w		32:w	{ NoDDChk }		// Expand Y addr to bytes, repeat 4 times
+
+	mov (2)		f0.0<1>:w		0:w								// Clear both flag words, f0.0 and f0.1
+
+	mov	(1)		GateWayOffsetC:uw	ORIY:uw						// Use row # as Gateway offset
+
+	//=== Null Kernel ===============================================================
+//	jmpi ILDB_LABEL(POST_ILDB_UV)
+	//===============================================================================
+
+	//====================================================================================
+	// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
+	// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
+	// Each MB has 256 bytes of control data
+
+	// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// Byte_offset = MB_offset * (256 << Mbaff_flag),	Mbaff_flag = 0 or 1.
+	// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
+	// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
+	// MBCntrlDataOffsetY holds y'.
+
+	// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// Byte_offset = MB_offset * (64 << Mbaff_flag),	Mbaff_flag = 0 or 1.
+	// MBCntrlDataOffsetY holds global byte offset.
+	
+#if !defined(DEV_CL)
+	mul (1) CntrlDataOffsetY:ud		MBsCntX:w 				ORIY:w
+	add (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		ORIX:w
+	mul (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		128:uw		// Byte offset = MB_offset * (64 << 1) for MBAFF
+#endif
+	//====================================================================================
+
+	add (1)		ORIX_LEFT:w			ORIX_LEFT:w			-4:w
+	add (1)		ORIY_TOP:w			ORIY_TOP:w			-4:w
+
+	//=========== Process Top MB ============
+    and (1)  	BitFields:w  		BitFields:w		TopFieldFlag:w		// Reset BotFieldFlag
+
+	// Build a ramp from 0 to 15
+	mov	(16)	RRampW(0)<1>		RampConstC<0;8,1>:ub
+	add (8)		RRampW(0,8)<1>		RRampW(0,8)			8:w				// Upper 8 words += 8 to complete the 0-15 ramp
+
+ILDB_LABEL(RE_ENTRY_UV):	// Re-entry point: the loop at the end jumps back here for the bottom MB
+
+	// Load current MB control data
+#if defined(DEV_CL)
+	#include "Load_ILDB_Cntrl_Data_64DW.asm"	// Crestline
+#else
+	#include "Load_ILDB_Cntrl_Data_16DW.asm"	// Cantiga and beyond
+#endif
+
+	// Init addr register for vertical control data
+	mov (1)		ECM_AddrReg<1>:w		CNTRL_DATA_BASE:w		// Init ECM_AddrReg
+
+	// Use free cycles here
+	// Check loaded control data
+	and.z.f0.1  (16) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw	0xFFFF:uw		// Skip ILDB?		
+	and.nz.f0.0  (1) null:w		r[ECM_AddrReg, ExtBitFlags]:ub		DISABLE_ILDB_FLAG:w		// Skip ILDB?
+
+	// Extract above/current MB field-mode flags; DualFieldMode is derived from them below
+	and	(1)	CTemp1_W:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
+
+	// Get Vert Edge Pattern (frame vs. field MBs)
+	and	(1)	VertEdgePattern:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
+
+	(f0.1.all16h)	jmpi 	ILDB_LABEL(SKIP_ILDB_UV)						// Skip ILDB: all 16 edge control words are zero
+	(f0.0)			jmpi 	ILDB_LABEL(SKIP_ILDB_UV)						// Skip ILDB: DISABLE_ILDB_FLAG is set
+
+	// Set DualFieldMode for all data read, write and deblocking
+//	and	(1)	CTemp1_W:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
+	cmp.z.f0.0	(1)	null:w	CTemp1_W:uw		ABOVE_FIELD_CUR_FRAME:w
+	and (1)		DualFieldMode:w		f0.0:w		0x0001:w		// DualFieldMode = 1 if CTemp1_W == ABOVE_FIELD_CUR_FRAME, else 0
+
+	#include "load_Cur_UV_8x8T_Mbaff.asm"		// Load transposed data 8x8
+	#include "load_Left_UV_2x8T_Mbaff.asm"				// Load left MB (2x8) UV data from memory if exists
+
+	#include "Transpose_Cur_UV_8x8.asm"
+	#include "Transpose_Left_UV_2x8.asm"
+	
+
+	//---------- Perform vertical ILDB filtering on UV ----------
+	#include "AVC_ILDB_Filter_Mbaff_UV_v.asm"	
+	//---------------------------------------------------------
+
+	#include "save_Left_UV_8x2T_Mbaff.asm"				// Write left MB (2x8) UV data to memory if exists
+	#include "load_Top_UV_8x2_Mbaff.asm"				// Load top MB (8x2) UV data from memory if exists
+
+	#include "Transpose_Cur_UV_8x8.asm"					// Transpose a MB for horizontal edge de-blocking 
+
+	//---------- Perform horizontal ILDB filtering on UV ----------
+	#include "AVC_ILDB_Filter_Mbaff_UV_h.asm"	
+	//-----------------------------------------------------------
+
+	#include "save_Cur_UV_8x8_Mbaff.asm"				// Write 8x8
+	#include "save_Top_UV_8x2_Mbaff.asm"				// Write top MB (8x2) if not the top row
+
+	//-----------------------------------------------------------
+ILDB_LABEL(SKIP_ILDB_UV):
+	
+	and.z.f0.0 (1) 	null:w		BitFields:w		BotFieldFlag:w		// f0.0 set if BotFieldFlag is still clear (top MB pass just ended)
+
+	//=========== Process Bottom MB ============
+    or (1)  	BitFields:w  	BitFields:w		BotFieldFlag:w	// Set BotFieldFlag to 1
+	(f0.0) jmpi		ILDB_LABEL(RE_ENTRY_UV)							// Loop back for bottom deblocking
+
+	// Fall through to finish
+
+	//=========== Check write commit of the last write ============
+    mov (8)	WritebackResponse(0)<1>		WritebackResponse(0)	
+
+ILDB_LABEL(POST_ILDB_UV):	
+	
+	// Send notification thru Gateway to root thread, update chroma Status[CurRow]
+	#include "AVC_ILDB_ForwardMsg.asm"
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+	
+	////////////////////////////////////////////////////////////////////////////////
+	// Include other subroutines being called
+	#include "AVC_ILDB_Chroma_Core_Mbaff.asm"
+	
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Mbaff_Y.asm
@@ -0,0 +1,218 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
+//
+// First, de-block vertical edges from left to right.
+// Second, de-block horizontal edge from top to bottom.
+// 
+//	***** MBAFF Mode *****
+//	This version deblocks top MB first, followed by bottom MB.
+//
+//	Need variable CurMB	to indicate top MB or bottom MB (CurMB = 0 or 1).  
+//	We can use BotFieldFlag in BitFields to represent it.
+//
+//  Usage:
+// 	1) Access control data for the current MB (top or bottom) at:
+//		CntrlDataOffsetY + CurMB  * Control data block size		(64 DWs for CL, 16 DWs for BLC)
+//
+// 	2) Load frame/field video data based on flags: FieldModeCurrentMbFlag, FieldModeLeftMbFlag, FieldModeAboveMbFlag.
+//
+//	E.g. 
+//	if (pCntlData->BitField & FieldModeCurrentMbFlag)
+//		cur_y = ORIX_CUR.y + CurMB * 1;				// Add field vertical offset for bot field MB .
+//	else
+//		cur_y = ORIX_CUR.y + CurMB * MB_Rows_Y;		// Add bottom MB vertical offset for bot MB
+//
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define AVC_ILDB
+
+.kernel AVC_ILDB_CHILD_MBAFF_Y
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_CHILD_Y):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xE998:w
+#endif
+
+	// Setup temp buf used by load and save code
+	#define BUF_B		RTempB				
+	#define BUF_D		RTempD
+	
+	// Init local variables
+	// These coordinates are in progressive fashion
+	mul (4)		ORIX_CUR<2>:w		ORIX<0;1,0>:w		16:w	{ NoDDClr }		// Expand X addr to bytes, repeat 4 times
+	mul (4)		ORIY_CUR<2>:w		ORIY<0;1,0>:w		32:w	{ NoDDChk }		// Expand Y addr to bytes, repeat 4 times
+
+	mov (2)		f0.0<1>:w		0:w								// Clear both flag words, f0.0 and f0.1
+	
+	mov	(1)		GateWayOffsetC:uw	ORIY:uw						// Use row # as Gateway offset
+
+	//=== Null Kernel ===============================================================
+//	jmpi POST_ILDB
+	//===============================================================================
+
+	//====================================================================================
+	// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
+	// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
+	// Each MB has 256 bytes of control data
+
+	// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// Byte_offset = MB_offset * (256 << Mbaff_flag),	Mbaff_flag = 0 or 1.
+	// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
+	// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
+	// MBCntrlDataOffsetY holds y'.
+
+	// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// Byte_offset = MB_offset * (64 << Mbaff_flag),	Mbaff_flag = 0 or 1.
+	// MBCntrlDataOffsetY holds global byte offset.
+
+#if !defined(DEV_CL)	
+	mul (1) CntrlDataOffsetY:ud		MBsCntX:w 				ORIY:w
+	add (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		ORIX:w
+	mul (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		128:uw		// Byte offset = MB_offset * (64 << 1) for MBAFF
+#endif
+
+	//====================================================================================
+	
+	add (1)		ORIX_LEFT:w			ORIX_LEFT:w			-4:w
+	add (1)		ORIY_TOP:w			ORIY_TOP:w			-4:w
+
+
+	//=========== Process Top MB ============
+    and (1)  	BitFields:w  		BitFields:w		TopFieldFlag:w	// Reset BotFieldFlag
+
+RE_ENTRY:	// Re-entry point: the loop at the end jumps back here for the bottom MB
+
+	// Load current MB control data
+#if defined(DEV_CL)
+	#include "Load_ILDB_Cntrl_Data_64DW.asm"	// Crestline
+#else
+	#include "Load_ILDB_Cntrl_Data_16DW.asm"	// Cantiga and beyond
+#endif
+
+	// Init addr register for vertical control data
+	mov (1)		ECM_AddrReg<1>:w	CNTRL_DATA_BASE:w			// Init edge control map AddrReg
+
+	// Check loaded control data
+	and.z.f0.1  (16) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw	0xFFFF:uw		// Skip ILDB?		
+	and.nz.f0.0  (1) null:w		r[ECM_AddrReg, ExtBitFlags]:ub		DISABLE_ILDB_FLAG:w		// Skip ILDB?
+
+	// Use free cycles here
+	// Extract above/current MB field-mode flags; DualFieldMode is derived from them below
+	and	(1)	CTemp1_W:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
+
+	// Get Vert Edge Pattern (frame vs. field MBs)
+	and	(1)	VertEdgePattern:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
+
+	(f0.1.all16h)	jmpi 	SKIP_ILDB						// Skip ILDB: all 16 edge control words are zero
+	(f0.0)			jmpi 	SKIP_ILDB						// Skip ILDB: DISABLE_ILDB_FLAG is set
+
+	// Set DualFieldMode for all data read, write and deblocking
+//	and	(1)	CTemp1_W:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
+	cmp.z.f0.0	(1)	null:w	CTemp1_W:uw		ABOVE_FIELD_CUR_FRAME:w
+	and (1)		DualFieldMode:w		f0.0:w		0x0001:w		// DualFieldMode = 1 if CTemp1_W == ABOVE_FIELD_CUR_FRAME, else 0
+
+	// Load current MB 				// DDD1
+	#include "load_Cur_Y_16x16T_Mbaff.asm"				// Load cur Y, 16x16, transpose
+	#include "load_Left_Y_4x16T_Mbaff.asm"				// Load left MB (4x16) Y data from memory if exists
+
+	#include "Transpose_Cur_Y_16x16.asm"
+	#include "Transpose_Left_Y_4x16.asm"
+
+	//---------- Perform vertical ILDB filtering on Y----------
+	#include "AVC_ILDB_Filter_Mbaff_Y_v.asm"	
+	//-------------------------------------------------------
+
+	#include "save_Left_Y_16x4T_Mbaff.asm"				// Write left MB (4x16) Y data to memory if exists
+	#include "load_Top_Y_16x4_Mbaff.asm"				// Load top MB (16x4) Y data from memory if exists
+	#include "Transpose_Cur_Y_16x16.asm"				// Transpose a MB for horizontal edge de-blocking 
+
+	//---------- Perform horizontal ILDB filtering on Y ----------
+	#include "AVC_ILDB_Filter_Mbaff_Y_h.asm"	
+	//----------------------------------------------------------
+
+	#include "save_Cur_Y_16x16_Mbaff.asm"					// Write cur MB (16x16)
+	#include "save_Top_Y_16x4_Mbaff.asm"					// Write top MB (16x4) if not the top row
+
+SKIP_ILDB:
+	//----------------------------------------------------------
+	and.z.f0.0 (1) 	null:w		BitFields:w		BotFieldFlag:w		// f0.0 set if BotFieldFlag is still clear (top MB pass just ended)
+
+	//=========== Process Bottom MB ============
+    or (1)  	BitFields:w  	BitFields:w		BotFieldFlag:w	// Set BotFieldFlag to 1
+	(f0.0) jmpi		RE_ENTRY								// Loop back for bottom deblocking
+
+	// Fall through to finish
+
+	//=========== Check write commit of the last write ============
+    mov (8)	WritebackResponse(0)<1>		WritebackResponse(0)	
+
+POST_ILDB:
+	
+	//---------------------------------------------------------------------------
+	// Send notification thru Gateway to root thread, update luma Status[CurRow]
+	
+	#include "AVC_ILDB_ForwardMsg.asm"	
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+	
+	
+	////////////////////////////////////////////////////////////////////////////////
+	// Include other subroutines being called
+	#include "AVC_ILDB_Luma_Core_Mbaff.asm"
+
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_UV.asm
@@ -0,0 +1,216 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
+//
+// First de-block vertical edges from left to right.
+// Second de-block horizontal edge from top to bottom.
+// 
+// For 4:2:0, chroma is always de-blocked at 8x8.
+// The NV12 format allows U and V to be filtered together.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define AVC_ILDB
+
+.kernel AVC_ILDB_CHILD_UV
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_CHILD_UV):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x9997:w
+#endif
+
+	// Init local variables
+	shl (8)		ORIX_CUR<1>:w		ORIX<0;2,1>:w		4:w		// Expand addr to bytes, repeat (x,y) 4 times
+
+	// Init addr register for vertical control data
+	mov (1)		ECM_AddrReg<1>:w		CNTRL_DATA_BASE:w		// Init ECM_AddrReg
+
+	//=== Null Kernel ===============================================================
+//	jmpi ILDB_LABEL(POST_ILDB_UV)
+	//===============================================================================
+
+#if defined(DEV_CL)	
+	mov	(1)		acc0.0:w		240:w		// Base URB offset; the mac below adds ORIY*4 to it
+#else
+	//====================================================================================
+	// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
+	mul (1) CntrlDataOffsetY:ud		MBsCntX:w 				ORIY:w
+	add (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		ORIX:w
+
+	// Assign to MSGSRC.2:ud for memory access
+	// mul (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		64:uw
+	mul (1) MSGSRC.2:ud		CntrlDataOffsetY:ud		64:uw
+		
+	mov	(1)		acc0.0:w		320:w		// Base URB offset; the mac below adds ORIY*4 to it
+#endif
+	mac (1)		URBOffsetC:w	ORIY:w			4:w				// URBOffsetC = acc0 + ORIY*4; UV URB entries are right after Y entries		
+
+
+	// Init local variables
+//	shl (8)		ORIX_CUR<1>:w		ORIX<0;2,1>:w		4:w		// Expand addr to bytes, repeat (x,y) 4 times
+	add (1)		ORIX_LEFT:w			ORIX_LEFT:w			-4:w
+	add (1)		ORIY_TOP:w			ORIY_TOP:w			-4:w
+
+	// Build a ramp from 0 to 15
+	mov	(16)	RRampW(0)<1>		RampConstC<0;8,1>:ub
+	add (8)		RRampW(0,8)<1>		RRampW(0,8)			8:w		// Upper 8 words += 8 to complete the 0-15 ramp
+
+	// Load current MB control data
+#if defined(DEV_CL)
+	#if defined(_APPLE)
+		#include "Load_ILDB_Cntrl_Data_22DW.asm"	// Crestline for Apple, progressive only
+	#else
+		#include "Load_ILDB_Cntrl_Data_64DW.asm"	// Crestline
+	#endif	
+#else
+	#include "Load_ILDB_Cntrl_Data_16DW.asm"	// Cantiga and beyond
+#endif
+
+	// Check loaded control data
+	#if defined(_APPLE)
+		and.z.f0.1  (8) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw		0xFFFF:uw		// Skip ILDB?
+		(f0.1) and.z.f0.1 (2) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw		0xFFFF:uw		// Skip ILDB?
+	#else
+		and.z.f0.1  (16) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw	0xFFFF:uw		// Skip ILDB?		
+	#endif	
+		
+	and.nz.f0.0  (1) null:w		r[ECM_AddrReg, ExtBitFlags]:ub		DISABLE_ILDB_FLAG:w		// Skip ILDB?
+	
+	mov	(1)		GateWayOffsetC:uw	ORIY:uw		// Use row # as Gateway offset
+
+	#if defined(_APPLE)
+		(f0.1.all8h)	jmpi 	ILDB_LABEL(READ_FOR_URB_UV)				// Skip ILDB
+	#else
+		(f0.1.all16h)	jmpi 	ILDB_LABEL(READ_FOR_URB_UV)				// Skip ILDB: all 16 edge control words are zero
+	#endif	
+
+	(f0.0)			jmpi 	ILDB_LABEL(READ_FOR_URB_UV)					// Skip ILDB: DISABLE_ILDB_FLAG is set
+
+
+
+	#include "load_Cur_UV_8x8T.asm"				// Load transposed data 8x8
+//	#include "load_Left_UV_2x8T.asm"
+	#include "load_Top_UV_8x2.asm"				// Load top MB (8x2) UV data from memory if exists
+
+	#include "Transpose_Cur_UV_8x8.asm"
+//	#include "Transpose_Left_UV_2x8.asm"
+
+
+	//---------- Perform vertical ILDB filtering on UV ----------
+	#include "AVC_ILDB_Filter_UV_v.asm"	
+	//---------------------------------------------------------
+
+	#include "save_Left_UV_8x2T.asm"			// Write left MB (2x8) UV data to memory if exists
+	#include "Transpose_Cur_UV_8x8.asm"			// Transpose a MB for horizontal edge de-blocking 
+
+	//---------- Perform horizontal ILDB filtering on UV ----------
+	#include "AVC_ILDB_Filter_UV_h.asm"	
+	//-----------------------------------------------------------
+
+	#include "save_Cur_UV_8x8.asm"				// Write 8x8
+	#include "save_Top_UV_8x2.asm"				// Write top MB (8x2) if not the top row
+
+	//---------- Write right most 4 columns of cur MB to URB ----------
+	// Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D.  It is 2 left most cols in cur MB.
+	#include "Transpose_Cur_UV_2x8.asm"						
+		
+ILDB_LABEL(WRITE_URB_UV):
+	mov (8)		m1<1>:ud		LEFT_TEMP_D(1)<8;8,1>			// Copy 1 GRF to 1 URB entry (U+V)
+	
+	#include "writeURB_UV_Child.asm"	
+	//-----------------------------------------------------------------
+
+	//=========== Check write commit of the last write ============
+    mov (8)	WritebackResponse(0)<1>		WritebackResponse(0)	
+
+ILDB_LABEL(POST_ILDB_UV):
+	//---------------------------------		
+	
+	// Send notification thru Gateway to root thread, update chroma Status[CurRow]
+	#include "AVC_ILDB_ForwardMsg.asm"
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+	
+ILDB_LABEL(READ_FOR_URB_UV):
+	// Still need to prepare URB data for the right neighbor MB
+	#include "load_Cur_UV_Right_Most_2x8.asm"		// Load cur MB (right most 2x8) UV data from memory
+	#include "Transpose_Cur_UV_Right_Most_2x8.asm"						
+//	jmpi ILDB_LABEL(WRITE_URB_UV)
+
+	mov (8)		m1<1>:ud		LEFT_TEMP_D(1)<8;8,1>			// Copy 1 GRF to 1 URB entry (U+V)
+	
+	#include "writeURB_UV_Child.asm"	
+	//-----------------------------------------------------------------
+
+	// Send notification thru Gateway to root thread, update chroma Status[CurRow]
+	#include "AVC_ILDB_ForwardMsg.asm"
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+	
+	
+	////////////////////////////////////////////////////////////////////////////////
+	// Include other subroutines being called
+//	#include "AVC_ILDB_Luma_Core.asm"
+	#include "AVC_ILDB_Chroma_Core.asm"
+
+	
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Child_Y.asm
@@ -0,0 +1,206 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
+//
+// First, de-block vertical edges from left to right.
+// Second, de-block horizontal edge from top to bottom.
+// 
+// If transform_size_8x8_flag = 1, luma is de-blocked at 8x8.  Otherwise, luma is de-blocked at 4x4.
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define AVC_ILDB
+
+.kernel AVC_ILDB_CHILD_Y
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_CHILD_Y):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x9998:w
+#endif
+
+	// Init local variables
+	shl (8)		ORIX_CUR<1>:w		ORIX<0;2,1>:w		4:w		// Expand addr to bytes, repeat (x,y) 4 times
+
+	// Init addr register for vertical control data
+	mov (1)		ECM_AddrReg<1>:w	CNTRL_DATA_BASE:w			// Init edge control map AddrReg
+
+	//=== Null Kernel ===============================================================
+//	jmpi ILDB_LABEL(POST_ILDB_Y)
+	//===============================================================================
+
+	mul	(1)		URBOffsetC:uw	ORIY:uw		4:w		// URBOffsetC = ORIY * 4
+	
+#if !defined(DEV_CL)	
+	//====================================================================================
+	// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes.  Need to use a special read command on BL-C.
+	// MB_offset = MBsCntX * CurRow + CurCol
+	// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
+	mul (1) CntrlDataOffsetY:ud		MBsCntX:w 				ORIY:w
+	add (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		ORIX:w
+		
+	// Assign to MSGSRC.2:ud for memory access
+	// mul (1) CntrlDataOffsetY:ud		CntrlDataOffsetY:ud		64:uw
+	mul (1) MSGSRC.2:ud		CntrlDataOffsetY:ud		64:uw		
+	
+#endif
+
+	// Load current MB control data
+#if defined(DEV_CL) 
+	#if defined(_APPLE)
+		#include "Load_ILDB_Cntrl_Data_22DW.asm"	// Crestline for Apple, progressive only
+	#else
+		#include "Load_ILDB_Cntrl_Data_64DW.asm"	// Crestline
+	#endif	
+#else
+	#include "Load_ILDB_Cntrl_Data_16DW.asm"	// Cantiga and beyond
+#endif
+
+	// Check loaded control data
+	#if defined(_APPLE)
+		and.z.f0.1  (8) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw		0xFFFF:uw		// Skip ILDB?
+		(f0.1) and.z.f0.1 (2) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw		0xFFFF:uw		// Skip ILDB?
+	#else
+		and.z.f0.1  (16) null<1>:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw	0xFFFF:uw		// Skip ILDB?		
+	#endif	
+
+	and.nz.f0.0  (1) null:w		r[ECM_AddrReg, ExtBitFlags]:ub		DISABLE_ILDB_FLAG:w		// Skip ILDB?
+
+	// Use free cycles here
+	add (1)		ORIX_LEFT:w			ORIX_LEFT:w			-4:w
+//	add (1)		ORIY_TOP:w			ORIY_TOP:w			-4:w
+	mov	(1)		GateWayOffsetC:uw	ORIY:uw						// Use row # as Gateway offset
+
+	#if defined(_APPLE)
+		(f0.1.all8h)	jmpi 	ILDB_LABEL(READ_FOR_URB_Y)				// Skip ILDB
+	#else
+		(f0.1.all16h)	jmpi 	ILDB_LABEL(READ_FOR_URB_Y)				// Skip ILDB: all 16 edge control words are zero
+	#endif
+
+	(f0.0)			jmpi 	ILDB_LABEL(READ_FOR_URB_Y)					// Skip ILDB: DISABLE_ILDB_FLAG is set
+
+	add (1)		ORIY_TOP:w			ORIY_TOP:w			-4:w		// Only executed on the deblocking path (skip paths jumped away above)
+
+	// Better performance is observed if boundary MBs are not checked and skipped.
+	
+	#include "load_Cur_Y_16x16T.asm"				// Load cur MB Y, 16x16, transpose
+//	#include "load_Left_Y_4x16T.asm"				// Load left MB (4x16) Y data from memory
+	#include "load_Top_Y_16x4.asm"					// Load top MB (16x4) Y data from memory
+
+	#include "Transpose_Cur_Y_16x16.asm"
+//	#include "Transpose_Left_Y_4x16.asm"
+
+	//---------- Perform vertical ILDB filtering on Y ---------
+	#include "AVC_ILDB_Filter_Y_v.asm"	
+	//-------------------------------------------------------
+
+	#include "save_Left_Y_16x4T.asm"				// Write left MB (4x16) Y data to memory
+	#include "Transpose_Cur_Y_16x16.asm"			// Transpose a MB for horizontal edge de-blocking 
+
+	//---------- Perform horizontal ILDB filtering on Y -------
+	#include "AVC_ILDB_Filter_Y_h.asm"	
+	//-------------------------------------------------------
+
+	#include "save_Cur_Y_16x16.asm"					// Write cur MB (16x16)
+	#include "save_Top_Y_16x4.asm"					// Write top MB (16x4)
+
+	//---------- Write right most 4 columns of cur MB to URB ----------
+	// Transpose the right most 4 cols 4x16 in GRF to 16x4 in LEFT_TEMP_B.  It is 4 left most cols in cur MB.	
+	#include "Transpose_Cur_Y_4x16.asm"						
+	
+ILDB_LABEL(WRITE_URB_Y):
+	// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail
+	mov (16)	m1<1>:ud		LEFT_TEMP_D(2)<8;8,1>		// Copy 2 GRFs to 2 URB entries (Y)
+	
+	#include "writeURB_Y_Child.asm"	
+	//-----------------------------------------------------------------
+
+	//=========== Check write commit of the last write ============
+    mov (8)	WritebackResponse(0)<1>		WritebackResponse(0)	
+
+ILDB_LABEL(POST_ILDB_Y):
+	// Send notification thru Gateway to root thread, update luma Status[CurRow]
+	#include "AVC_ILDB_ForwardMsg.asm"	
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+
+ILDB_LABEL(READ_FOR_URB_Y):
+	// Still need to prepare URB data for the right neighbor MB
+	#include "load_Cur_Y_Right_Most_4x16.asm"		// Load cur MB ( right most 4x16) Y data from memory
+	#include "Transpose_Cur_Y_Right_Most_4x16.asm"						
+//	jmpi ILDB_LABEL(WRITE_URB_Y)
+
+	// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail
+	mov (16)	m1<1>:ud		LEFT_TEMP_D(2)<8;8,1>		// Copy 2 GRFs to 2 URB entries (Y)
+	
+	#include "writeURB_Y_Child.asm"	
+	//-----------------------------------------------------------------
+
+	// Send notification thru Gateway to root thread, update luma Status[CurRow]
+	#include "AVC_ILDB_ForwardMsg.asm"	
+
+#if !defined(GW_DCN)		// For non-ILK chipsets
+	//child send EOT : Request type = 1
+	END_CHILD_THREAD
+#endif	// !defined(GW_DCN)
+	
+	// The thread finishes here
+	//------------------------------------------------------------------------------
+	
+	////////////////////////////////////////////////////////////////////////////////
+	// Include other subroutines being called
+	#include "AVC_ILDB_Luma_Core.asm"
+//	#include "AVC_ILDB_Chroma_Core.asm"
+
+	
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Chroma_Core.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Chroma_Core.asm
@@ -0,0 +1,195 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#if !defined(__AVC_ILDB_CHROMA_CORE__)	// Make sure this file is only included once
+#define __AVC_ILDB_CHROMA_CORE__
+
+////////// AVC ILDB Chroma Core /////////////////////////////////////////////////////////////////////////////////
+//
+//	This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
+//	If data is transposed, it can also de-block a vertical edge.
+//
+//	Before calling this subroutine, the caller needs to set the following parameters.
+//
+//	- EdgeCntlMap1				//	Edge control map A
+//	- EdgeCntlMap2				//	Edge control map B
+//	- P_AddrReg					//	Src and dest address register for P pixels
+//	- Q_AddrReg					//	Src and dest address register for Q pixels 	
+//	- alpha						//  alpha corresponding to the edge to be filtered
+//	- beta						//  beta corresponding to the edge to be filtered
+//	- tc0						// 	tc0  corresponding to the edge to be filtered
+//
+//	U or V:
+//	+----+----+----+----+
+//	| P1 | p0 | q0 | q1 |
+//	+----+----+----+----+
+//
+//	p1 = r[P_AddrReg, 0]<16;8,2> 
+//	p0 = r[P_AddrReg, 16]<16;8,2> 
+// 	q0 = r[Q_AddrReg, 0]<16;8,2>  
+//	q1 = r[Q_AddrReg, 16]<16;8,2> 
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Region descriptors for the pixels on either side of the edge.
+// Each region is both src and dest: NewP0/NewQ0 below address the same storage
+// as P0/Q0, so P0/P1/Q0/Q1 should only be read while they still hold the
+// unfiltered values.  Only P1, P0, Q0 and Q1 are defined for chroma.
+// The ":ub" regions step by 2 bytes (<16;8,2> / <2>) -- presumably because U and V
+// bytes are interleaved in the source rows; TODO confirm against the callers.
+#undef 	P1
+#undef 	P0
+#undef 	Q0
+#undef 	Q1
+
+#define P1 		r[P_AddrReg,  0]<16;8,2>:ub
+#define P0 		r[P_AddrReg, 16]<16;8,2>:ub
+#define Q0 		r[Q_AddrReg,  0]<16;8,2>:ub
+#define Q1 		r[Q_AddrReg, 16]<16;8,2>:ub
+
+// New region as dest (same addresses as P0 and Q0 above, written with a byte stride of 2)
+#undef 	NewP0
+#undef 	NewQ0
+
+#define NewP0 	r[P_AddrReg, 16]<2>:ub
+#define NewQ0 	r[Q_AddrReg,  0]<2>:ub
+
+// Filter one chroma edge 
+//
+// Entry contract (as used by the code below):
+//	- P_AddrReg / Q_AddrReg address the P and Q rows; alpha, beta and tc0 hold the edge parameters.
+//	- f0.0 predicates the first compare and f0.1 predicates the inner "if".  The moves that
+//	  would load them from MaskA / MaskB are commented out in this routine, so both flags
+//	  must already be loaded when this routine is entered.
+// Writes: q0_p0, TempRow0, TempRow1, TempRow0B, TempRow1B, acc0, tc_exp (bS < 4 path only),
+//	f0.0 and (bS < 4 path) f0.1, plus the filtered p0/q0 through NewP0 / NewQ0.
+// Leaves through the RETURN macro.
+FILTER_UV:
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x1112:w
+#endif
+	//---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------
+	// bS is in MaskA
+
+	// Src copy of the p1, p0, q0, q1
+//	mov (8) p1(0)<1>		r[P_AddrReg, 0]<16;8,2>:ub
+//	mov (8) p0(0)<1>		r[P_AddrReg, 16]<16;8,2>:ub
+//	mov (8) q0(0)<1>		r[Q_AddrReg, 0]<16;8,2>:ub
+//	mov (8) q1(0)<1>		r[Q_AddrReg, 16]<16;8,2>:ub
+
+//	mov (1)	f0.0:uw		MaskA:uw
+
+	// Differences needed by the filterSamplesFlag test; q0_p0 is reused later for delta.
+	add (8) q0_p0(0)<1>			Q0		-P0				// q0-p0
+	add (8) TempRow0(0)<1>		P1		-P0				// p1-p0
+	add (8) TempRow1(0)<1>		Q1		-Q0				// q1-q0
+
+	// Build FilterSampleFlag
+	// Each compare is predicated on f0.0 and writes f0.0 again, so the three conditions
+	// are intended to be ANDed together with the mask that f0.0 held on entry.
+	// NOTE(review): the compares are issued 16 wide although only 8 pixels are filtered -- confirm this is intentional.
+	// abs(q0-p0) < alpha
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)q0_p0(0)			alpha:w
+	// abs(p1-p0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow0(0)		beta:w
+	// abs(q1-q0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow1(0)		beta:w
+
+	//-----------------------------------------------------------------------------------------
+
+	// if (filterSamplesFlag): only channels with f0.0 set are filtered
+    (f0.0)	if	(8)		UV_ENDIF1
+		// For channels whose edge control map1 = 1 ---> perform de-blocking
+
+//		mov (1)		f0.1:w		MaskB:w		{NoMask}		// Now check for which algorithm to apply
+
+		// f0.1 selects the algorithm per channel: set ---> bS = 4, clear ---> bS < 4
+		(f0.1)	if	(8)		UV_ELSE2
+
+			// For channels whose edge control map2 = 1 ---> bS = 4 algorithm 
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2
+
+			// Optimized version:
+			// A = (p1 + q1 + 2)
+			// p0' = (p0 + p1 + A) >> 2
+			// q0' = (q0 + q1 + A) >> 2
+			// (The instructions below evaluate the direct formulas, not the shared-A form.)
+			//------------------------------------------------------------------------------------
+			
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2
+			// acc0 = q1 + 2, then acc0 += 2*p1 (mac), then acc0 += p0
+			add (8) acc0<1>:w		Q1				2:w
+			mac (8) acc0<1>:w		P1				2:w
+			add (8)	acc0<1>:w		acc0<8;8,1>:w	P0
+			shr.sat	(8)	TempRow0B(0)<2>		acc0<8;8,1>:w		2:w
+			
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2
+			// acc0 = p1 + 2, then acc0 += 2*q1 (mac), then acc0 += q0
+			add (8) acc0<1>:w		P1				2:w
+			mac (8) acc0<1>:w		Q1				2:w
+			add (8)	acc0<1>:w		acc0<8;8,1>:w	Q0
+			shr.sat	(8)	TempRow1B(0)<2>		acc0<8;8,1>:w		2:w
+
+			// Write back only after both results are computed, since P0/Q0 are still sources above
+			mov (8) NewP0		TempRow0B(0)					// p0'
+			mov (8) NewQ0		TempRow1B(0)					// q0'
+			
+			
+UV_ELSE2: 
+		else 	(8)		UV_ENDIF2
+			// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
+			
+			// Expand tc0	(tc0 has 4 bytes)
+//			mov (8)	tc0_exp(0)<1>	tc0<1;2,0>:ub	{NoMask}				// tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels	
+			mov (8)	acc0<1>:w	tc0<1;2,0>:ub	{NoMask}				// tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels	
+			
+			// tc_exp = tc0_exp + 1
+//			add (8) tc_exp(0)<1>	tc0_exp(0)		1:w
+			add (8) tc_exp(0)<1>	acc0<8;8,1>:w		1:w
+
+			// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
+			// 4 * (q0-p0) + p1 - q1 + 4
+			// acc0 = p1 + 4, then acc0 += 4*(q0-p0) (mac), then acc0 -= q1
+			add (8)	acc0<1>:w		P1			4:w
+			mac (8) acc0<1>:w		q0_p0(0)	4:w	
+			add (8) acc0<1>:w		acc0<8;8,1>:w		-Q1
+			shr (8) TempRow0(0)<1>	acc0<8;8,1>:w		3:w
+
+			// tc clip
+			// These compares overwrite f0.0 / f0.1, so the masks passed in by the caller are not preserved.
+			cmp.g.f0.0	(8) null:w		TempRow0(0)		tc_exp(0)				// Clip if > tc0
+			cmp.l.f0.1	(8) null:w		TempRow0(0)		-tc_exp(0)				// Clip if < -tc0
+			
+			(f0.0) mov (8) TempRow0(0)<1>				tc_exp(0)
+			(f0.1) mov (8) TempRow0(0)<1>				-tc_exp(0)
+			
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// Result goes to TempRow1B first; presumably TempRow0B overlays TempRow0 (delta),
+			// which is still needed by the next instruction -- verify against the register map.
+			add.sat (8)	TempRow1B(0)<2>		P0			TempRow0(0)				// p0+delta
+		
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			add.sat (8)	TempRow0B(0)<2>		Q0			-TempRow0(0)			// q0-delta
+
+			mov (8) NewP0				TempRow1B(0)			// p0'
+			mov (8) NewQ0				TempRow0B(0)			// q0'
+
+		endif
+UV_ENDIF2:
+UV_ENDIF1:
+	endif
+
+RETURN
+
+#endif	// !defined(__AVC_ILDB_CHROMA_CORE__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Chroma_Core_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Chroma_Core_Mbaff.asm
@@ -0,0 +1,176 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB Chroma Core Mbaff /////////////////////////////////////////////////////////////////////////////////
+//
+//	This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
+//	If data is transposed, it can also de-block a vertical edge.
+//
+//	Before calling this subroutine, the caller needs to set the following parameters.
+//
+//	- EdgeCntlMap1				//	Edge control map A
+//	- EdgeCntlMap2				//	Edge control map B
+//	- P_AddrReg					//	Src and dest address register for P pixels
+//	- Q_AddrReg					//	Src and dest address register for Q pixels 	
+//	- alpha						//  alpha corresponding to the edge to be filtered
+//	- beta						//  beta corresponding to the edge to be filtered
+//	- tc0						// 	tc0  corresponding to the edge to be filtered
+//
+//	U or V:
+//	+----+----+----+----+
+//	| P1 | p0 | q0 | q1 |
+//	+----+----+----+----+
+//
+//	p1 = r[P_AddrReg, 0]<16;8,2> 
+//	p0 = r[P_AddrReg, 16]<16;8,2> 
+// 	q0 = r[Q_AddrReg, 0]<16;8,2>  
+//	q1 = r[Q_AddrReg, 16]<16;8,2> 
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Region descriptors for the pixels on either side of the edge.
+// Each region is both src and dest: NewP0/NewQ0 below address the same storage
+// as P0/Q0, so P0/P1/Q0/Q1 should only be read while they still hold the
+// unfiltered values.  Only P1, P0, Q0 and Q1 are defined for chroma.
+#undef 	P1
+#undef 	P0
+#undef 	Q0
+#undef 	Q1
+
+#define P1 		r[P_AddrReg,  0]<16;8,2>:ub
+#define P0 		r[P_AddrReg, 16]<16;8,2>:ub
+#define Q0 		r[Q_AddrReg,  0]<16;8,2>:ub
+#define Q1 		r[Q_AddrReg, 16]<16;8,2>:ub
+
+// New region as dest (same addresses as P0 and Q0 above, written with a byte stride of 2)
+#undef 	NewP0
+#undef 	NewQ0
+
+#define NewP0 	r[P_AddrReg, 16]<2>:ub
+#define NewQ0 	r[Q_AddrReg,  0]<2>:ub
+
+// Filter one chroma edge - mbaff
+//
+// Entry contract (as used by the code below):
+//	- P_AddrReg / Q_AddrReg address the P and Q rows.
+//	- Mbaff_ALPHA(0), Mbaff_BETA(0) and Mbaff_TC0(0) hold the per-pixel alpha, beta and tc0
+//	  (tc0 is used directly here; no expansion is done in this routine).
+//	- f0.0 must hold MaskA and f0.1 must hold the bS = 4 selector on entry; neither is loaded here.
+// Writes: q0_p0, TempRow0, TempRow1, TempRow0B, TempRow1B, acc0, tc_exp (bS < 4 path only),
+//	f0.0 and (bS < 4 path) f0.1, plus the filtered p0/q0 through NewP0 / NewQ0.
+// Leaves through the RETURN macro.
+FILTER_UV_MBAFF:
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x1112:w
+#endif
+	//---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------
+
+	//===== Assume f0.0 contains MaskA when entering this routine
+//	mov (1)	f0.0:uw		MaskA:uw
+
+	// Differences needed by the filterSamplesFlag test; q0_p0 is reused later for delta.
+	add (8) q0_p0(0)<1>			Q0		-P0				// q0-p0
+	add (8) TempRow0(0)<1>		P1		-P0				// p1-p0
+	add (8) TempRow1(0)<1>		Q1		-Q0				// q1-q0
+
+	// Build FilterSampleFlag
+	// Each compare is predicated on f0.0 and writes f0.0 again, so the three conditions
+	// are intended to be ANDed together with the mask that f0.0 held on entry.
+	// NOTE(review): the compares are issued 16 wide although only 8 pixels are filtered -- confirm this is intentional.
+	// abs(q0-p0) < alpha
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)q0_p0(0)			Mbaff_ALPHA(0)
+	// abs(p1-p0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow0(0)		Mbaff_BETA(0)
+	// abs(q1-q0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow1(0)		Mbaff_BETA(0)
+
+	//-----------------------------------------------------------------------------------------
+
+	// if (filterSamplesFlag): only channels with f0.0 set are filtered
+    (f0.0)	if	(8)		MBAFF_UV_ENDIF1
+		// For channels whose edge control map1 = 1 ---> perform de-blocking
+
+//		mov (1)		f0.1:w		MaskB:w		{NoMask}		// Now check for which algorithm to apply
+
+		// f0.1 selects the algorithm per channel: set ---> bS = 4, clear ---> bS < 4
+		(f0.1)	if	(8)		MBAFF_UV_ELSE2
+
+			// For channels whose edge control map2 = 1 ---> bS = 4 algorithm 
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2
+			//------------------------------------------------------------------------------------
+
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2
+			// acc0 = q1 + 2, then acc0 += 2*p1 (mac), then acc0 += p0
+			add (8) acc0<1>:w		Q1				2:w
+			mac (8) acc0<1>:w		P1				2:w
+			add (8)	acc0<1>:w		acc0<8;8,1>:w	P0
+			shr.sat	(8)	TempRow0B(0)<2>		acc0<8;8,1>:w		2:w
+
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2
+			// acc0 = p1 + 2, then acc0 += 2*q1 (mac), then acc0 += q0
+			add (8) acc0<1>:w		P1				2:w
+			mac (8) acc0<1>:w		Q1				2:w
+			add (8)	acc0<1>:w		acc0<8;8,1>:w	Q0
+			shr.sat	(8)	TempRow1B(0)<2>		acc0<8;8,1>:w		2:w
+
+			// Write back only after both results are computed, since P0/Q0 are still sources above
+			mov (8) NewP0		TempRow0B(0)					// p0'
+			mov (8) NewQ0		TempRow1B(0)					// q0'
+			
+MBAFF_UV_ELSE2: 
+		else 	(8)		MBAFF_UV_ENDIF2
+			// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
+			
+			// tc_exp = tc0_exp + 1
+			add (8) tc_exp(0)<1>	Mbaff_TC0(0)		1:w
+
+			// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
+			// 4 * (q0-p0) + p1 - q1 + 4
+			// acc0 = p1 + 4, then acc0 += 4*(q0-p0) (mac), then acc0 -= q1
+			add (8)	acc0<1>:w		P1			4:w
+			mac (8) acc0<1>:w		q0_p0(0)	4:w	
+			add (8) acc0<1>:w		acc0<8;8,1>:w		-Q1
+			shr (8) TempRow0(0)<1>	acc0<8;8,1>:w		3:w
+
+			// tc clip
+			// These compares overwrite f0.0 / f0.1, so the masks passed in by the caller are not preserved.
+			cmp.g.f0.0	(8) null:w		TempRow0(0)		tc_exp(0)				// Clip if > tc0
+			cmp.l.f0.1	(8) null:w		TempRow0(0)		-tc_exp(0)				// Clip if < -tc0
+			
+			(f0.0) mov (8) TempRow0(0)<1>				tc_exp(0)
+			(f0.1) mov (8) TempRow0(0)<1>				-tc_exp(0)
+			
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// Result goes to TempRow1B first; presumably TempRow0B overlays TempRow0 (delta),
+			// which is still needed by the next instruction -- verify against the register map.
+			add.sat (8)	TempRow1B(0)<2>		P0			TempRow0(0)				// p0+delta
+		
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			add.sat (8)	TempRow0B(0)<2>		Q0			-TempRow0(0)			// q0-delta
+
+			mov (8) NewP0				TempRow1B(0)			// p0'
+			mov (8) NewQ0				TempRow0B(0)			// q0'
+
+		endif
+MBAFF_UV_ENDIF2:
+MBAFF_UV_ENDIF1:
+	endif
+
+RETURN
+
+
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_CloseGateway.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_CloseGateway.asm
@@ -0,0 +1,52 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//----- Close a Message Gateway -----
+// Include fragment: issues one send to the message gateway (MSG_GW) using the
+// CloseGateway descriptor CGWMSGDSC.  The destination is null, matching the
+// response length of 0 in the descriptor below.
+
+#if defined(_DEBUG) 
+	// NOTE(review): the destination is typed :b while the immediate is a 16-bit :w value;
+	// the dependency-check code writes its signature with EntrySignature:w -- confirm which is intended.
+	mov		(1)		EntrySignature:b			0x4444:w
+#endif
+
+// Message descriptor
+// bit 31	EOD
+// 27:24	FFID = 0011b (0x3) for msg gateway
+// 23:20	msg length = 1 MRF
+// 19:16	Response length	= 0
+// 1:0		SubFuncID = 01 for CloseGateway
+// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001
+// The one-register payload starts at m7 with r0.0 given as the source operand; m7 is not written in this fragment.
+send (8)	null:ud 	m7	  r0.0<0;1,0>:ud    MSG_GW	CGWMSGDSC 
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Dep_Check.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Dep_Check.asm
@@ -0,0 +1,216 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//---------- Check dependency and spawn all MBs ----------
+// Include fragment for the root thread.  It is assembled either for chroma
+// (CHROMA_ROOT defined, *_UV labels) or for luma (*_Y labels).  This first part
+// builds the jump table used by the indirect jump at the end of the "find next MB"
+// step and initializes the scan state and the scoreboard.
+
+// Launch the 1st round of child threads for Vertical ILDB
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignature:w			0x3333:w
+#endif
+
+//=====================================================================
+// Jump Table 1
+// Entries are IP offsets relative to the ALL_SPAWNED label.  Entries 0 and 1 are 0
+// (i.e. land on ALL_SPAWNED itself); entries 2 and 3 point at SLEEP_ENTRY and POST_SLEEP.
+	// 0 0 ---> Goto ALL_SPAWNED
+	// 0 1 ---> Goto ALL_SPAWNED
+	// 1 0 ---> Goto SLEEP_ENTRY
+	// 1 1 ---> Goto POST_SLEEP
+	mov (2)		JumpTable.0<1>:d	0:d				{ NoDDClr }
+#if defined(CHROMA_ROOT) 		
+	mov (1)		JumpTable.2:d	SLEEP_ENTRY_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d		{ NoDDClr, NoDDChk }
+	mov (1)		JumpTable.3:d	POST_SLEEP_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d		{ NoDDChk }
+#else
+	mov (1)		JumpTable.2:d	SLEEP_ENTRY_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d		{ NoDDClr, NoDDChk }
+	mov (1)		JumpTable.3:d	POST_SLEEP_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d		{ NoDDChk }
+#endif
+//=====================================================================
+
+	// Clear both flag sub-registers (f0.0 and f0.1)
+	mov (2)		f0.0<1>:w		0:w
+
+	// Get m0 most of fields ready for URB write
+	mov	(8)			MRF0<1>:ud		MSGSRC.0<8;8,1>:ud
+
+	// Add child kernel offset
+	add (1) 	CT_R0Hdr.2:ud 			r0.2:ud 		CHILD_OFFSET:w
+
+	// Init
+	mov (1)		Col_Boundary:w			2:w
+	mov (1)		Row_Boundary:w			LastRow:w
+	mov (1)		TopRowForScan:w			0:w
+	mov (2)		OutstandingThreads<1>:w	0:w			// Also clears the word that follows OutstandingThreads
+
+	// Init Scoreboard  (idle = 0x00FF, busy = 0x0000)
+	// Low word is saved col.  High word is busy/idle status
+	mov	(16)		GatewayAperture(0)<1>	0x00FF00FF:ud		// Init r6-r7
+	mov	(16)		GatewayAperture(2)<1>	0x00FF00FF:ud		// Init r8-r9
+	mov	(16)		GatewayAperture(4)<1>	0x00FF00FF:ud		// Init r10-r11
+	mov	(16)		GatewayAperture(6)<1>	0x00FF00FF:ud		// Init r12-r13
+	mov	(16)		GatewayAperture(8)<1>	0x00FF00FF:ud		// Init r14-r15
+
+	// StatusAddr = CurRow * 4: byte offset of the scoreboard dword for the current row
+	mul	(1)	 		StatusAddr:w		CurRow:w		4:w		// dword to bytes offset conversion
+
+	//=====================================================================
+	// Spawn step: go to sleep when the thread limit is reached or the current row is
+	// busy; otherwise mark the row busy and spawn a child thread for the current MB.
+
+//SPAWN_LOOP:
+	//===== OutstandingThreads < ThreadLimit ? ============================
+	cmp.l.f0.1 (1)	null:w		OutstandingThreads:w	ThreadLimit:w		// Check the thread limit
+	// Below the limit ---> skip the sleep and go straight to the dependency check
+#if defined(CHROMA_ROOT) 
+    (f0.1) jmpi		ILDB_LABEL(POST_SLEEP_UV)
+#else	// LUMA_ROOT
+    (f0.1) jmpi		ILDB_LABEL(POST_SLEEP_Y)
+#endif
+
+#if defined(CHROMA_ROOT) 
+ILDB_LABEL(SLEEP_ENTRY_UV):
+#else	// LUMA_ROOT
+ILDB_LABEL(SLEEP_ENTRY_Y):
+#endif
+    //===== Goto Sleep ====================================================
+    // Either reached max thread limit or no child thread can be spawned due to dependency.
+	// The count is decremented ahead of the wait; presumably each wake-up corresponds to
+	// one finished child thread -- verify against the child notification code.
+	add	(1)			OutstandingThreads:w	OutstandingThreads:w	-1:w // Do this before wait is faster
+	wait 			n0.0:d												
+
+#if defined(CHROMA_ROOT) 
+ILDB_LABEL(POST_SLEEP_UV):
+#else	// LUMA_ROOT
+ILDB_LABEL(POST_SLEEP_Y):
+#endif
+	//===== Luma Status[CurRow] == busy ? =====
+	// f0.0 is set when the status word is 0 (busy); f0.1 is set when CurCol is past LastCol
+	cmp.z.f0.0 (1)	null:uw		r[StatusAddr, GatewayApertureB+ScoreBd_Idx]:uw		0:uw			// Check west neighbor
+	cmp.g.f0.1 (1)	null:w		CurCol:w		LastCol:w		// Check if the curCol > LastCol
+
+	// URBOffset = CurRow * 4 (plus URBOffsetUVBase, preloaded into acc0, for chroma)
+#if defined(CHROMA_ROOT) 
+	mov	(16)		acc0.0<1>:w		URBOffsetUVBase<0;1,0>:w			// Add offset to UV base (MBsCntY * URB_ENTRIES_PER_MB)
+	mac (1)			URBOffset:w		CurRow:w			4:w				// 4 entries per row
+#else
+	mul	(1)			URBOffset:w		CurRow:w			4:w				// 4 entries per row
+#endif
+
+#if defined(CHROMA_ROOT) 
+	(f0.0) jmpi		ILDB_LABEL(SLEEP_ENTRY_UV)								// Current row has a child thread running, can not spawn a new child thread, go back to sleep
+	(f0.1) jmpi		ILDB_LABEL(NEXT_MB_UV)									// skip MB if the curCol > LastCol 
+#else	// LUMA_ROOT
+	(f0.0) jmpi		ILDB_LABEL(SLEEP_ENTRY_Y)								// Current row has a child thread running, can not spawn a new child thread, go back to sleep
+	(f0.1) jmpi		ILDB_LABEL(NEXT_MB_Y)									// skip MB if the curCol > LastCol 
+#endif
+		
+	//========== Spawn a child thread ========================================
+	// Save cur col and set Status[CurRow] to busy
+	// Two bytes starting at CurColB are widened into the two scoreboard words; presumably
+	// the second byte is 0 so the status word becomes busy (0x0000) -- TODO confirm.
+	mov (2)			r[StatusAddr, GatewayApertureB]<1>:uw		CurColB<2;2,1>:ub		// Store the new col
+			
+	// Increase OutstandingThreads and ProcessedMBs by 1
+	add	(2)			OutstandingThreads<1>:w		OutstandingThreads<2;2,1>:w		1:w  
+
+	#include "AVC_ILDB_SpawnChild.asm"
+
+	//===== Find next MB ===================================================
+	// Advances CurCol/CurRow to the next MB to examine, wraps back to TopRowForScan when
+	// needed, and then dispatches through the jump table built at the start of this fragment.
+#if defined(CHROMA_ROOT) 
+ILDB_LABEL(NEXT_MB_UV):
+#else	// LUMA_ROOT
+ILDB_LABEL(NEXT_MB_Y):
+#endif
+	// Check pic boundary, results are in f0.0 bit0 and bit1
+	// (two-channel compare: CurCol >= Col_Boundary, and the following word pair)
+	cmp.ge.f0.0	(2)	null<1>:w   CurCol<2;2,1>:w 	Col_Boundary<2;2,1>:w
+
+	// Update TopRowForScan if the curCol = LastCol
+	// NOTE(review): f0.1 is not written in this step before being used here; the last visible
+	// writer is the "CurCol > LastCol" compare at POST_SLEEP, unless the included
+	// AVC_ILDB_SpawnChild.asm changes it -- confirm the condition matches the comment above.
+	(f0.1) add (1)	TopRowForScan:w		CurRow:w		1:w	
+
+//	cmp.l.f0.1 (1)	null<1>:w		ProcessedMBs:w		TotalBlocks:w		// Processed all blocks ?
+	// 2 sets compare
+	// ProcessedMBs:w < TotalBlocks:w		OutstandingThreads:w < ThreadLimit:w
+	// 0 0 ---> Goto ALL_SPAWNED
+	// 0 1 ---> Goto ALL_SPAWNED
+	// 1 0 ---> Goto SLEEP_ENTRY
+	// 1 1 ---> Goto POST_SLEEP
+	// The two result bits in f0.1 form the jump table index used below.
+	cmp.l.f0.1 (2)	null<1>:w		OutstandingThreads<2;2,1>:w	ThreadLimit<2;2,1>:w
+
+	// Just do it in stalled cycles
+	// acc0 = 4, so the mac yields StatusAddr = CurRow * 4 + 4, i.e. the offset for row CurRow + 1
+	// (CurRow itself is only incremented by the add on the line after).
+	mov (1)		acc0.0:w		4:w
+	mac	(1)	 	StatusAddr:w		CurRow:w		4:w						// dword to bytes offset conversion	
+	add (2)		CurCol<1>:w		CurCol<2;2,1>:w		StepToNextMB<2;2,1>:b	// CurCol -= 2 and CurRow += 1
+		
+	// Set f0.0 if turning around is needed, assuming bit 15 - 2 are zeros for correct comparison.
+	// f0.0 still holds the boundary compare from the top of this step; any value other than 0x01 means turn around.
+	cmp.nz.f0.0 (1)	null<1>:w	f0.0:w		0x01:w
+		
+	// JumpAddr = f0.1 * 4: each jump table entry is one dword
+	mul (1) 	JumpAddr:w		f0.1:w		4:w		// byte offset in dword count
+		
+	// The next MB is at the row TopRowForScan
+	(f0.0) mul (1)	 	StatusAddr:w	TopRowForScan:w		4:w				// dword to bytes offset conversion
+	(f0.0) mov (1)		CurRow:w		TopRowForScan:w								{ NoDDClr }	// Restart from the top row that has MBs not deblocked yet.
+	(f0.0) add (1)		CurCol:w		r[StatusAddr, GatewayApertureB]:uw		1:w		{ NoDDChk }
+	
+	//===== Processed all blocks ? =========================================
+	// (f0.1) jmpi		SPAWN_LOOP
+
+	// Indirect jump: the offset is read from the jump table entry selected by JumpAddr
+	jmpi	r[JumpAddr, JUMPTABLE_BASE]:d
+//JUMP_BASE:
+
+	//======================================================================
+	// Drain loop: wait for one child at a time until OutstandingThreads reaches 0,
+	// then fall out at ALL_DONE.
+
+	// All MB are spawned at this point, check for outstanding thread count
+#if defined(CHROMA_ROOT) 
+ILDB_LABEL(ALL_SPAWNED_UV):
+#else	// LUMA_ROOT
+ILDB_LABEL(ALL_SPAWNED_Y):
+#endif
+	cmp.e.f0.1 (1) 	null:w		OutstandingThreads:w		0:w			// Check before goto sleep
+#if defined(CHROMA_ROOT) 
+	(f0.1) jmpi		ILDB_LABEL(ALL_DONE_UV)
+#else	// LUMA_ROOT
+	(f0.1) jmpi		ILDB_LABEL(ALL_DONE_Y)
+#endif
+	
+	// Here the count is decremented after the wake-up (the sleep path in the spawn step decrements before waiting)
+	wait 			n0.0:d												// Wake up by a finished child thread
+	add	(1)			OutstandingThreads:w	OutstandingThreads:w	-1:w
+
+#if defined(CHROMA_ROOT) 
+	// One thread is free and give it to luma thread limit --- Increase luma thread limit by one.
+	#include "AVC_ILDB_LumaThrdLimit.asm"
+#endif
+
+#if defined(CHROMA_ROOT) 
+    jmpi			ILDB_LABEL(ALL_SPAWNED_UV)							// Woken up, go back to the outstanding thread check
+#else	// LUMA_ROOT
+    jmpi			ILDB_LABEL(ALL_SPAWNED_Y)							// Woken up, go back to the outstanding thread check
+#endif
+
+	// All child threads are finished at this point 
+#if defined(CHROMA_ROOT) 
+ILDB_LABEL(ALL_DONE_UV):
+#else	// LUMA_ROOT
+ILDB_LABEL(ALL_DONE_Y):
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_h.asm
@@ -0,0 +1,253 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter horizontal Mbaff UV ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.
+//
+//	It assumes the data for horizontal de-blocking is already transposed.  
+//
+//		Chroma:
+//
+//		+-------+-------+		H0 Edge
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+		H1 Edge
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//
+/////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBC:w
+#endif	
+
+//=============== Chroma deblocking ================
+
+//---------- Deblock UV external top edge ----------
+// Flow: skip to H0_UV_DONE when the FilterTopMbEdgeFlag bit is clear, branch to
+// DUAL_FIELD_UV when DualFieldMode is non-zero, otherwise filter U and then V across
+// the edge between the previous MB and the current MB.
+
+	// f0.0 is set when (BitFlags & FilterTopMbEdgeFlag) == 0, i.e. the top edge is not filtered
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterTopMbEdgeFlag:w		// Check for FilterTopMbEdgeFlag 
+
+	mov	(1)	f0.1:w		DualFieldMode:w		// Check for dual field mode
+
+	// Get Luma maskA and maskB	
+	// The scalar map is replicated to 16 channels and shifted right by RRampW(0) per channel;
+	// presumably RRampW is a 0..15 ramp so that bit 0 of channel n is map bit n -- TODO confirm.
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw		RRampW(0)
+	shr (16)	TempRow1(0)<1>		r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw		RRampW(0)
+
+    (f0.0)	jmpi	H0_UV_DONE				// Skip H0 UV edge
+
+	(f0.1) jmpi DUAL_FIELD_UV
+
+	// Non dual field mode	
+
+	// Extract UV MaskA and MaskB from every other bit of Y masks
+	// (f0.0 <--- MaskA bits, f0.1 <--- MaskB bits, taken from every second channel)
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+	// Ext U
+	//	p1 = Prev MB U row 0
+	//	p0 = Prev MB U row 1
+	// 	q0 = Cur MB U row 0
+	//	q1 = Cur MB U row 1
+	mov (1)	P_AddrReg:w		PREV_MB_U_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_U_BASE:w		{ NoDDChk }
+	
+	// Broadcast alpha and beta to 8 channels; each tc0 byte is duplicated for 2 adjacent pixels
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
+
+	// Store UV MaskA and MaskB
+	// (saves f0.0 and f0.1 as a pair so they can be reloaded for the V pass)
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	
+
+	// Ext V
+	mov (1)	P_AddrReg:w		PREV_MB_V_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_V_BASE:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
+
+	// Set UV MaskA and MaskB
+	// (reloaded from the saved pair because the filter core overwrites f0.0 / f0.1)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	
+
+	jmpi H0_UV_DONE	
+	
+DUAL_FIELD_UV:
+	// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
+	// The top edge is filtered twice, once per field.  For each field the p1/p0 and q1/q0
+	// rows are gathered into the ABOVE_CUR_MB scratch area, U and V are filtered there,
+	// and the resulting p0 / q0 rows are copied back to PREV_MB_UW / SRC_UW.
+
+	//===== Ext U, Top field
+
+	// Extract UV MaskA and MaskB from every other bit of Y masks
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE:w			{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+32:w		{ NoDDChk }
+
+	// Gather 8 words from each of two consecutive source registers, starting at word 0 (top field)
+	mov (16) ABOVE_CUR_MB_UW(0)<1>	PREV_MB_UW(0, 0)<16;8,1>	// Copy p1, p0
+	mov (16) ABOVE_CUR_MB_UW(1)<1>	SRC_UW(0, 0)<16;8,1>		// Copy q1, q0
+
+	//===== Ext U, top field
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
+
+	// Store UV MaskA and MaskB
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	// Ext U, top field
+
+	//===== Ext V, top field
+	// V uses the same scratch rows, one byte further along than U
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE+1:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+33:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
+
+	// Set UV MaskA and MaskB
+	// (reloaded from the saved pair because the filter core overwrites f0.0 / f0.1)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	// Ext V, top field
+
+	// Prefetch for bottom field
+	// Get bot field Luma maskA and maskB	
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<0;1,0>:uw		RRampW(0)
+	shr (16)	TempRow1(0)<1>		r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz1]<0;1,0>:uw		RRampW(0)
+
+	// Save deblocked top field rows
+	// Only p0 and q0 are copied back: the filter core writes nothing but p0' and q0'
+	mov (8) PREV_MB_UW(1, 0)<1>		ABOVE_CUR_MB_UW(0, 8)	// Copy p0
+	mov (8) SRC_UW(0, 0)<1>			ABOVE_CUR_MB_UW(1, 0)	// Copy q0
+	//==========================================================================
+
+	//===== Ext U, Bot field 
+	
+	// Extract UV MaskA and MaskB from every other bit of Y masks
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE:w			{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+32:w		{ NoDDChk }
+
+	// Same gather as for the top field, but starting at word 8 (bottom field)
+	mov (16) ABOVE_CUR_MB_UW(0)<1>	PREV_MB_UW(0, 8)<16;8,1>	// Copy p1, p0
+	mov (16) ABOVE_CUR_MB_UW(1)<1>	SRC_UW(0, 8)<16;8,1>		// Copy q1, q0
+
+	//===== Ext U, bottom field
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop1_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop1_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_1_Cb]<1;2,0>:ub
+
+	// Store UV MaskA and MaskB
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	// Ext U, bottom field
+
+	//===== Ext V, bot field
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE+1:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+33:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaTop1_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop1_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_1_Cr]<1;2,0>:ub
+
+	// Set UV MaskA and MaskB
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	// Ext V, bottom field
+	
+	// Save deblocked bot field rows
+	mov (8) PREV_MB_UW(1, 8)<1>		ABOVE_CUR_MB_UW(0, 8)	// Copy p0
+	mov (8) SRC_UW(0, 8)<1>			ABOVE_CUR_MB_UW(1, 0)	// Copy q0
+	//========================================
+
+H0_UV_DONE:
+
+//---------- Deblock U internal horz middle edge ----------
+// The internal edge always takes the bS < 4 path of the filter core: f0.1 and MaskB are
+// forced to 0 below, and the core uses f0.1 to select the bS = 4 algorithm.
+
+	//***** Need to take every other bit to form U maskA in core
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw		RRampW(0)
+
+	//	p1 = Cur MB U row 2
+	//	p0 = Cur MB U row 3
+	// 	q0 = Cur MB U row 4
+	//	q1 = Cur MB U row 5
+	// NOTE(review): the row numbers above match the 4* / 8* offsets if UV_ROW_WIDTH is 8 bytes
+	// and consecutive rows are 16 bytes apart -- confirm against the UV_ROW_WIDTH definition.
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_U_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_U_BASE:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h20_Cb]<1;2,0>:ub
+
+	// f0.0 <--- U MaskA, taken from every second channel of the shifted map
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+
+	// Store UV MaskA and MaskB
+	mov (1)	f0.1:uw		0:w
+	mov (1)	MaskB:uw	0:w			{ NoDDClr }
+	mov (1)	MaskA:uw	f0.0:uw		{ NoDDChk }
+
+	CALL(FILTER_UV_MBAFF, 1)	
+//-----------------------------------------------
+
+
+//---------- Deblock V internal horz middle edge ----------
+
+	//	p1 = Cur MB V row 2
+	//	p0 = Cur MB V row 3
+	// 	q0 = Cur MB V row 4
+	//	q1 = Cur MB V row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_V_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_V_BASE:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h20_Cr]<1;2,0>:ub
+
+	// Set UV MaskA and MaskB
+	// (reloaded from the saved pair because the filter core overwrites f0.0 / f0.1)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	
+//-----------------------------------------------
+
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_UV_v.asm
@@ -0,0 +1,239 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter vertical Mbaff UV ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.
+//
+//	It assumes the data for vertical de-blocking is already transposed.  
+//
+//		Chroma:
+//
+//		+-------+-------+
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//
+//		V0		V1		
+//		Edge	Edge	
+//
+/////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBC:w		// Debug builds only: write signature 0xBBBC
+#endif	
+
+//=============== Chroma deblocking ================
+
+//---------- Deblock U external left edge ----------
+
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterLeftMbEdgeFlag:w		// f0.0 is set when FilterLeftMbEdgeFlag is 0
+
+	cmp.z.f0.1	(1)	null:w	VertEdgePattern:uw		LEFT_FIELD_CUR_FRAME:w		// f0.1 = (VertEdgePattern == LEFT_FIELD_CUR_FRAME)
+
+	// Get Luma maskA and maskB
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw		RRampW(0)
+	shr (16)	TempRow1(0)<1>		r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw		RRampW(0)
+	
+    (f0.0)	jmpi	BYPASS_V0_UV	// Do not deblock Left ext edge (skips both the U and the V external edge)
+
+	cmp.z.f0.0	(1)	null:w	VertEdgePattern:uw		LEFT_FRAME_CUR_FIELD:w		// f0.0 = (VertEdgePattern == LEFT_FRAME_CUR_FIELD)
+
+	(-f0.1) jmpi V0_U_NEXT1	// Jump if not LEFT_FIELD_CUR_FRAME
+
+	//----- For LEFT_FIELD_CUR_FRAME
+	
+	// Extract UV MaskA and MaskB from every other 2 bits of Y masks
+	and.nz.f0.0 (8) null:w			TempRow0(0)<4;2,1>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<4;2,1>		1:w
+
+	// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0: Left0 values go to even elements, Left1 values to odd elements
+	mov	(4)	Mbaff_ALPHA(0,0)<2>		r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub	{ NoDDClr }
+	mov	(4)	Mbaff_ALPHA(0,1)<2>		r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub	{ NoDDChk }
+	mov	(4)	Mbaff_BETA(0,0)<2>		r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub		{ NoDDClr }
+	mov	(4)	Mbaff_BETA(0,1)<2>		r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub		{ NoDDChk }
+	mov (4)	Mbaff_TC0(0,0)<2>		r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub		{ NoDDClr }
+	mov (4)	Mbaff_TC0(0,1)<2>		r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub		{ NoDDChk }
+
+	jmpi	V0_U_NEXT3
+
+V0_U_NEXT1:
+	
+	(-f0.0) jmpi V0_U_NEXT2			// Jump if not LEFT_FRAME_CUR_FIELD
+	
+	//----- For LEFT_FRAME_CUR_FIELD
+		
+	// Extract UV MaskA and MaskB from every other bit of Y masks
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+	// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1: Left0 values go to elements 0-3, Left1 values to elements 4-7
+	mov	(4)	Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub	{ NoDDClr }
+	mov	(4)	Mbaff_ALPHA(0,4)<1>		r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub	{ NoDDChk }
+	mov	(4)	Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub		{ NoDDClr }
+	mov	(4)	Mbaff_BETA(0,4)<1>		r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub		{ NoDDChk }
+	mov (4)	Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub		{ NoDDClr }
+	mov (4)	Mbaff_TC0(0,4)<1>		r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub		{ NoDDChk }
+
+	jmpi	V0_U_NEXT3
+	
+V0_U_NEXT2:
+
+	// Extract UV MaskA and MaskB from every other bit of Y masks
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+	
+	// Both are frames or fields: only the Left0 values are used
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Cb]<1;2,0>:ub
+
+V0_U_NEXT3:	
+
+	//	p1 = Prev MB U row 0
+	//	p0 = Prev MB U row 1
+	// 	q0 = Cur MB U row 0
+	//	q1 = Cur MB U row 1
+	mov (1)	P_AddrReg:w		PREV_MB_U_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_U_BASE:w		{ NoDDChk }
+
+	// Store UV MaskA and MaskB (f0.0 and f0.1) so the V edge can reload them
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	
+//-----------------------------------------------
+
+//---------- Deblock V external left edge ----------
+
+	// No change to MaskA and MaskB
+
+	cmp.z.f0.0	(4)	null:w	VertEdgePattern:uw		LEFT_FIELD_CUR_FRAME:w		// f0.0 = (VertEdgePattern == LEFT_FIELD_CUR_FRAME)
+	cmp.z.f0.1	(4)	null:w	VertEdgePattern:uw		LEFT_FRAME_CUR_FIELD:w		// f0.1 = (VertEdgePattern == LEFT_FRAME_CUR_FIELD)
+
+	// Default: both are frame or field (overridden by the predicated moves below)
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Cr]<1;2,0>:ub
+				
+	//	p1 = Prev MB V row 0
+	//	p0 = Prev MB V row 1
+	// 	q0 = Cur MB V row 0
+	//	q1 = Cur MB V row 1
+	mov (1)	P_AddrReg:w		PREV_MB_V_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_V_BASE:w		{ NoDDChk }
+				
+	// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0 (LEFT_FIELD_CUR_FRAME)
+	(f0.0) mov (4)	Mbaff_ALPHA(0,0)<2>		r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub	{ NoDDClr }
+	(f0.0) mov (4)	Mbaff_ALPHA(0,1)<2>		r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub	{ NoDDChk }	
+	(f0.0) mov (4)	Mbaff_BETA(0,0)<2>		r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub		{ NoDDClr }
+	(f0.0) mov (4)	Mbaff_BETA(0,1)<2>		r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub		{ NoDDChk }
+	(f0.0) mov (4)	Mbaff_TC0(0,0)<2>		r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub		{ NoDDClr }
+	(f0.0) mov (4)	Mbaff_TC0(0,1)<2>		r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub		{ NoDDChk }
+
+	// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1 (LEFT_FRAME_CUR_FIELD)
+	(f0.1) mov (4)	Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub	{ NoDDClr }
+	(f0.1) mov (4)	Mbaff_ALPHA(0,4)<1>		r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub	{ NoDDChk }
+	(f0.1) mov (4)	Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub		{ NoDDClr }
+	(f0.1) mov (4)	Mbaff_BETA(0,4)<1>		r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub		{ NoDDChk }
+	(f0.1) mov (4)	Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub		{ NoDDClr }
+	(f0.1) mov (4)	Mbaff_TC0(0,4)<1>		r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub		{ NoDDChk }
+
+	// Reload UV MaskA and MaskB into f0.0 and f0.1 (this overwrites the compare results above)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV_MBAFF, 1)	
+//-----------------------------------------------
+
+BYPASS_V0_UV:
+	// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
+	// Same alpha and beta for all internal vert and horiz edges 
+
+//---------- Deblock U internal vert middle edge ----------
+
+	//***** Need to take every other bit to form U or V maskA
+	shr (16) TempRow0(0)<1>			r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw		RRampW(0)
+
+	//	p1 = Cur MB U row 2
+	//	p0 = Cur MB U row 3
+	// 	q0 = Cur MB U row 4
+	//	q1 = Cur MB U row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_U_BASE:w		{ NoDDClr }		// Skip 2 U rows and 2 V rows
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_U_BASE:w		{ NoDDChk }
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v02_Cb]<1;2,0>:ub
+
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w		// f0.0 = bit 0 of every other TempRow0 element
+
+	// Store MaskA (= f0.0) and MaskB (= 0); f0.1 is cleared to match MaskB
+	mov (1)	f0.1:uw		0:w			
+	mov (1)	MaskB:uw	0:w			{ NoDDClr }
+	mov (1)	MaskA:uw	f0.0:uw		{ NoDDChk }
+
+	CALL(FILTER_UV_MBAFF, 1)	
+	
+//-----------------------------------------------
+
+
+//---------- Deblock V internal vert middle edge ----------
+
+	//	p1 = Cur MB V row 2
+	//	p0 = Cur MB V row 3
+	// 	q0 = Cur MB V row 4
+	//	q1 = Cur MB V row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_V_BASE:w		{ NoDDClr }		// Skip 2 U rows and 2 V rows
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_V_BASE:w		{ NoDDChk }
+
+	// Put MaskA into f0.0
+	// Put MaskB into f0.1 (both reloaded from the values stored for the U edge)
+	mov (2)	f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	mov	(8) Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
+	mov	(8) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
+	mov (8) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v02_Cr]<1;2,0>:ub
+
+	CALL(FILTER_UV_MBAFF, 1)	
+
+//-----------------------------------------------
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_h.asm
@@ -0,0 +1,264 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter horizontal Mbaff Y ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.
+//
+//	It assumes the data for horizontal de-blocking is already transposed.  
+//
+//		Luma:
+//
+//		+-------+-------+-------+-------+		H0  Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H1 Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H2	Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H3 Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//
+/////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBB:w		// Debug builds only: write signature 0xBBBB
+#endif	
+	
+
+//========== Luma deblocking ==========
+
+
+//---------- Deblock Y external top edge (H0)  ----------	
+
+	// Bypass deblocking if it is the top edge of the picture.  
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterTopMbEdgeFlag:w		// f0.0 is set when FilterTopMbEdgeFlag is 0
+	mov	(1)	f0.1:w		DualFieldMode:w			// f0.1 = DualFieldMode, tested by the jmpi to DUAL_FIELD_Y below
+		
+	// Parameters for non dual field mode (also used as-is for the top field in dual field mode)
+
+	// Get (alpha >> 2) + 2
+	shr (16) Mbaff_ALPHA2(0,0)<1>	r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub		2:w			// alpha >> 2
+
+	mov (2)	MaskA<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw
+
+	// Ext Y
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop0_Y]<0;1,0>:ub
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_0_Y]<1;4,0>:ub
+
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w					// alpha2 = (alpha >> 2) + 2  
+
+    (f0.0) jmpi	H0_Y_DONE				// Skip Ext Y deblocking
+	(f0.1) jmpi	DUAL_FIELD_Y
+	
+	mov (1)	P_AddrReg:w		PREV_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_Y_BASE:w			{ NoDDChk }
+	
+	CALL(FILTER_Y_MBAFF, 1)			// Non dual field deblocking
+		
+	jmpi	H0_Y_DONE
+
+DUAL_FIELD_Y:
+	// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
+
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+64:w	{ NoDDChk }
+
+	//  Must use PREV_MB_YW.  TOP_MB_YW is not big enough.
+	// Get top field rows (gathered into the ABOVE_CUR_MB_YW scratch area that P/Q point at)
+	mov (16) ABOVE_CUR_MB_YW(0)<1>	PREV_MB_YW(0, 0)<16;8,1>	// Copy p3, p2
+	mov (16) ABOVE_CUR_MB_YW(1)<1>	PREV_MB_YW(2, 0)<16;8,1>	// Copy p1, p0
+	mov (16) ABOVE_CUR_MB_YW(2)<1>	SRC_YW(0, 0)<16;8,1>		// Copy q0, q1
+	mov (16) ABOVE_CUR_MB_YW(3)<1>	SRC_YW(2, 0)<16;8,1>		// Copy q2, q3
+
+	CALL(FILTER_Y_MBAFF, 1)				// Ext Y, top field
+
+	// Save deblocked top field rows (p3 and q3 are not copied back)
+	mov (8) PREV_MB_YW(1, 0)<1>		ABOVE_CUR_MB_YW(0, 8)	// Copy p2
+	mov (8) PREV_MB_YW(2, 0)<1>		ABOVE_CUR_MB_YW(1, 0)	// Copy p1
+	mov (8) PREV_MB_YW(3, 0)<1>		ABOVE_CUR_MB_YW(1, 8)	// Copy p0
+	mov (8) SRC_YW(0, 0)<1>			ABOVE_CUR_MB_YW(2, 0)	// Copy q0
+	mov (8) SRC_YW(1, 0)<1>			ABOVE_CUR_MB_YW(2, 8)	// Copy q1
+	mov (8) SRC_YW(2, 0)<1>			ABOVE_CUR_MB_YW(3, 0)	// Copy q2
+
+	//==================================================================================
+	// Bottom field
+	
+	// Get (alpha >> 2) + 2
+	shr (16) Mbaff_ALPHA2(0,0)<1>	r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub		2:w			// alpha >> 2
+
+	mov (1)	P_AddrReg:w		ABOVE_CUR_MB_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		ABOVE_CUR_MB_BASE+64:w	{ NoDDChk }
+	
+	// Get bot field rows
+	mov (16) ABOVE_CUR_MB_YW(0)<1>	PREV_MB_YW(0, 8)<16;8,1>	// Copy p3, p2
+	mov (16) ABOVE_CUR_MB_YW(1)<1>	PREV_MB_YW(2, 8)<16;8,1>	// Copy p1, p0
+	mov (16) ABOVE_CUR_MB_YW(2)<1>	SRC_YW(0, 8)<16;8,1>		// Copy q0, q1
+	mov (16) ABOVE_CUR_MB_YW(3)<1>	SRC_YW(2, 8)<16;8,1>		// Copy q2, q3
+
+	mov (2)	MaskA<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<2;2,1>:uw
+
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaTop1_Y]<0;1,0>:ub
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h00_1_Y]<1;4,0>:ub
+
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w					// alpha2 = (alpha >> 2) + 2  
+
+	CALL(FILTER_Y_MBAFF, 1)				// Ext Y, bot field
+
+	// Save deblocked bot field rows (p3 and q3 are not copied back)
+	mov (8) PREV_MB_YW(1, 8)<1>		ABOVE_CUR_MB_YW(0, 8)	// Copy p2
+	mov (8) PREV_MB_YW(2, 8)<1>		ABOVE_CUR_MB_YW(1, 0)	// Copy p1
+	mov (8) PREV_MB_YW(3, 8)<1>		ABOVE_CUR_MB_YW(1, 8)	// Copy p0
+	mov (8) SRC_YW(0, 8)<1>			ABOVE_CUR_MB_YW(2, 0)	// Copy q0
+	mov (8) SRC_YW(1, 8)<1>			ABOVE_CUR_MB_YW(2, 8)	// Copy q1
+	mov (8) SRC_YW(2, 8)<1>			ABOVE_CUR_MB_YW(3, 0)	// Copy q2
+	//==================================================================================
+
+H0_Y_DONE:
+
+//BYPASS_H0_Y:
+//------------------------------------------------------------------
+	// Same alpha, alpha2, beta and MaskB for all internal edges (H1, H2 and H3)
+
+	// Get (alpha >> 2) + 2
+	shr (16) Mbaff_ALPHA2(0,0)<1>	r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub		2:w			// alpha >> 2
+
+	// alpha = bAlphaInternal_Y
+	// beta = bBetaInternal_Y
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub
+
+	mov (1) MaskB:uw	0:w						// Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.
+
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w					// alpha2 = (alpha >> 2) + 2  
+
+//---------- Deblock Y internal top edge (H1)  ----------
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_H1_Y
+
+	//	p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		SRC_MB_Y_BASE:w					{ NoDDClr }
+	mov (1)	Q_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w	{ NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw
+
+	// tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y		
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h10_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal4x4EdgesFlag set
+
+//BYPASS_H1_Y:
+//------------------------------------------------------------------
+
+
+//---------- Deblock Y internal mid horizontal edge (H2) ----------
+
+	// Bypass deblocking if FilterInternal8x8EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal8x8EdgesFlag:w		// f0.0 is set when FilterInternal8x8EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_H2_Y
+
+	//	p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw
+
+	// tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y		
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h20_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal8x8EdgesFlag set
+
+//BYPASS_H2_Y:
+//-----------------------------------------------
+
+
+//---------- Deblock Y internal bottom edge (H3) ----------	 
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_H3_Y
+
+	//	p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDChk }
+	
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw
+
+	// tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_h30_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal4x4EdgesFlag set
+
+//BYPASS_H3_Y:
+//-----------------------------------------------
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Mbaff_Y_v.asm
@@ -0,0 +1,299 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter vertical Mbaff Y ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.
+//
+//	It assumes the data for vertical de-blocking is already transposed.  
+//
+//		Luma:
+//
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//
+//		V0		V1		V2		V3
+//		Edge	Edge	Edge	Edge
+//
+/////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBB:w		// Debug builds only: write signature 0xBBBB
+#endif	
+	
+
+//========== Luma deblocking ==========
+
+
+//---------- Deblock Y external left edge (V0) ----------	
+
+	cmp.z.f0.0	(8)	null:w	VertEdgePattern:uw		LEFT_FIELD_CUR_FRAME:w		// f0.0 = (VertEdgePattern == LEFT_FIELD_CUR_FRAME)
+	cmp.z.f0.1	(8)	null:w	VertEdgePattern:uw		LEFT_FRAME_CUR_FIELD:w		// f0.1 = (VertEdgePattern == LEFT_FRAME_CUR_FIELD)
+
+	// Initial setting for the case where both are frame or both are field (overridden by the predicated moves below)
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub
+		
+	// For LEFT_FIELD_CUR_FRAME (presumably FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0, as in the UV vertical filter -- TODO confirm)
+	(f0.0) mov (8)	Mbaff_ALPHA(0,0)<2>		r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	(f0.0) mov (8)	Mbaff_ALPHA(0,1)<2>		r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	(f0.0) mov (8)	Mbaff_BETA(0,0)<2>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	(f0.0) mov (8)	Mbaff_BETA(0,1)<2>		r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	(f0.0) mov (8)	Mbaff_TC0(0,0)<2>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub		{ NoDDClr }
+	(f0.0) mov (8)	Mbaff_TC0(0,1)<2>		r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub		{ NoDDChk }
+
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterLeftMbEdgeFlag:w		// f0.0 is set when FilterLeftMbEdgeFlag is 0 (overwrites the compare result above)
+
+	// For LEFT_FRAME_CUR_FIELD (presumably FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1, as in the UV vertical filter -- TODO confirm)
+	(f0.1) mov (8)	Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	(f0.1) mov (8)	Mbaff_ALPHA(0,8)<1>		r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	(f0.1) mov (8)	Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	(f0.1) mov (8)	Mbaff_BETA(0,8)<1>		r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	(f0.1) mov (8)	Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub		{ NoDDClr }
+	(f0.1) mov (8)	Mbaff_TC0(0,8)<1>		r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub		{ NoDDChk }
+
+	// Get (alpha >> 2) + 2, computed from the alpha values selected above
+	shr (16) Mbaff_ALPHA2(0,0)<1>	Mbaff_ALPHA(0)		2:w			// alpha >> 2
+
+	//	p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
+	//	p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>
+	//	q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		PREV_MB_Y_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_Y_BASE:w		{ NoDDChk }
+
+	// Set MaskA and MaskB
+	mov (2)	MaskA<1>:uw		r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw
+
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w					// alpha2 = (alpha >> 2) + 2  
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterLeftMbEdgeFlag set
+	
+//BYPASS_V0_Y:
+//------------------------------------------------------------------
+
+
+/*
+//---------- Deblock Y external left edge (V0) ----------	
+
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterLeftMbEdgeFlag:w		// Check for FilterLeftMbEdgeFlag 
+    (f0.0)	jmpi	ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y)	
+
+	// Get vertical border edge control data  
+
+//	mov	(1)	f0.0		0:w
+	and	(1)	CTemp1_W:uw		r[ECM_AddrReg, BitFlags]:ub		FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
+	cmp.z.f0.0	(1)	null:w	CTemp1_W:uw		LEFT_FIELD_CUR_FRAME:w
+	(-f0.0) jmpi LEFT_EDGE_Y_NEXT1
+
+	// For LEFT_FIELD_CUR_FRAME: FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0
+	mov	(8)	Mbaff_ALPHA(0,0)<2>		r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	mov	(8)	Mbaff_ALPHA(0,1)<2>		r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	mov	(8)	Mbaff_BETA(0,0)<2>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	mov	(8)	Mbaff_BETA(0,1)<2>		r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	mov (8)	Mbaff_TC0(0,0)<2>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub		{ NoDDClr }
+	mov (8)	Mbaff_TC0(0,1)<2>		r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub		{ NoDDChk }
+
+	jmpi	LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED
+
+LEFT_EDGE_Y_NEXT1:
+	cmp.z.f0.0	(1)	null:w	CTemp1_W:uw		LEFT_FRAME_CUR_FIELD:w
+	(-f0.0) jmpi LEFT_EDGE_Y_NEXT2
+
+
+	// For LEFT_FRAME_CUR_FIELD: FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1
+	mov	(8)	Mbaff_ALPHA(0,0)<1>		r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	mov	(8)	Mbaff_ALPHA(0,8)<1>		r[ECM_AddrReg, bAlphaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	mov	(8)	Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub		{ NoDDClr }
+	mov	(8)	Mbaff_BETA(0,8)<1>		r[ECM_AddrReg, bBetaLeft1_Y]<0;1,0>:ub		{ NoDDChk }
+	mov (8)	Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;2,0>:ub		{ NoDDClr }
+	mov (8)	Mbaff_TC0(0,8)<1>		r[ECM_AddrReg, bTc0_v00_1_Y]<1;2,0>:ub		{ NoDDChk }
+
+	jmpi	LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED
+	
+LEFT_EDGE_Y_NEXT2:
+	// both are frame or field
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaLeft0_Y]<0;1,0>:ub
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v00_0_Y]<1;4,0>:ub
+
+LEFT_EDGE_Y_ALPHA_BETA_TC0_SELECTED:
+
+	mov (2)	MaskA<1>:uw		r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw
+
+	//	p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
+	//	p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>
+	//	q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		PREV_MB_Y_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_Y_BASE:w		{ NoDDChk }
+	
+	// Get (alpha >> 2) + 2
+	shr (16) Mbaff_ALPHA2(0,0)<1>	r[ECM_AddrReg, bAlphaLeft0_Y]<0;1,0>:ub		2:w			// alpha >> 2
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w					// alpha2 = (alpha >> 2) + 2  
+	
+	CALL(FILTER_Y_MBAFF, 1)
+
+ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_Y):
+//------------------------------------------------------------------
+*/
+
+	// Same alpha, alpha2, beta and MaskB for all internal edges (V1, V2 and V3)
+	
+	// Get (alpha >> 2) + 2
+	shr (16) Mbaff_ALPHA2(0,0)<1>	r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub		2:w			// alpha >> 2
+	
+	// alpha = bAlphaInternal_Y
+	// beta = bBetaInternal_Y
+	mov	(16) Mbaff_ALPHA(0,0)<1>	r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub
+	mov	(16) Mbaff_BETA(0,0)<1>		r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub
+
+	mov (1) MaskB:uw	0:w						// Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.
+
+	add (16) Mbaff_ALPHA2(0,0)<1>		Mbaff_ALPHA2(0,0)<16;16,1>		2:w						// alpha2 = (alpha >> 2) + 2  
+
+//---------- Deblock Y internal left edge (V1) ----------
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_V1_Y
+
+	//	p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>  
+	//	p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		SRC_MB_Y_BASE:w						{ NoDDClr }
+	mov (1)	Q_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw
+
+	// tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y	
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v01_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal4x4EdgesFlag set
+
+BYPASS_V1_Y:
+//------------------------------------------------------------------
+
+
+//---------- Deblock Y internal mid vert edge (V2) ----------
+
+	// Bypass deblocking if FilterInternal8x8EdgesFlag = 0  
+	and.z.f0.0	(1)	null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal8x8EdgesFlag:w		// f0.0 is set when FilterInternal8x8EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_V2_Y
+
+	//	p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1>  
+	//	p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1> 
+	//	p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1> 
+	//	p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1> 
+	// 	q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1>  
+	//	q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1> 
+	//	q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1> 
+	//	q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1> 
+	mov (1)	P_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw
+
+	// tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y	
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v02_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal8x8EdgesFlag set
+
+BYPASS_V2_Y:
+//-----------------------------------------------
+
+
+//---------- Deblock Y internal right edge (V3) ----------	 
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0	(1)	null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is 0
+//    (f0.0)	jmpi	BYPASS_V3_Y
+
+	//	p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDChk }
+	
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw
+
+	// tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y
+	mov (16) Mbaff_TC0(0,0)<1>		r[ECM_AddrReg, bTc0_v03_Y]<1;4,0>:ub
+
+//	CALL(FILTER_Y_MBAFF, 1)
+	PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)		// Predicated on f0.0 being clear, i.e. FilterInternal4x4EdgesFlag set
+
+BYPASS_V3_Y:
+//-----------------------------------------------
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_UV_h.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_UV_h.asm
@@ -0,0 +1,175 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter horizontal UV ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.
+//
+//	It assumes the data for horizontal de-blocking is already transposed.  
+//
+//		Chroma:
+//
+//		+-------+-------+		H0 Edge
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+		H1 Edge
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//
+/////////////////////////////////////////////////////////////////////////////
+// Debug builds only: tag EntrySignatureC with the marker value 0xBBBC.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBC:w
+#endif	
+
+//=============== Chroma deblocking ================
+
+//---------- Deblock U external top edge ----------
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterTopMbEdgeFlag:w		// f0.0 is set when FilterTopMbEdgeFlag is clear (AND result is zero)
+//    (f0.0)	jmpi	BYPASS_EXT_TOP_EDGE_UV	
+
+	// Get horizontal border edge control data.
+	// (The two shr below carry no conditional modifier, so f0.0 from the test above is still intact at the jmpi.)
+	//***** Need to take every other bit to form U maskA and mask B
+	// Get Luma maskA and maskB: broadcast each 16-bit map to 16 channels and shift per channel by RRampW (presumably a 0..15 ramp - TODO confirm)
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw		RRampW(0)
+	shr (16)	TempRow1(0)<1>		r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw		RRampW(0)
+	// Skip the U and V external-edge filtering when f0.0 is set (FilterTopMbEdgeFlag clear).
+    (f0.0)	jmpi	ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV)			
+
+	// Extract UV MaskA and MaskB from every other bit of Y masks (stride-2 region, 8 channels); results go to f0.0 / f0.1
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+//---------- Deblock U external edge ----------
+	//	p1 = Prev MB U row 0
+	//	p0 = Prev MB U row 1
+	// 	q0 = Cur MB U row 0
+	//	q1 = Cur MB U row 1
+//	mov (1)	P_AddrReg:w		PREV_MB_U_BASE:w									{ NoDDClr }
+	mov (1)	P_AddrReg:w		TOP_MB_U_BASE:w										{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_U_BASE:w										{ NoDDChk }
+
+	// alpha = bAlphaTop0_Cb, beta = bBetaTop0_Cb
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaTop0_Cb]<2;2,1>:ub					{ NoDDClr } 
+	// tc0 has bTc0_h03_0_Cb + bTc0_h02_0_Cb + bTc0_h01_0_Cb + bTc0_h00_0_Cb
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h00_0_Cb]<4;4,1>:ub					{ NoDDChk } 
+		
+	// UV MaskA and MaskB: copy f0.0/f0.1 into MaskA and the word after it (presumably MaskB) so they can be reloaded for the V pass
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+//---------- Deblock V external top edge ----------
+	//	p1 = Prev MB V row 0
+	//	p0 = Prev MB V row 1
+	// 	q0 = Cur MB V row 0
+	//	q1 = Cur MB V row 1
+//	mov (1)	P_AddrReg:w		PREV_MB_V_BASE:w		{ NoDDClr }
+	mov (1)	P_AddrReg:w		TOP_MB_V_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_V_BASE:w			{ NoDDChk }
+
+	// alpha = bAlphaTop0_Cr, beta = bBetaTop0_Cr
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaTop0_Cr]<2;2,1>:ub		{ NoDDClr }
+	
+	// tc0 has bTc0_h03_0_Cr + bTc0_h02_0_Cr + bTc0_h01_0_Cr + bTc0_h00_0_Cr
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h00_0_Cr]<4;4,1>:ub		{ NoDDChk }
+
+	// UV MaskA and MaskB: reload f0.0/f0.1 from the copy saved before the U call (FILTER_UV presumably changes the flags - TODO confirm)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+ILDB_LABEL(BYPASS_EXT_TOP_EDGE_UV):
+
+	// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
+
+	// (Disabled) Bypass deblocking if FilterInternal4x4EdgesFlag = 0 - the check below is commented out, so this section always runs.
+//	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// Check for FilterInternal4x4EdgesFlag 
+//    (f0.0)	jmpi	BYPASS_4x4_DEBLOCK_H
+
+//---------- Deblock U internal horz middle edge ----------
+
+	//***** Need to take every other bit to form U maskA
+	// Get Luma maskA only (MaskB and f0.1 are forced to 0 further down)
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw		RRampW(0)
+
+	//	p1 = Cur MB U row 2
+	//	p0 = Cur MB U row 3
+	// 	q0 = Cur MB U row 4
+	//	q1 = Cur MB U row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_U_BASE:w					{ NoDDClr }		// Skip 2 U rows and 2 V rows
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_U_BASE:w					{ NoDDChk }
+
+	// alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub 		{ NoDDClr }
+	// tc0 has bTc0_h23_Cb + bTc0_h22_Cb + bTc0_h21_Cb + bTc0_h20_Cb		
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h20_Cb]<4;4,1>:ub				{ NoDDChk }
+
+	// Extract UV MaskA from every other bit of the Y mask (stride-2 region, 8 channels); result goes to f0.0
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+
+	// UV MaskA and MaskB: f0.1 and MaskB are zeroed; MaskA keeps a copy of f0.0 for the V pass
+	mov (1) f0.1:uw		0:w
+	mov (1)	MaskB:uw	0:w													{ NoDDClr }
+	mov (1)	MaskA:uw	f0.0:uw												{ NoDDChk }
+
+	CALL(FILTER_UV, 1)	
+
+//---------- Deblock V internal horz middle edge ----------
+	//	p1 = Cur MB V row 2
+	//	p0 = Cur MB V row 3
+	// 	q0 = Cur MB V row 4
+	//	q1 = Cur MB V row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_V_BASE:w					{ NoDDClr }		// Skip 2 U rows and 2 V rows
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_V_BASE:w					{ NoDDChk }
+
+	// alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub 		{ NoDDClr }
+	// tc0 has bTc0_h23_Cr + bTc0_h22_Cr + bTc0_h21_Cr + bTc0_h20_Cr
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h20_Cr]<4;4,1>:ub				{ NoDDChk }
+
+	// UV MaskA and MaskB: reload f0.0/f0.1 from MaskA and the word after it (FILTER_UV presumably changes the flags - TODO confirm)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+//BYPASS_4x4_DEBLOCK_H:
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_UV_v.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_UV_v.asm
@@ -0,0 +1,175 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter vertical UV ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.
+//
+//	It assumes the data for vertical de-blocking is already transposed.  
+//
+//		Chroma:
+//
+//		+-------+-------+
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//		|		|		|
+//		|		|		|
+//		|		|		|
+//		+-------+-------+
+//
+//		V0		V1		
+//		Edge	Edge	
+//
+/////////////////////////////////////////////////////////////////////////////
+// Debug builds only: tag EntrySignatureC with the marker value 0xBBBC.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBC:w
+#endif	
+
+//=============== Chroma deblocking ================
+
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterLeftMbEdgeFlag:w		// f0.0 is set when FilterLeftMbEdgeFlag is clear (AND result is zero)
+//    (f0.0)	jmpi	BYPASS_EXT_LEFT_EDGE_UV	
+ 
+	// Get vertical border edge control data.  
+	// (The two shr below carry no conditional modifier, so f0.0 from the test above is still intact at the jmpi.)
+	// Get Luma maskA and maskB: broadcast each 16-bit map to 16 channels and shift per channel by RRampW (presumably a 0..15 ramp - TODO confirm)
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw		RRampW(0)
+	shr (16)	TempRow1(0)<1>		r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw		RRampW(0)
+	// Skip the U and V external-edge filtering when f0.0 is set (FilterLeftMbEdgeFlag clear).
+    (f0.0)	jmpi	ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV)
+
+	// Extract UV MaskA and MaskB from every other bit of Y masks (stride-2 region, 8 channels); results go to f0.0 / f0.1
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+	and.nz.f0.1 (8) null:w			TempRow1(0)<16;8,2>		1:w
+
+//---------- Deblock U external edge ----------
+	//	p1 = Prev MB U row 0
+	//	p0 = Prev MB U row 1
+	// 	q0 = Cur MB U row 0
+	//	q1 = Cur MB U row 1
+	mov (1)	P_AddrReg:w		PREV_MB_U_BASE:w									{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_U_BASE:w										{ NoDDChk }
+
+	// alpha = bAlphaLeft0_Cb, beta = bBetaLeft0_Cb
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaLeft0_Cb]<2;2,1>:ub				{ NoDDClr }
+	// tc0 has bTc0_v30_0_Cb + bTc0_v20_0_Cb + bTc0_v10_0_Cb + bTc0_v00_0_Cb
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub					{ NoDDChk }
+	
+	// UV MaskA and MaskB: copy f0.0/f0.1 into MaskA and the word after it (presumably MaskB) so they can be reloaded for the V pass
+	mov (2)		MaskA<1>:uw			f0.0<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+//---------- Deblock V external edge ----------
+	//	p1 = Prev MB V row 0
+	//	p0 = Prev MB V row 1
+	// 	q0 = Cur MB V row 0
+	//	q1 = Cur MB V row 1
+	mov (1)	P_AddrReg:w		PREV_MB_V_BASE:w									{ NoDDClr }		
+	mov (1)	Q_AddrReg:w		SRC_MB_V_BASE:w										{ NoDDChk }
+
+	// for vert edge: alpha = bAlphaLeft0_Cr, beta = bBetaLeft0_Cr
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaLeft0_Cr]<2;2,1>:ub				{ NoDDClr }
+	
+	// tc0 has bTc0_v30_0_Cr + bTc0_v20_0_Cr + bTc0_v10_0_Cr + bTc0_v00_0_Cr
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub					{ NoDDChk }
+
+	// UV MaskA and MaskB: reload f0.0/f0.1 from the copy saved before the U call (FILTER_UV presumably changes the flags - TODO confirm)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+
+ILDB_LABEL(BYPASS_EXT_LEFT_EDGE_UV):
+	// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
+	// Same alpha and beta for all internal vert and horiz edges 
+
+
+	//***** Need to take every other bit to form U or V maskA
+	// Get Luma maskA only (MaskB and f0.1 are forced to 0 further down)
+	shr (16)	TempRow0(0)<1>		r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw		RRampW(0)
+
+//---------- Deblock U internal edge ----------
+	//	p1 = Cur MB U row 2
+	//	p0 = Cur MB U row 3
+	// 	q0 = Cur MB U row 4
+	//	q1 = Cur MB U row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_U_BASE:w					{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_U_BASE:w					{ NoDDChk }
+
+	// alpha = bAlphaInternal_Cb, beta = bBetaInternal_Cb
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Cb]<2;2,1>:ub 		{ NoDDClr }
+
+	// tc0 has bTc0_v32_Cb + bTc0_v22_Cb + bTc0_v12_Cb + bTc0_v02_Cb	
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v02_Cb]<4;4,1>:ub				{ NoDDChk }
+
+	// Extract UV MaskA from every other bit of the Y mask (stride-2 region, 8 channels); result goes to f0.0
+	and.nz.f0.0 (8) null:w			TempRow0(0)<16;8,2>		1:w
+
+	// UV MaskA and MaskB: f0.1 and MaskB are zeroed; MaskA keeps a copy of f0.0 for the V pass
+	mov (1) f0.1:uw		0:w
+	mov (1)	MaskB:uw	0:w													{ NoDDClr }
+	mov (1)	MaskA:uw	f0.0:uw												{ NoDDChk }
+	
+	CALL(FILTER_UV, 1)	
+
+
+//---------- Deblock V internal edge ----------
+	//	P1 = Cur MB V row 2
+	//	P0 = Cur MB V row 3
+	// 	Q0 = Cur MB V row 4
+	//	Q1 = Cur MB V row 5
+	mov (1)	P_AddrReg:w		4*UV_ROW_WIDTH+SRC_MB_V_BASE:w					{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*UV_ROW_WIDTH+SRC_MB_V_BASE:w					{ NoDDChk }
+
+	// alpha = bAlphaInternal_Cr, beta = bBetaInternal_Cr
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Cr]<2;2,1>:ub 		{ NoDDClr }	
+
+	// tc0 has bTc0_v32_Cr + bTc0_v22_Cr + bTc0_v12_Cr + bTc0_v02_Cr	
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v02_Cr]<4;4,1>:ub				{ NoDDChk }
+
+	// UV MaskA and MaskB: reload f0.0/f0.1 from MaskA and the word after it (FILTER_UV presumably changes the flags - TODO confirm)
+	mov (2)		f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	CALL(FILTER_UV, 1)	
+
+
+//BYPASS_4x4_DEBLOCK_V:
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Y_h.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Y_h.asm
@@ -0,0 +1,229 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter horizontal Y ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.
+//
+//	It assumes the data for horizontal de-blocking is already transposed.  
+//
+//		Luma:
+//
+//		+-------+-------+-------+-------+		H0  Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H1 Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H2	Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+		H3 Edge
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//
+/////////////////////////////////////////////////////////////////////////////
+// Debug builds only: tag EntrySignatureC with the marker value 0xBBBB.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBB:w
+#endif	
+	
+
+//========== Luma deblocking ==========
+
+
+//---------- Deblock Y external top edge (H0)  ----------	
+
+	// Bypass deblocking if it is the top edge of the picture.  
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterTopMbEdgeFlag:w		// f0.0 is set when FilterTopMbEdgeFlag is clear (AND result is zero)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	// Get (alpha >> 2) + 2 (the "+ 2" is applied by the add just before the call below)
+	shr (1) alpha2:w		r[ECM_AddrReg, bAlphaTop0_Y]:ub		2:w			// alpha >> 2
+
+	//	p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>
+//	mov (1)	P_AddrReg:w		PREV_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	P_AddrReg:w		TOP_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_Y_BASE:w         { NoDDChk }
+	
+	// Get horizontal border edge control data
+	// alpha = bAlphaTop0_Y 
+	// beta = bBetaTop0_Y
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaTop0_Y]<2;2,1>:ub			{ NoDDClr }		// 2 channels for alpha and beta
+	// Two consecutive words are loaded: MaskA and the word after it (presumably MaskB from wEdgeCntlMapB_ExtTopHorz0 - TODO confirm layout)
+	mov (2)	MaskA<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw		{ NoDDClr, NoDDChk }
+
+	// tc0 has bTc0_h03_0_Y | bTc0_h02_0_Y | bTc0_h01_0_Y | bTc0_h00_0_Y
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h00_0_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_EXT_TOP_EDGE_Y	
+//	(f0.0.anyv)	 jmpi	BYPASS_EXT_TOP_EDGE_Y
+	
+	add (1) alpha2:w		alpha2:w		2:w								// alpha2 = (alpha >> 2) + 2  
+	// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_EXT_TOP_EDGE_Y:
+//------------------------------------------------------------------
+	// Same alpha, alpha2, beta and MaskB for all internal edges (H1-H3): they are set once here and not reloaded in the per-edge sections
+
+	// Get (alpha >> 2) + 2 (the "+ 2" is applied by the add at the end of this section)
+	shr (1) alpha2:w		r[ECM_AddrReg, bAlphaInternal_Y]:ub		2:w			// alpha >> 2
+
+	// alpha = bAlphaInternal_Y 
+	// beta = bBetaInternal_Y
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub 		{ NoDDClr }
+
+	// Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.
+	mov (1) MaskB:uw	0:w						                        { NoDDChk }
+
+	add (1) alpha2:w		alpha2:w		2:w								// alpha2 = (alpha >> 2) + 2  
+		
+
+//---------- Deblock Y internal top edge (H1)  ----------
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is clear (AND result is zero)
+
+	//	p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		SRC_MB_Y_BASE:w					{ NoDDClr }
+	mov (1)	Q_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw		{ NoDDClr }
+	
+	// tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y		
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h10_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_4x4_DEBLOCK_H
+//	(f0.0.anyv)	 jmpi	BYPASS_4x4_DEBLOCK_H
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_4x4_DEBLOCK_H:
+//------------------------------------------------------------------
+
+
+//---------- Deblock Y internal mid horizontal edge (H2) ----------
+
+	// Bypass deblocking if FilterInternal8x8EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal8x8EdgesFlag:w		// Check for FilterInternal8x8EdgesFlag (f0.0 is set when it is clear)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	//	p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDClr }
+	mov (1)	Q_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw      { NoDDClr }
+//	mov (1) MaskB:uw	0:w						// Set MaskB = 0, so it always uses bS < 4 algorithm.
+
+	// tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y		
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h20_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_8x8_DEBLOCK_H
+//	(f0.0.anyv)	 jmpi	BYPASS_8x8_DEBLOCK_H
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_8x8_DEBLOCK_H:
+//-----------------------------------------------
+
+
+//---------- Deblock Y internal bottom edge (H3) ----------	 
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0 (1) null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is clear (AND result is zero)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	//	p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr } 
+	mov (1)	Q_AddrReg:w		12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk } 
+
+	
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw      { NoDDClr }
+//	mov (1) MaskB:uw	0:w						// Set MaskB = 0, so it always uses bS < 4 algorithm.
+
+	// tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y         
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_h30_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_4x4_DEBLOCK_H2
+//	(f0.0.anyv)	 jmpi	BYPASS_4x4_DEBLOCK_H2
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_4x4_DEBLOCK_H2:
+//-----------------------------------------------
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Y_v.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Filter_Y_v.asm
@@ -0,0 +1,233 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+////////// AVC ILDB filter vertical Y ///////////////////////////////////////////////////////
+//
+//	This filter code prepares the src data and control data for ILDB filtering on all vertical edges of Y.
+//
+//	It assumes the data for vertical de-blocking is already transposed.  
+//
+//		Luma:
+//
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		|		|		|		|		|
+//		+-------+-------+-------+-------+
+//
+//		V0		V1		V2		V3
+//		Edge	Edge	Edge	Edge
+//
+/////////////////////////////////////////////////////////////////////////////
+// Debug builds only: tag EntrySignatureC with the marker value 0xBBBB.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xBBBB:w
+#endif	
+	
+
+//========== Luma deblocking ==========
+
+
+//---------- Deblock Y external left edge (V0) ----------	
+
+	// Bypass deblocking if it is left edge of the picture.  
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterLeftMbEdgeFlag:w		// f0.0 is set when FilterLeftMbEdgeFlag is clear (AND result is zero)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	// Get (alpha >> 2) + 2 (the "+ 2" is applied by the add just before the call below)
+	shr (1) alpha2:w		r[ECM_AddrReg, bAlphaLeft0_Y]:ub		2:w			// alpha >> 2
+
+	//	p3 = Prev MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
+	//	p2 = Prev MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Prev MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Prev MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 0  = r[Q_AddrReg, 0]<16;16,1>
+	//	q1 = Cur MB Y row 1  = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 2  = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 3  = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		PREV_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		SRC_MB_Y_BASE:w			{ NoDDChk }
+	
+	// Get vertical border edge control data  
+	// alpha = bAlphaLeft0_Y 
+	// beta = bBetaLeft0_Y
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaLeft0_Y]<2;2,1>:ub			{ NoDDClr }		// 2 channels for alpha and beta
+// Two consecutive words are loaded below: MaskA and the word after it (presumably MaskB from wEdgeCntlMapB_ExtLeftVert0 - TODO confirm layout)
+	mov (2)	MaskA<1>:uw	r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<2;2,1>:uw	{ NoDDClr, NoDDChk }
+	
+	// tc0 has bTc0_v30_0_Y | bTc0_v20_0_Y | bTc0_v10_0_Y | bTc0_v00_0_Y
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v00_0_Y]<4;4,1>:ub			{ NoDDChk }
+
+//	(f0.0)	jmpi	BYPASS_EXT_LEFT_EDGE_Y	
+//	(f0.0.anyv)	 jmpi	BYPASS_EXT_LEFT_EDGE_Y
+		
+	add (1) alpha2:w		alpha2:w		2:w								// alpha2 = (alpha >> 2) + 2  
+	// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+
+//BYPASS_EXT_LEFT_EDGE_Y:
+//------------------------------------------------------------------
+	// Same alpha, alpha2, beta and MaskB for all internal edges (V1-V3): they are set once here and not reloaded in the per-edge sections
+
+	// Get (alpha >> 2) + 2 (the "+ 2" is applied by the add at the end of this section)
+	shr (1) alpha2:w		r[ECM_AddrReg, bAlphaInternal_Y]:ub		2:w			// alpha >> 2
+
+	// alpha = bAlphaInternal_Y
+	// beta = bBetaInternal_Y
+	mov	(2)	alpha<1>:w	r[ECM_AddrReg, bAlphaInternal_Y]<2;2,1>:ub 		{ NoDDClr }
+
+	// Set MaskB = 0 for all 3 int edges, so it always uses bS < 4 algorithm.
+	mov (1) MaskB:uw	0:w												{ NoDDChk }
+
+	add (1) alpha2:w		alpha2:w		2:w								// alpha2 = (alpha >> 2) + 2  
+
+
+//---------- Deblock Y internal left edge (V1) ----------
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0  (1) null:w		r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is clear (AND result is zero)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	//	p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>  
+	//	p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		SRC_MB_Y_BASE:w					{ NoDDClr }
+	mov (1)	Q_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }
+	
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]:uw		{ NoDDClr }
+
+	// tc0 has bTc0_v31_Y + bTc0_v21_Y + bTc0_v11_Y + bTc0_v01_Y	
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v01_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_4x4_DEBLOCK_V
+//	(f0.0.anyv)	 jmpi	BYPASS_4x4_DEBLOCK_V
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_4x4_DEBLOCK_V:
+//------------------------------------------------------------------
+
+
+//---------- Deblock Y internal mid vert edge (V2) ----------
+
+	// Bypass deblocking if FilterInternal8x8EdgesFlag = 0  
+	and.z.f0.0	(1)	null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal8x8EdgesFlag:w		// Check for FilterInternal8x8EdgesFlag (f0.0 is set when it is clear)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	//	p3 = Cur MB Y row 4  = r[P_AddrReg, 0]<16;16,1>  
+	//	p2 = Cur MB Y row 5  = r[P_AddrReg, 16]<16;16,1> 
+	//	p1 = Cur MB Y row 6  = r[P_AddrReg, 32]<16;16,1> 
+	//	p0 = Cur MB Y row 7  = r[P_AddrReg, 48]<16;16,1> 
+	// 	q0 = Cur MB Y row 8  = r[Q_AddrReg, 0]<16;16,1>  
+	//	q1 = Cur MB Y row 9  = r[Q_AddrReg, 16]<16;16,1> 
+	//	q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1> 
+	//	q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1> 
+	mov (1)	P_AddrReg:w		4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w	{ NoDDClr }
+	mov (1)	Q_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w   { NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]:uw		{ NoDDClr }
+//	mov (1) MaskB:uw	0:w						// Set MaskB = 0, so it always uses bS < 4 algorithm.
+
+	// tc0 has bTc0_v32_Y + bTc0_v22_Y + bTc0_v12_Y + bTc0_v02_Y	
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v02_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_8x8_DEBLOCK_V
+//	(f0.0.anyv)	 jmpi	BYPASS_8x8_DEBLOCK_V
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_8x8_DEBLOCK_V:
+//-----------------------------------------------
+
+
+//---------- Deblock Y internal right edge (V3) ----------	 
+
+	// Bypass deblocking if FilterInternal4x4EdgesFlag = 0  
+	and.z.f0.0	(1)	null:w	r[ECM_AddrReg, BitFlags]:ub		FilterInternal4x4EdgesFlag:w		// f0.0 is set when FilterInternal4x4EdgesFlag is clear (AND result is zero)
+
+//	and.z.f0.1 (1)	null:uw		r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw		0xFFFF:uw	// MaskA = 0? 
+
+	//	p3 = Cur MB Y row 8  = r[P_AddrReg, 0]<16;16,1> 
+	//	p2 = Cur MB Y row 9  = r[P_AddrReg, 16]<16;16,1>
+	//	p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
+	//	p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
+	// 	q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1> 
+	//	q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
+	//	q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
+	//	q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
+	mov (1)	P_AddrReg:w		8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w		{ NoDDClr }
+	mov (1)	Q_AddrReg:w		12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w      { NoDDChk }
+
+	mov (1)	MaskA:uw	r[ECM_AddrReg, wEdgeCntlMap_IntRightVert]:uw	{ NoDDClr }
+//	mov (1) MaskB:uw	0:w						// Set MaskB = 0, so it always uses bS < 4 algorithm.
+
+	// tc0 has bTc0_v33_Y + bTc0_v23_Y + bTc0_v13_Y + bTc0_v03_Y
+	mov (4)	tc0<1>:ub	r[ECM_AddrReg, bTc0_v03_Y]<4;4,1>:ub			{ NoDDChk }
+
+//    (f0.0)	jmpi	BYPASS_4x4_DEBLOCK_V2
+//	(f0.0.anyv)	 jmpi	BYPASS_4x4_DEBLOCK_V2
+// PRED_CALL with -f0.0: presumably FILTER_Y is called only when f0.0 is clear, i.e. the flag bit was set (macro defined elsewhere - TODO confirm)
+//	CALL(FILTER_Y, 1)
+	PRED_CALL(-f0.0, FILTER_Y, 1)
+
+//BYPASS_4x4_DEBLOCK_V2:
+//-----------------------------------------------
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_ForwardMsg.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_ForwardMsg.asm
@@ -0,0 +1,87 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//========== Forward message to root thread through gateway ==========
+// Each child thread writes a byte into the root GRF r50 defined in open gateway.
+//
+// Builds the gateway payload (offset/length, EU/thread id, key, data byte) in
+// GatewayPayload and sends it with a notify-type ForwardMsg descriptor.
+
+#if defined(_DEBUG) 
+mov		(1)		EntrySignatureC:w			0x7777:w
+#endif
+
+// Zero-initialize all 8 dwords of the gateway payload
+mov (8) 	GatewayPayload<1>:ud 	0:w								//{ NoDDClr } 
+
+// Forward a message:
+// Offset = x relative to r50 (defined in open gateway), x = ORIX >> 4 [bit 28:16]
+// Need to shift left 16
+
+// shift 2 more bits for byte to word offset
+// (16 + 2 = 18 is the shift count actually used below.
+//  NOTE(review): a 2-bit shift scales by 4 -- confirm whether "word" means dword here.)
+
+//shl	(1)		Offset_Length:ud		GateWayOffsetC:w	 	16:w		{ NoDDClr, NoDDChk }
+shl	(1)		Offset_Length:ud		GateWayOffsetC:w	 	18:w		
+
+// 2 bytes offset (adds 2 to the offset field, which starts at bit 16)
+add	(1)		Offset_Length:ud			Offset_Length:ud		0x00020000:d	{ NoDDClr }
+	
+// Length = 1 byte,	[bit 10:8 = 000]
+//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000
+
+//mov (1) 	DispatchID:ub 			r0.20:ub		// Dispatch ID
+
+// Move in EUid and Thread ID that we received from the PARENT thread
+mov (1) 	EUID_TID:uw 			r0.6:uw								{ NoDDClr, NoDDChk }
+
+mov (1) 	GatewayPayloadKey:uw 	0x1212:uw							{ NoDDClr, NoDDChk }	// Key
+
+//mov	(4)		GatewayPayload<1>:ud	0:ud								{ NoDDClr, NoDDChk }	// Init payload low 4 dword
+
+// Write back one byte (value = 0xFF) to root thread GRF to indicate this child thread is finished
+// All lower 4 bytes must be assigned to the same byte value.
+mov	(4)		GatewayPayload<1>:ub	0xFFFF:uw							{ NoDDChk }
+
+// msg descriptor bit 15 set to '1' for notification
+#ifdef GW_DCN
+// For ILK, EOT bit should also be set to terminate the thread. This is to fix a timing related HW issue.
+//
+send (8)  	null:ud 		m0	  		GatewayPayload<8;8,1>:ud    MSG_GW_EOT	FWDMSGDSC+NOTIFYMSG
+#else
+send (8)  	null:ud 		m0	  		GatewayPayload<8;8,1>:ud    MSG_GW	FWDMSGDSC+NOTIFYMSG
+#endif	// GW_DCN
+
+//========== Forward Msg Done ========================================
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_LumaThrdLimit.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_LumaThrdLimit.asm
@@ -0,0 +1,76 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//========== Forward message to root thread through gateway ==========
+
+// Chroma root kernel updates luma thread limit.
+//
+// Builds a gateway payload whose data byte is the low byte of
+// (MaxThreads - OutstandingThreads) and forwards it to the location selected
+// by THREAD_LIMIT_OFFSET.
+
+#if defined(_DEBUG) 
+mov		(1)		EntrySignatureC:w			0x7788:w
+#endif
+
+// Zero-initialize all 8 dwords of the gateway payload
+mov (8) 	GatewayPayload<1>:ud 	0:w								{ NoDDClr } 
+
+// Forward a message:
+// The offset/length dword is loaded directly from the THREAD_LIMIT_OFFSET
+// constant; no shift is applied here.
+// NOTE(review): presumably THREAD_LIMIT_OFFSET is already positioned in the
+// offset field [bit 28:16] -- confirm against its definition.
+
+mov	(1)		Offset_Length:ud		THREAD_LIMIT_OFFSET:ud	 			{ NoDDClr, NoDDChk }
+
+// Length = 1 byte,	[bit 10:8 = 000]
+//000 xxxxxxxxxxxxx 00000 000 00000000 ==> 000x xxxx xxxx xxxx 0000 0000 0000 0000
+
+//mov (1) 	DispatchID:ub 			r0.20:ub		// Dispatch ID
+
+//  Copy EUid and Thread ID that we received from the PARENT thread
+mov (1) 	EUID_TID:uw 			r0.6:uw								{ NoDDClr, NoDDChk }
+
+mov (1) 	GatewayPayloadKey:uw 	0x1212:uw							{ NoDDChk }	// Key
+
+//mov	(4)		GatewayPayload<1>:ud	0:ud								{ NoDDClr, NoDDChk }	// Init payload low 4 dword
+
+// Write back one byte: Temp1_W = MaxThreads - OutstandingThreads, and its byte
+// Temp1_B is replicated into the lower 4 payload bytes.
+// (presumably Temp1_B is the low byte of Temp1_W -- TODO confirm against the declarations)
+// All lower 4 bytes must be assigned to the same byte value.
+add	(1)		Temp1_W:w				MaxThreads:uw	-OutstandingThreads:uw
+mov	(4)		GatewayPayload<1>:ub	Temp1_B<0;1,0>:ub 
+
+// Unlike the child-thread forward message, this send writes a response to
+// GatewayResponse and uses FWDMSGDSC without the notification bit.
+send (8)  	GatewayResponse:ud 		m0	  		GatewayPayload<8;8,1>:ud    MSG_GW	FWDMSGDSC
+
+//========== Forward Msg Done ========================================
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Luma_Core.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Luma_Core.asm
@@ -0,0 +1,449 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#if !defined(__AVC_ILDB_LUMA_CORE__)	// Make sure this file is only included once
+#define __AVC_ILDB_LUMA_CORE__
+
+////////// AVC ILDB Luma Core /////////////////////////////////////////////////////////////////////////////////
+//
+//	This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.  
+//	If data is transposed, it can also de-block a vertical edge.
+//
+//	Before calling this subroutine, the caller needs to set the following parameters.
+//
+//	- EdgeCntlMap1				//	Edge control map A
+//	- EdgeCntlMap2				//	Edge control map B
+//	- P_AddrReg					//	Src and dest address register for P pixels
+//	- Q_AddrReg					//	Src and dest address register for Q pixels 	
+//	- alpha						//  alpha corresponding to the edge to be filtered
+//	- beta						//  beta corresponding to the edge to be filtered
+//	- tc0						// 	tc0  corresponding to the edge to be filtered
+//
+//
+//	+----+----+----+----+----+----+----+----+
+//	| p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |
+//	+----+----+----+----+----+----+----+----+
+//
+//	p3 = r[P_AddrReg, 0]<16;16,1>  
+//	p2 = r[P_AddrReg, 16]<16;16,1> 
+//	p1 = r[P_AddrReg, 32]<16;16,1> 
+//	p0 = r[P_AddrReg, 48]<16;16,1> 
+// 	q0 = r[Q_AddrReg, 0]<16;16,1>  
+//	q1 = r[Q_AddrReg, 16]<16;16,1> 
+//	q2 = r[Q_AddrReg, 32]<16;16,1> 
+//	q3 = r[Q_AddrReg, 48]<16;16,1> 
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Source regions for the 8 pixel rows around the edge.
+// Each macro is a 16-byte (:ub) region addressed indirectly through P_AddrReg
+// or Q_AddrReg, with consecutive rows 16 bytes apart.
+// The region is both src and dest
+// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values
+// Any previous definitions are removed first so this file can redefine them.
+#undef 	P3
+#undef 	P2
+#undef 	P1
+#undef 	P0
+#undef 	Q0
+#undef 	Q1
+#undef 	Q2
+#undef 	Q3
+  
+#define P3 		r[P_AddrReg,  0]<16;16,1>:ub
+#define P2 		r[P_AddrReg, 16]<16;16,1>:ub
+#define P1 		r[P_AddrReg, 32]<16;16,1>:ub
+#define P0 		r[P_AddrReg, 48]<16;16,1>:ub
+#define Q0 		r[Q_AddrReg,  0]<16;16,1>:ub
+#define Q1 		r[Q_AddrReg, 16]<16;16,1>:ub
+#define Q2 		r[Q_AddrReg, 32]<16;16,1>:ub
+#define Q3 		r[Q_AddrReg, 48]<16;16,1>:ub
+
+// New region as dest
+// Same addresses as P2..P0 / Q0..Q2 above, written as destination regions.
+// There is no NewP3 / NewQ3 macro: p3 and q3 are only ever read.
+#undef 	NewP2
+#undef 	NewP1
+#undef 	NewP0
+#undef 	NewQ0
+#undef 	NewQ1
+#undef 	NewQ2
+
+#define NewP2 	r[P_AddrReg, 16]<1>:ub
+#define NewP1 	r[P_AddrReg, 32]<1>:ub
+#define NewP0 	r[P_AddrReg, 48]<1>:ub
+#define NewQ0 	r[Q_AddrReg,  0]<1>:ub
+#define NewQ1 	r[Q_AddrReg, 16]<1>:ub
+#define NewQ2 	r[Q_AddrReg, 32]<1>:ub
+
+// Filter one luma edge
+//
+// Subroutine: filters the 16 pixels along one luma edge in place, reading and
+// writing the P/Q rows addressed through P_AddrReg and Q_AddrReg.
+// Values read below: MaskA and the word that follows it, alpha, alpha2, beta, tc0.
+// Flag registers f0.0 / f0.1 and acc0 are overwritten.
+// Ends with the RETURN macro.
+FILTER_Y:
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x1111:w
+#endif
+	//---------- Derive filterSampleflag in AVC spec, equation (8-469) ----------
+	// bS is in MaskA
+
+	// Src copy of the p3, p2, p1, p0, q0, q1, q2, q3
+//	mov (16) p0123_W(0)<1>		r[P_AddrReg]<16;16,1>:uw
+//	mov (16) p0123_W(1)<1>		r[P_AddrReg, 32]<16;16,1>:uw
+//	mov (16) q0123_W(0)<1>		r[Q_AddrReg]<16;16,1>:uw
+//	mov (16) q0123_W(1)<1>		r[Q_AddrReg, 32]<16;16,1>:uw
+
+	// Load two consecutive words into the flag registers: f0.0 = MaskA, and
+	// f0.1 = the word following MaskA (presumably MaskB -- TODO confirm layout).
+	mov (2)	f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	add (16) q0_p0(0)<1>		Q0		-P0				// q0-p0
+	add (16) TempRow0(0)<1>		P1		-P0				// p1-p0
+	add (16) TempRow1(0)<1>		Q1		-Q0				// q1-q0
+
+	// Build FilterSampleFlag
+	// Each compare is predicated on f0.0 and writes f0.0 again, so the three
+	// conditions are ANDed together with the mask loaded above.
+	// abs(q0-p0) < alpha
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)q0_p0(0)			alpha:w
+	// abs(p1-p0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow0(0)		beta:w
+	// abs(q1-q0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow1(0)		beta:w
+
+	//-----------------------------------------------------------------------------------------
+
+    (f0.0)	if	(16)		Y_ENDIF1
+		// For channels whose edge control map1 = 1 ---> perform de-blocking
+
+//		mov (1)		f0.1:uw		MaskB:uw	{NoMask}		// Now check for which algorithm to apply
+
+		// (abs)ap = |p2-p0|
+		add (16) ap(0)<1>		P2		-P0		// ap = p2-p0
+		// (abs)aq = |q2-q0|
+		add (16) aq(0)<1>		Q2		-Q0		// aq = q2-q0
+
+		// Make a copy of unmodified p0 and p1 for use in q0'and q1' calculation
+		// (copies the 32 bytes at P_AddrReg+32, i.e. the P1 and P0 rows)
+		mov (16) p0123_W(1)<1>		r[P_AddrReg, 32]<16;16,1>:uw		{NoMask}
+
+		// f0.1 still holds the second mask word loaded at the top of the routine
+		(f0.1)	if	(16)		Y_ELSE2
+
+			// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
+
+			// Compute p0', p1' and p2'
+			//-----------------------------------------------------------------------------
+			// bS = 4 Algorithm :			
+			//
+			// gama = |p0-q0| < ((alpha >> 2) + 2) 
+			// deltap = (ap<beta) && gama;  		// deep filter flag
+			//	if (deltap) {
+			//		p0' = (        p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3; 
+			// 		p1' = (        p2 +  p1 +  p0 +  q0      + 2) >> 2;
+			// 		p2' = (2*p3 +3*p2 +  p1 +  p0 +  q0      + 4) >> 3;
+			//	} else {  
+			//		p0' = (            2*p1 +  p0 +  q1      + 2) >> 2;
+			//	}
+			//-----------------------------------------------------------------------------
+
+			// gama = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2  
+			cmp.l.f0.1 (16) null:w	(abs)q0_p0(0)	alpha2:w
+
+			// Common P01 = p0 + p1
+			add (16)	P0_plus_P1(0)<1>	P0			P1	
+
+			// Common Q01 = q0 + q1
+			add (16)	Q0_plus_Q1(0)<1>	Q0			Q1
+
+//			mov (1)	CTemp1_W:w		f0.1:uw						{NoMask}
+			// Copy gama into f0.0 so deltap (f0.1) and deltaq (f0.0) can be derived independently
+			mov (1)	f0.0:uw			f0.1:uw						{NoMask}
+	
+			// deltap = ((abs)ap < beta) && gama
+			(f0.1) cmp.l.f0.1 (16) null:w	(abs)ap(0)		beta<0;1,0>:w							// (abs)ap < beta ?
+
+			// deltaq = ((abs)aq < beta) && gama
+			(f0.0) cmp.l.f0.0 (16) null:w	(abs)aq(0)		beta<0;1,0>:w							// (abs)aq < beta ?
+
+
+//			mov (1)	CTemp1_W:w		f0.0:uw						{NoMask}					// gama = |p0-q0| < ((alpha >> 2) + 2) for each channel	
+//			and (1)		f0.1:w		f0.1:uw		CTemp1_W:w		{NoMask}					// deltap = (ap<beta) && gama
+
+
+			(f0.1)	if	(16)		Y_ELSE3			// for channels its deltap = true
+
+			add (16)	P2_plus_P3(0)<1>	P2		P3
+			
+			// A =  (p1 + p0) + q0 = P01 + q0
+			add (16)	A(0)<1>			P0_plus_P1(0)		Q0							// A =  P01 + q0
+
+			// Now acc0 = A
+
+			// B =  p2 + (p1 + p0 + q0) + 4 = p2 + A + 4
+//			add (16)	acc0.0<1>:w		P2				4:w								// p2 + 4 
+//			add (16)	BB(0)<1>			acc0.0<16;16,1>:w		A(0)					// B = p2 + A + 4
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		4:w								// A + 4
+			add (16)	BB(0)<1>			acc0.0<16;16,1>:w		P2					// B = p2 + A + 4
+			
+			// Now acc0 = B
+
+			// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
+//			mov	(16)	acc0.0<1>:w		BB(0)
+			mac (16)	acc0.0<1>:w		P2_plus_P3(0)		2:w		
+			shr.sat (16) TempRow3B(0)<2>	acc0.0<16;16,1>:w		3:w
+			
+			// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
+			add (16)	acc0.0<1>:w		BB(0)			-2:w
+			shr.sat (16) TempRow1B(0)<2>	acc0.0<16;16,1>:w		2:w
+	
+			// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
+			add (16)	acc0.0<1>:w		Q1				A(0)							// q1 + A
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		BB(0)							// B + A + q1
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w		3:w								// (B + A + q1) >> 3
+
+			// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
+//			mov	(16)	acc0.0<1>:w		BB(0)
+//			mac (16)	acc0.0<1>:w		P2_plus_P3(0)		2:w		
+//			shr.sat (16) TempRow3B(0)<2>	acc0.0<16;16,1>:w		3:w
+
+			// Write results back only after all three were computed from the unmodified rows
+			mov (16) 	NewP2		TempRow3B(0)						// p2'
+			mov (16) 	NewP1		TempRow1B(0)						// p1'			
+			mov (16) 	NewP0		TempRow0B(0)						// p0'
+
+Y_ELSE3:
+			else (16)		Y_ENDIF3		// for channels its deltap = false
+
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2 =  (p1 + P01 + q1 + 2) >> 2
+			add (16)	acc0.0<1>:w		P1			P0_plus_P1(0)			// p1 + P01 (TempRow1(0) = P01)
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	Q1				
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	2:w			// p1 + P01 + q1 + 2
+
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w		2:w	// >> 2
+			mov (16) 	NewP0		TempRow0B(0)						// p0'
+
+			endif
+Y_ENDIF3:
+			// Compute q0', q1' and q2'
+			//-----------------------------------------------------------------------------
+			// bS = 4 Algorithm (cont):			
+			//
+			//	deltaq = (aq<beta) && gama;  		// deep filter flag
+			//	if (deltaq) {
+			//		q0' = (        q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3; 
+			//		q1' = (        q2 +  q1 +  q0 +  p0      + 2) >> 2;
+			//		q2' = (2*q3 +3*q2 +  q1 +  q0 +  p0      + 4) >> 3;
+			//	} else {
+			//		q0' = (            2*q1 +  q0 +  p1      + 2) >> 2;
+			//	}
+			//
+			// The P rows may already hold p0'/p1' at this point, so the q-side
+			// math uses p0(0) / p1(0) rather than the P0 / P1 region macros.
+			// (presumably p0(0)/p1(0) alias the copy saved in p0123_W(1) -- TODO confirm)
+			
+			// deltaq = ((abs)aq < beta) && gama
+//			cmp.l.f0.1 (16) null:w	(abs)aq(0)		beta<0;1,0>:w							// (abs)aq < beta ?
+
+			// Common Q01 = q0 + q1
+//			add (16)	Q0_plus_Q1(0)<1>	Q0			Q1
+			
+//			and (1)		f0.1:w		f0.1:uw		CTemp1_W:w		{NoMask}				// deltaq = ((abs)ap < beta) && gama
+
+			(f0.0)	if	(16)		Y_ELSE4			// for channels its deltaq = true
+			
+			add (16)	Q2_plus_Q3(0)<1>	Q2			Q3
+
+			// A =  (q1 + q0) + p0 = Q01 + p0
+			add (16)	A(0)<1>			Q0_plus_Q1(0)		p0(0)							// A =  q1+q0 + p0
+
+			// Acc0 = A
+
+			// B =  q2 + q1 + q0 + p0 + 4 = q2 + A + 4
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		4:w							// A + 4
+			add (16)	BB(0)<1>			acc0.0<16;16,1>:w		Q2								// B = q2 + A + 4
+
+			// Acc0 = B
+			
+			// q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3
+//			mov (16)	acc0.0<1>:w		BB(0)	
+			mac (16)	acc0.0<1>:w		Q2_plus_Q3(0)	2:w
+			shr.sat (16) TempRow3B(0)<2>	acc0.0<16;16,1>:w		3:w
+
+			// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
+			add (16)	acc0.0<1>:w		BB(0)			-2:w
+			shr.sat (16) TempRow1B(0)<2>	acc0.0<16;16,1>:w	2:w
+			
+			// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
+			add (16)	acc0.0<1>:w		p1(0)					A(0)
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		BB(0)
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w	3:w
+			
+			mov (16) 	NewQ2		TempRow3B(0)						// q2'
+			mov (16) 	NewQ1		TempRow1B(0)						// q1'
+			mov (16) 	NewQ0		TempRow0B(0)						// q0'
+
+Y_ELSE4:
+			else (16)		Y_ENDIF4		// for channels its deltaq = false
+
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2 =  (q1 + Q01 + p1 + 2) >> 2
+			// Use original p1 values in p1(0)
+			add (16)	acc0.0<1>:w		p1(0)			Q0_plus_Q1(0)			// p1 + Q01
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	Q1				
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	2:w			// p1 + Q01 + q1 + 2
+
+			shr.sat (16)	TempRow0B(0)<2>		acc0.0<16;16,1>:w		2:w								// >> 2
+			mov (16) 	NewQ0		TempRow0B(0)						// q0'
+
+			endif
+Y_ENDIF4:
+
+			
+			// Done with bS = 4 algorithm
+			
+Y_ELSE2: 
+		else 	(16)		Y_ENDIF2
+			// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
+
+			//-----------------------------------------------------------------------------
+			// bS < 4 Algorithm :
+			// tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
+			// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			// if (|p2-p0|<Beta)
+			// 		p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )
+			// if (|q2-q0|<Beta)
+			// 		q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )
+			//-----------------------------------------------------------------------------
+			
+			// Expand tc0
+			mov (16)	tc_exp(0)<1>	tc0<1;4,0>:ub	{NoMask}
+			mov (16)	tc0_exp(0)<1>	tc0<1;4,0>:ub	{NoMask}					// tc0_exp = tc0, each tc0 is duplicated 4 times for 4 adjacent pixels
+						
+			// tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)			
+//			mov (16)	tc_exp(0)<1>		tc0_exp(0)									// tc = tc0_exp first
+			
+
+			cmp.l.f0.0 (16)	null:w		(abs)ap(0)			beta:w						// |p2-p0|< Beta ? ---> (abs)ap < Beta ?
+			cmp.l.f0.1 (16)	null:w		(abs)aq(0)			beta:w						// |q2-q0|< Beta ? ---> (abs)aq < Beta ?
+			
+			//--- Use free cycles here ---
+			// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
+			// 4 * (q0-p0) + p1 - q1 + 4
+			add (16) acc0<1>:w		P1			4:w							// p1 + 4
+			mac (16) acc0<1>:w		q0_p0(0)	4:w							// 4 * (q0-p0) + p1 + 4
+			add (16) acc0<1>:w		acc0<16;16,1>:w		-Q1					// 4 * (q0-p0) + p1 - q1 + 4
+			shr (16) TempRow0(0)<1> acc0<16;16,1>:w		3:w
+						
+			// Continue on getting tc_exp
+			(f0.0) add (16)	tc_exp(0)<1>	tc_exp(0)	1:w							// tc0_exp + (|p2-p0|<Beta ? 1 : 0)
+			// Save both flag words (f0.0 and f0.1) before the clip compares below overwrite them
+			mov (2)	CTemp1_W<1>:w		f0.0<2;2,1>:w			{NoMask}					// Save	|p2-p0|<Beta flag			
+			(f0.1) add (16)	tc_exp(0)<1>	tc_exp(0)	1:w							// tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
+			
+
+			// Continue on clipping tc to get delta
+			cmp.g.f0.0	(16) null:w		TempRow0(0)		tc_exp(0)					// Clip if delta' > tc
+			cmp.l.f0.1	(16) null:w		TempRow0(0)		-tc_exp(0)					// Clip if delta' < -tc
+
+			//--- Use free cycles here ---
+			// common = (p0+q0+1) >> 1 	  --->  TempRow2(0)
+			// Same as avg of p0 and q0
+			avg (16) TempRow2(0)<1>		P0			Q0
+
+			// Continue on clipping tc to get delta
+			(f0.0) mov (16) TempRow0(0)<1>				tc_exp(0)
+			(f0.1) mov (16) TempRow0(0)<1>				-tc_exp(0)
+
+			//--- Use free cycles here ---
+			// Restore the two flag words saved above into f0.0 / f0.1
+			mov (2)	f0.0<1>:w		CTemp1_W<2;2,1>:w	{NoMask}			// CTemp1_W = (|p2-p0|<Beta)
+																			// CTemp2_W = (|q2-q0|<Beta)		
+			//-----------------------------------------------------------------------
+
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			add.sat (16) TempRow1B(0)<2>		P0			TempRow0(0)					// p0+delta
+			add.sat (16) TempRow0B(0)<2>		Q0			-TempRow0(0) 				// q0-delta
+			mov (16) NewP0		TempRow1B(0)					// p0'
+			mov (16) NewQ0		TempRow0B(0)					// q0'
+			//-----------------------------------------------------------------------
+
+			// Now compute p1' and q1'
+
+			// if (|p2-p0|<Beta)
+//			mov (1)	f0.0:w		CTemp1_W:w				{NoMask}			// CTemp1_W = (|p2-p0|<Beta)
+			(f0.0)	if	(16)		Y_ENDIF6
+		
+			// p1' = p1 + Clip3(-tc0, tc0, adj)
+			// adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1
+			add (16) acc0<1>:w	P2		TempRow2(0)							// TempRow2(0) = common = (p0+q0+1) >> 1
+			mac (16) acc0<1>:w	P1		-2:w
+			shr (16) TempRow1(0)<1>		acc0<16;16,1>:w		1:w
+
+			// tc clip to get tc_adj
+			cmp.g.f0.0	(16) null:w		TempRow1(0)		tc0_exp(0)					// Clip if adj > tc0
+			cmp.l.f0.1	(16) null:w		TempRow1(0)		-tc0_exp(0)					// Clip if adj < -tc0
+			
+			(f0.0) mov (16) TempRow1(0)<1>				tc0_exp(0)
+			(f0.1) mov (16) TempRow1(0)<1>				-tc0_exp(0)
+
+			//--- Use free cycles here ---
+			// f0.1 was overwritten by the clip compare above; reload it for the q1' test that follows
+			mov (1)	f0.1:w		CTemp2_W:w				{NoMask}			// CTemp2_W = (|q2-q0|<Beta)
+
+			// p1' = p1 + tc_adj
+			add.sat (16) TempRow1B(0)<2>		P1			TempRow1(0)					// p1+tc_adj
+			mov (16) NewP1			TempRow1B(0)				// p1'
+
+			//------------------------------------------------------------------------
+Y_ENDIF6:
+			endif
+			
+			// if (|q2-q0|<Beta)
+//			mov (1)	f0.1:w		CTemp2_W:w				{NoMask}			// CTemp2_W = (|q2-q0|<Beta)
+			(f0.1)	if	(16)		Y_ENDIF7
+					
+			// q1' = q1 + Clip3(-tc0, tc0, adj)
+			// adj = (q2 + common - (q1<<1)) >> 1 
+			// same as q2 + common - (q1 * 2)
+			add (16) acc0<1>:w	Q2		TempRow2(0)
+			mac (16) acc0<1>:w	Q1		-2:w
+			shr (16) TempRow1(0)<1>		acc0<16;16,1>:w		1:w	
+
+			// tc clip to get tc_adj
+			cmp.g.f0.0	(16) null:w		TempRow1(0)		tc0_exp(0)					// Clip if adj > tc0
+			cmp.l.f0.1	(16) null:w		TempRow1(0)		-tc0_exp(0)					// Clip if adj < -tc0
+
+			(f0.0) mov (16) TempRow1(0)<1>				tc0_exp(0)
+			(f0.1) mov (16) TempRow1(0)<1>				-tc0_exp(0)
+
+			// q1' = q1 + tc_adj
+			add.sat (16) TempRow1B(0)<2>		Q1			TempRow1(0)					// q1+tc_adj
+			mov (16) NewQ1			TempRow1B(0)				// q1'
+
+			//------------------------------------------------------------------------			
+Y_ENDIF7:
+			endif
+
+		endif
+Y_ENDIF2:
+Y_ENDIF1:
+	endif
+
+RETURN
+
+#endif	// !defined(__AVC_ILDB_LUMA_CORE__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Luma_Core_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Luma_Core_Mbaff.asm
@@ -0,0 +1,421 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#if !defined(__AVC_ILDB_LUMA_CORE_MBAFF__)	// Make sure this file is only included once
+#define __AVC_ILDB_LUMA_CORE_MBAFF__
+
+////////// AVC ILDB Luma Core Mbaff /////////////////////////////////////////////////////////////////////////////////
+//
+//	This core performs AVC LUMA ILDB filtering on one horizontal edge (16 pixels) of a MB.  
+//	If data is transposed, it can also de-block a vertical edge.
+//
+//	Before calling this subroutine, the caller needs to set the following parameters.
+//
+//	- EdgeCntlMap1				//	Edge control map A
+//	- EdgeCntlMap2				//	Edge control map B
+//	- P_AddrReg					//	Src and dest address register for P pixels
+//	- Q_AddrReg					//	Src and dest address register for Q pixels 	
+//	- alpha						//  alpha corresponding to the edge to be filtered
+//	- beta						//  beta corresponding to the edge to be filtered
+//	- tc0						// 	tc0  corresponding to the edge to be filtered
+//
+//
+//	+----+----+----+----+----+----+----+----+
+//	| p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |
+//	+----+----+----+----+----+----+----+----+
+//
+//	p3 = r[P_AddrReg, 0]<16;16,1>  
+//	p2 = r[P_AddrReg, 16]<16;16,1> 
+//	p1 = r[P_AddrReg, 32]<16;16,1> 
+//	p0 = r[P_AddrReg, 48]<16;16,1> 
+// 	q0 = r[Q_AddrReg, 0]<16;16,1>  
+//	q1 = r[Q_AddrReg, 16]<16;16,1> 
+//	q2 = r[Q_AddrReg, 32]<16;16,1> 
+//	q3 = r[Q_AddrReg, 48]<16;16,1> 
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Source regions for the 8 pixel rows around the edge.
+// Each macro is a 16-byte (:ub) region addressed indirectly through P_AddrReg
+// or Q_AddrReg, with consecutive rows 16 bytes apart.
+// The region is both src and dest
+// P0-P3 and Q0-Q3 should be only used if they have not been modified to new values
+// Any previous definitions are removed first so this file can redefine them.
+#undef 	P3
+#undef 	P2
+#undef 	P1
+#undef 	P0
+#undef 	Q0
+#undef 	Q1
+#undef 	Q2
+#undef 	Q3
+
+#define P3 		r[P_AddrReg,  0]<16;16,1>:ub
+#define P2 		r[P_AddrReg, 16]<16;16,1>:ub
+#define P1 		r[P_AddrReg, 32]<16;16,1>:ub
+#define P0 		r[P_AddrReg, 48]<16;16,1>:ub
+#define Q0 		r[Q_AddrReg,  0]<16;16,1>:ub
+#define Q1 		r[Q_AddrReg, 16]<16;16,1>:ub
+#define Q2 		r[Q_AddrReg, 32]<16;16,1>:ub
+#define Q3 		r[Q_AddrReg, 48]<16;16,1>:ub
+
+// New region as dest
+// Same addresses as P2..P0 / Q0..Q2 above, written as destination regions.
+// There is no NewP3 / NewQ3 macro.
+#undef 	NewP2
+#undef 	NewP1
+#undef 	NewP0
+#undef 	NewQ0
+#undef 	NewQ1
+#undef 	NewQ2
+
+#define NewP2 	r[P_AddrReg, 16]<1>:ub
+#define NewP1 	r[P_AddrReg, 32]<1>:ub
+#define NewP0 	r[P_AddrReg, 48]<1>:ub
+#define NewQ0 	r[Q_AddrReg,  0]<1>:ub
+#define NewQ1 	r[Q_AddrReg, 16]<1>:ub
+#define NewQ2 	r[Q_AddrReg, 32]<1>:ub
+
+
+
+// Filter one luma edge - mbaff
+FILTER_Y_MBAFF:
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x1111:w
+#endif
+	//---------- Derive filterSampleflag in AVC spec, equition (8-469) ----------
+	// bS is in MaskA
+
+	// Src copy of the p3, p2, p1, p0, q0, q1, q2, q3
+//	mov (16) p0123_W(0)<1>		r[P_AddrReg]<16;16,1>:uw
+//	mov (16) p0123_W(1)<1>		r[P_AddrReg, 32]<16;16,1>:uw
+//	mov (16) q0123_W(0)<1>		r[Q_AddrReg]<16;16,1>:uw
+//	mov (16) q0123_W(1)<1>		r[Q_AddrReg, 32]<16;16,1>:uw
+
+	// Move MaskA and MaskB to flag regs
+	mov (2)	f0.0<1>:uw		MaskA<2;2,1>:uw
+
+	add (16) q0_p0(0)<1>		Q0			-P0				// q0-p0
+	add (16) TempRow0(0)<1>		P1			-P0				// p1-p0
+	add (16) TempRow1(0)<1>		Q1			-Q0				// q1-q0
+
+	// abs(q0-p0) < alpha
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)q0_p0(0)		Mbaff_ALPHA(0)
+	// abs(p1-p0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow0(0)	Mbaff_BETA(0)
+	// abs(q1-q0) < Beta
+	(f0.0) cmp.l.f0.0 (16) null:w		(abs)TempRow1(0)	Mbaff_BETA(0)
+
+	//-----------------------------------------------------------------------------------------
+
+    (f0.0)	if	(16)		MBAFF_Y_ENDIF1
+		// For channels whose edge control map1 = 1 ---> perform de-blocking
+
+//		mov (1)		f0.1:uw		MaskB:uw	{NoMask}		// Now check for which algorithm to apply
+
+		// (abs)ap = |p2-p0|
+		add (16) ap(0)<1>		P2		-P0
+
+		// (abs)aq = |q2-q0|
+		add (16) aq(0)<1>		Q2		-Q0
+
+		// Make a copy of unmodified p0 and p1 for use in q0'and q1' calculation
+		mov (16) p0123_W(1)<1>		r[P_AddrReg, 32]<16;16,1>:uw		{NoMask}
+			
+		(f0.1)	if	(16)		MBAFF_Y_ELSE2
+
+			// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
+
+			// Compute q0', q1' and q2'
+			//-----------------------------------------------------------------------------
+			// bS = 4 Algorithm :			
+			//
+			// gama = |p0-q0| < ((alpha >> 2) + 2) 
+			// deltap = (ap<beta) && gama;  		// deep filter flag
+			//	if (deltap) {
+			//		p0' = (        p2 +2*p1 +2*p0 +2*q0 + q1 + 4) >> 3; 
+			// 		p1' = (        p2 +  p1 +  p0 +  q0      + 2) >> 2;
+			// 		p2' = (2*p3 +3*p2 +  p1 +  p0 +  q0      + 4) >> 3;
+			//	} else {  
+			//		p0' = (            2*p1 +  p0 +  q1      + 2) >> 2;
+			//	}
+			//-----------------------------------------------------------------------------
+
+			// gama = |p0-q0| < ((alpha >> 2) + 2) = |p0-q0| < alpha2  
+			cmp.l.f0.1 (16) null:w	(abs)q0_p0(0)	Mbaff_ALPHA2(0)
+
+			// Common P01 = p0 + p1
+			add (16)	P0_plus_P1(0)<1>	P0			P1	
+
+			// Common Q01 = q0 + q1
+			add (16)	Q0_plus_Q1(0)<1>	Q0			Q1
+
+			mov (1)	f0.0:uw			f0.1:uw						{NoMask}
+
+			// deltap = ((abs)ap < beta) && gama
+			(f0.1) cmp.l.f0.1 (16) null:w	(abs)ap(0)		Mbaff_BETA(0)							// (abs)ap < beta ?
+
+			// deltaq = ((abs)aq < beta) && gama
+			(f0.0) cmp.l.f0.0 (16) null:w	(abs)aq(0)		Mbaff_BETA(0)							// (abs)aq < beta ?
+
+
+			(f0.1)	if	(16)		MBAFF_Y_ELSE3			// bS = 4, p side: channels where deltap is true (deep filter)
+	
+			add (16)	P2_plus_P3(0)<1>	P2		P3					// p2 + p3, used for p2' below
+			
+			// A =  p1 + p0 + q0 = P01 + q0
+			add (16)	A(0)<1>			P0_plus_P1(0)		Q0							// A =  P01 + q0
+
+			// Now acc0 = A (the next add reads acc0 without an explicit write, so it relies on this)
+
+			// B =  p2 + p1 + p0 + q0 + 4 = p2 + A + 4
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		4:w								// A + 4
+			add (16)	BB(0)<1>			acc0.0<16;16,1>:w		P2					// B = p2 + A + 4
+
+			// Now acc0 = B (the mac below accumulates on top of it)
+
+			// p2' = (2*p3 +3*p2 + A + 4) >> 3 = (2*(p3+p2) + B) >> 3
+			mac (16)	acc0.0<1>:w		P2_plus_P3(0)		2:w		// B + 2*(p2+p3)
+			shr.sat (16) TempRow3B(0)<2>	acc0.0<16;16,1>:w		3:w
+
+			// p1' = (p2 + A + 2) >> 2 = (B - 2) >> 2
+			add (16)	acc0.0<1>:w		BB(0)			-2:w		// B - 2
+			shr.sat (16) TempRow1B(0)<2>	acc0.0<16;16,1>:w		2:w
+	
+			// p0' = (p2 +2*A + q1 + 4) >> 3 = (B + A + q1) >> 3
+			add (16)	acc0.0<1>:w		Q1				A(0)							// q1 + A
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		BB(0)							// B + A + q1
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w		3:w								// (B + A + q1) >> 3
+
+			mov (16) 	NewP2		TempRow3B(0)						// p2'
+			mov (16) 	NewP1		TempRow1B(0)						// p1'			
+			mov (16) 	NewP0		TempRow0B(0)						// p0'
+
+MBAFF_Y_ELSE3:
+			else (16)		MBAFF_Y_ENDIF3		// channels where deltap is false: only p0 is filtered
+
+			// p0' = (2*p1 + p0 + q1 + 2) >> 2 =  (p1 + P01 + q1 + 2) >> 2
+			add (16)	acc0.0<1>:w		P1			P0_plus_P1(0)			// p1 + P01
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	Q1				// p1 + P01 + q1
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	2:w			// p1 + P01 + q1 + 2
+
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w		2:w	// >> 2
+			mov (16) 	NewP0		TempRow0B(0)						// p0'
+
+			endif
+
+MBAFF_Y_ENDIF3:
+			// Compute q0', q1' and q2'
+			//-----------------------------------------------------------------------------
+			// bS = 4 Algorithm (cont):			
+			//
+			//	deltaq = (aq<beta) && gama;  		// deep filter flag
+			//	if (deltaq) {
+			//		q0' = (        q2 +2*q1 +2*q0 +2*p0 + p1 + 4) >> 3; 
+			//		q1' = (        q2 +  q1 +  q0 +  p0      + 2) >> 2;
+			//		q2' = (2*q3 +3*q2 +  q1 +  q0 +  p0      + 4) >> 3;
+			//	} else {
+			//		q0' = (            2*q1 +  q0 +  p1      + 2) >> 2;
+			//	}
+			
+			(f0.0)	if	(16)		MBAFF_Y_ELSE4			// bS = 4, q side: channels where deltaq is true (deep filter)
+			
+			add (16)	Q2_plus_Q3(0)<1>	Q2			Q3					// q2 + q3, used for q2' below
+
+			// A =  q1 + q0 + p0 = Q01 + p0
+			add (16)	A(0)<1>			Q0_plus_Q1(0)		p0(0)							// A =  q1+q0 + p0 (p0(0): presumably the saved unfiltered p0 - TODO confirm)
+
+			// B =  q2 + q1 + q0 + p0 + 4 = q2 + A + 4
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		4:w							// A + 4 (acc0 is read without an explicit write, so it must still hold A)
+			add (16)	BB(0)<1>			acc0.0<16;16,1>:w		Q2								// B = q2 + A + 4
+			
+			// Acc0 = B (the mac below accumulates on top of it)
+
+			// q2' = (2*q3 +3*q2 + A + 4) >> 3 = (2*(q3+q2) + B) >> 3
+			mac (16)	acc0.0<1>:w		Q2_plus_Q3(0)	2:w		// B + 2*(q2+q3)
+			shr.sat (16) TempRow3B(0)<2>	acc0.0<16;16,1>:w		3:w
+
+			// q1' = (q2 + A + 2) >> 2 = (B - 2) >> 2
+			add (16)	acc0.0<1>:w		BB(0)			-2:w		// B - 2
+			shr.sat (16) TempRow1B(0)<2>	acc0.0<16;16,1>:w	2:w
+			
+			// q0' = (q2 +2*A + p1 + 4) >> 3 = (B + A + p1) >> 3
+			add (16)	acc0.0<1>:w		p1(0)					A(0)		// p1 + A
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w		BB(0)		// B + A + p1
+			shr.sat (16) TempRow0B(0)<2>	acc0.0<16;16,1>:w	3:w
+			
+			mov (16) 	NewQ2		TempRow3B(0)						// q2'
+			mov (16) 	NewQ1		TempRow1B(0)						// q1'
+			mov (16) 	NewQ0		TempRow0B(0)						// q0'
+
+MBAFF_Y_ELSE4:
+			else (16)		MBAFF_Y_ENDIF4		// channels where deltaq is false: only q0 is filtered
+
+			// q0' = (2*q1 + q0 + p1 + 2) >> 2 =  (q1 + Q01 + p1 + 2) >> 2
+			// Use original p1 values in p1(0)
+			add (16)	acc0.0<1>:w		p1(0)			Q0_plus_Q1(0)			// p1 + Q01
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	Q1				// p1 + Q01 + q1
+			add (16)	acc0.0<1>:w		acc0.0<16;16,1>:w	2:w			// p1 + Q01 + q1 + 2
+
+			shr.sat (16)	TempRow0B(0)<2>		acc0.0<16;16,1>:w		2:w								// >> 2
+			mov (16) 	NewQ0		TempRow0B(0)						// q0'
+
+			endif
+MBAFF_Y_ENDIF4:
+
+			
+			// Done with bS = 4 algorithm
+			
+MBAFF_Y_ELSE2: 
+		else 	(16)		MBAFF_Y_ENDIF2
+			// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
+
+			//-----------------------------------------------------------------------------
+			// bS < 4 Algorithm :
+			// tc = tc0 + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
+			// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			// if (|p2-p0|<Beta)
+			// 		p1' = p1 + Clip3(-tc0, tc0, (p2 + ((p0+q0+1)>>1) - (p1<<1)) >> 1 )
+			// if (|q2-q0|<Beta)
+			// 		q1' = q1 + Clip3(-tc0, tc0, (q2 + ((p0+q0+1)>>1) - (q1<<1)) >> 1 )
+			//-----------------------------------------------------------------------------
+			
+			mov (16)	tc_exp(0)<1>		Mbaff_TC0(0)								// tc = tc0_exp first
+			
+			cmp.l.f0.0 (16)	null:w		(abs)ap(0)			Mbaff_BETA(0)				// |p2-p0|<Beta ?
+			cmp.l.f0.1 (16)	null:w		(abs)aq(0)			Mbaff_BETA(0)				// |q2-q0|<Beta ?
+				
+			//--- Use free cycles here ---
+			// delta = Clip3(-tc, tc, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3))
+			// 4 * (q0-p0) + p1 - q1 + 4
+			add (16) acc0<1>:w		P1			4:w							// p1 + 4
+			mac (16) acc0<1>:w		q0_p0(0)	4:w							// 4 * (q0-p0) + p1 + 4
+			add (16) acc0<1>:w		acc0<16;16,1>:w		-Q1					// 4 * (q0-p0) + p1 - q1 + 4
+			shr (16) TempRow0(0)<1> acc0<16;16,1>:w		3:w
+						
+			// Continue on getting tc_exp
+			(f0.0) add (16)	tc_exp(0)<1>	tc_exp(0)	1:w							// tc0_exp + (|p2-p0|<Beta ? 1 : 0)
+			mov (2)	CTemp1_W<1>:w		f0.0<2;2,1>:w			{NoMask}					// Save	|p2-p0|<Beta flag			
+			(f0.1) add (16)	tc_exp(0)<1>	tc_exp(0)	1:w							// tc_exp = tc0_exp + (|p2-p0|<Beta ? 1 : 0) + (|q2-q0|<Beta ? 1 : 0)
+				
+			// Continue on clipping to tc to get delta
+			cmp.g.f0.0	(16) null:w		TempRow0(0)		tc_exp(0)					// Clip if delta' > tc
+			cmp.l.f0.1	(16) null:w		TempRow0(0)		-tc_exp(0)					// Clip if delta' < -tc
+
+			//--- Use free cycles here ---
+			// common = (p0+q0+1) >> 1 	  --->  TempRow2(0)
+			// Same as avg of p0 and q0
+			avg (16) TempRow2(0)<1>		P0			Q0
+
+			// Continue on clipping to tc to get delta
+			(f0.0) mov (16) TempRow0(0)<1>				tc_exp(0)
+			(f0.1) mov (16) TempRow0(0)<1>				-tc_exp(0)
+
+			//--- Use free cycles here ---
+			mov (2)	f0.0<1>:w		CTemp1_W<2;2,1>:w	{NoMask}			// CTemp1_W = (|p2-p0|<Beta)
+																			// CTemp2_W = (|q2-q0|<Beta)		
+
+			// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
+			// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
+			add.sat (16) TempRow1B(0)<2>		P0			TempRow0(0)					// p0+delta
+			add.sat (16) TempRow0B(0)<2>		Q0			-TempRow0(0) 				// q0-delta
+			
+			mov (16) NewP0		TempRow1B(0)					// p0'
+			mov (16) NewQ0		TempRow0B(0)					// q0'
+
+			//-----------------------------------------------------------------------
+
+			// Now compute p1' and q1'
+
+			// if (|p2-p0|<Beta)
+			(f0.0)	if	(16)		MBAFF_Y_ENDIF6			// bS < 4: compute p1' for the channels flagged in f0.0
+
+			// p1' = p1 + Clip3(-tc0, tc0, adj)
+			// adj = (p2 + common - (p1<<1)) >> 1 = (p2 + common - (p1*2)) >> 1
+			add (16) acc0<1>:w	P2		TempRow2(0)							// TempRow2(0) = common = (p0+q0+1) >> 1
+			mac (16) acc0<1>:w	P1		-2:w							// p2 + common - 2*p1
+			shr (16) TempRow1(0)<1>		acc0<16;16,1>:w		1:w			// adj
+
+			// Clip adj to [-tc0, tc0] to get tc_adj
+			cmp.g.f0.0	(16) null:w		TempRow1(0)		Mbaff_TC0(0)					// Clip if adj > tc0
+			cmp.l.f0.1	(16) null:w		TempRow1(0)		-Mbaff_TC0(0)					// Clip if adj < -tc0
+			
+			(f0.0) mov (16) TempRow1(0)<1>				Mbaff_TC0(0)
+			(f0.1) mov (16) TempRow1(0)<1>				-Mbaff_TC0(0)
+
+			//--- Use free cycles here ---
+			mov (1)	f0.1:w		CTemp2_W:w				{NoMask}			// Reload f0.1 from CTemp2_W (|q2-q0|<Beta per original note); the cmp.l above overwrote f0.1
+
+			// p1' = p1 + tc_adj
+			add.sat (16) TempRow1B(0)<2>		P1			TempRow1(0)					// p1+tc_adj
+			mov (16) NewP1			TempRow1B(0)				// p1'
+			//------------------------------------------------------------------------
+
+MBAFF_Y_ENDIF6:
+			endif
+			
+			// if (|q2-q0|<Beta)
+			(f0.1)	if	(16)		MBAFF_Y_ENDIF7			// bS < 4: compute q1' for the channels flagged in f0.1
+
+			// q1' = q1 + Clip3(-tc0, tc0, adj)
+			// adj = (q2 + common - (q1<<1)) >> 1 
+			// same as (q2 + common - (q1 * 2)) >> 1
+			add (16) acc0<1>:w	Q2		TempRow2(0)						// q2 + TempRow2(0) (the "common" term of the formula above)
+			mac (16) acc0<1>:w	Q1		-2:w							// q2 + common - 2*q1
+			shr (16) TempRow1(0)<1>		acc0<16;16,1>:w		1:w	// adj
+					
+			// Clip adj to [-tc0, tc0] to get tc_adj
+			cmp.g.f0.0	(16) null:w		TempRow1(0)		Mbaff_TC0(0)					// Clip if adj > tc0
+			cmp.l.f0.1	(16) null:w		TempRow1(0)		-Mbaff_TC0(0)					// Clip if adj < -tc0
+
+			(f0.0) mov (16) TempRow1(0)<1>				Mbaff_TC0(0)
+			(f0.1) mov (16) TempRow1(0)<1>				-Mbaff_TC0(0)
+			
+			// q1' = q1 + tc_adj
+			add.sat (16) TempRow1B(0)<2>		Q1			TempRow1(0)					// q1+tc_adj
+			mov (16) NewQ1			TempRow1B(0)				// q1'
+			
+			//------------------------------------------------------------------------			
+MBAFF_Y_ENDIF7:
+			endif
+
+		endif
+MBAFF_Y_ENDIF2:
+MBAFF_Y_ENDIF1:
+	endif
+
+RETURN
+
+#endif	// !defined(__AVC_ILDB_LUMA_CORE_MBAFF__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_OpenGateway.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_OpenGateway.asm
@@ -0,0 +1,73 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//----- Open a Message Gateway -----
+// The parent thread is the recipient thread. Builds the gateway payload from r0 and sends an OpenGateway message.
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignature:w			0x1111:w		// Debug-only marker value
+#endif
+
+mov (8) 	GatewayPayload<1>:ud 		r0.0<8;8,1>:ud			// Init payload to r0
+
+// GRFs the child thread can write to (16 GRFs). NOTE(review): the active value below encodes base r6; r50 appears only in the commented-out alternative.
+
+// Reg base is at bit 28:21, Gateway size is at [bit 10:8]
+// r6: 6 = 00000110
+//000 00000110 0000000000 100 00000000 ==> 0000 0000 1100 0000 0000 0100 0000 0000
+mov (1) 	RegBase_GatewaySize:ud 	0x00C00400:ud	// Reg base (r6) + Gateway size field = 100b (16 GRFs)
+
+
+//000 00110010 0000000000 100 00000000 ==> 0000 0110 0100 0000 0000 0100 0000 0000
+//mov (1) 	RegBase_GatewaySize:ud 	0x06400400:ud	// Reg base (r50 = 0x640 byte offset) + Gateway size (16 GRFs)
+
+//mov (1) 	DispatchID:ub 			r0.20:ub		// Dispatch ID
+mov (1) 	GatewayPayloadKey:uw 	0x1212:uw		// Key=0x1212
+
+// Message descriptor
+// bit 31	EOD
+// 27:24	FFID = 0011b (binary) for msg gateway
+// 23:20	msg length = 1 MRF
+// 19:16	Response length	= 0
+// 14		AckReg = 1
+// 1:0		SubFuncID = 00 for OpenGateway
+// Message descriptor: 0 000 0011 0001 0000 + 0 1 000000000000 00 ==> 0000 0011 0001 0000 0100 0000 0000 0000
+// Send message to gateway: the ack message is put into GatewayResponse (r49 per the original note - TODO confirm) ==> Good for debugging
+send (8)  	GatewayResponse:ud	m7	  GatewayPayload<8;8,1>:ud    MSG_GW	OGWMSGDSC
+
+//----- End of Open a Message Gateway -----
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Field_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Field_UV.asm
@@ -0,0 +1,39 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#include "AVC_ILDB_Root_UV.asm"
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Field_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Field_Y.asm
@@ -0,0 +1,39 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#include "AVC_ILDB_Root_Y.asm"
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_UV.asm
@@ -0,0 +1,170 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+/////////////////////////////////////////////////////////////////////////////////////
+// Kernel name: AVC_ILDB_Root_Mbaff_UV.asm
+//
+//  Chroma (UV) root kernel; serves as a scheduler for child threads.
+//
+//
+//	***** Note *****
+//	Initial design bundles an MB pair for each thread, and shares AVC_ILDB_MB_Dep_Check.asm
+//	with non mbaff kernels.
+//
+//	Optimization will be done later, putting top and bottom MBs on separate threads.
+//
+//
+/////////////////////////////////////////////////////////////////////////////////////
+//
+//  $Revision: 1 $
+//  $Date: 10/19/06 5:06p $
+//
+
+// ----------------------------------------------------
+//  AVC_ILDB_ROOT_MBAFF_UV
+// ----------------------------------------------------
+#define AVC_ILDB
+
+.kernel AVC_ILDB_ROOT_MBAFF_UV
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_ROOT_UV):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+
+#if defined(_DEBUG) 
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Init URB space for running on RTL, so that no unwritten URB entry is read (the init code below is commented out).
+// Will remove it for production release.
+
+
+//mov (8) m1:ud 		0x11111111:ud
+//mov (8) m2:ud 		0x22222222:ud 
+//mov (8) m3:ud 		0x33333333:ud
+//mov (8) m4:ud 		0x44444444:ud 
+
+//mov (1)	Temp1_W:w	0:w
+
+//ILDB_INIT_URB:
+//mul (1)	URBOffset:w				Temp1_W:w		4:w
+//shl (1) URBWriteMsgDescLow:uw 	URBOffset:w		4:w		// Msg descriptor: URB write dest offset (9:4)
+//mov (1) URBWriteMsgDescHigh:uw 	0x0650:uw				// Msg descriptor: URB write 5 MRFs (m0 - m4)
+//#include "writeURB.asm"
+
+//add		(1)		Temp1_W:w	Temp1_W:w	1:w				// Increase block count
+//cmp.l.f0.0 (1) 	null		Temp1_W:w	MBsCntY:w		// Check the block count limit
+//(f0.0) jmpi		ILDB_INIT_URB							// Loop back
+
+
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+mov		(1)		EntrySignature:w			0xEFF0:w		// Debug-only marker value
+
+#endif
+//----------------------------------------------------------------------------------------------------------------
+
+// Set global variable
+mov (32) 	ChildParam:uw			0:uw								// Reset local variables
+//mul	(1)	 	TotalBlocks:w			MBsCntX:w		MBsCntY:w			// Total # of MB pairs
+//add	(1)	 	GatewayApertureE:w		MBsCntY:w 		GatewayApertureB:w	// Aperture End = aperture Head + BlockCntY
+
+
+// 2 URB entries per MB (URB_ENTRIES_PER_MB):
+// Entry 0 - Child thread R0Hdr
+// Entry 1 - input parameter to child kernel (child r1)
+
+#define 	URB_ENTRIES_PER_MB 	2
+
+// URB_ENTRIES_PER_MB in a different form; the final desired format is (URB_ENTRIES_PER_MB-1) << 10
+mov (1) URB_EntriesPerMB_2:w		URB_ENTRIES_PER_MB-1:w
+shl (1) URB_EntriesPerMB_2:w		URB_EntriesPerMB_2:w	10:w
+
+#define 	CHROMA_ROOT														// Compiling flag for chroma only
+//mul (1)		URBOffsetUVBase:w		MBsCntY:w		URB_ENTRIES_PER_MB:w	// Right after Y entries
+
+// URB base for UV kernels (240 when DEV_CL is defined, otherwise 320)
+#if defined(DEV_CL)	
+	mov (1)		URBOffsetUVBase:w		240:w
+#else
+	mov (1)		URBOffsetUVBase:w		320:w
+#endif
+
+
+mov	(1)		ChildThreadsID:uw		3:uw		// NOTE(review): presumably the starting ID for chroma child threads - TODO confirm
+
+shr (1)		ThreadLimit:w		MaxThreads:w		1:w		// Initial thread limit = MaxThreads / 2 (50%)
+mul	(1)	 	TotalBlocks:w		MBsCntX:w		MBsCntY:w	// MBs to be processed count down from TotalBlocks
+
+//***** Init CT_R0Hdr fields that are common to all threads *************************
+mov (8)	CT_R0Hdr.0:ud			r0.0<8;8,1>:ud				// Init to root R0 header
+mov (1)	CT_R0Hdr.7:ud			r0.6:ud						// Copy Parent Thread Cnt; JJ did the change on 06/20/2006
+mov (1) CT_R0Hdr.31:ub			0:w							// Reset the highest byte
+mov (1) CT_R0Hdr.3:ud 			0x00000000	 // Clear dword 3 of the child R0 header
+mov (1) CT_R0Hdr.6:uw 			sr0.0:uw					// sr0.0: state reg contains general thread states, e.g. EUID/TID.
+
+//***** Init ChildParam fields that are common to all threads ***********************
+mov (8) 	ChildParam<1>:ud	RootParam<8;8,1>:ud		// Copy all root parameters
+mov (4)		CurCol<1>:w			0:w						// Reset CurCol, CurRow
+add	(2)		LastCol<1>:w		MBsCntX<2;2,1>:w		-1:w	// Get LastCol and LastRow
+
+mov (1) URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud
+
+//===================================================================================
+
+#include "AVC_ILDB_OpenGateway.asm"		// Open root thread gateway for receiving notification 
+
+#include "AVC_ILDB_Dep_Check.asm"  	// Check dependency and spawn all MBs
+
+//#include "AVC_ILDB_UpdateThrdLimit.asm"	// Update thread limit in luma root thread via gateway
+
+#include "AVC_ILDB_CloseGateway.asm"	// Close root thread gateway 
+
+// Chroma root EOT = child send EOT : Request type = 1
+	END_CHILD_THREAD
+
+#undef 	CHROMA_ROOT
+
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Mbaff_Y.asm
@@ -0,0 +1,170 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+/////////////////////////////////////////////////////////////////////////////////////
+// Kernel name: AVC_ILDB_Root_Mbaff_Y.asm
+//
+//  Luma (Y) root kernel; serves as a scheduler for child threads and spawns the chroma root.
+//
+//
+//	***** Note *****
+//	Initial design bundles an MB pair for each thread, and shares AVC_ILDB_MB_Dep_Check.asm
+//	with non mbaff kernels.
+//
+//	Optimization will be done later, putting top and bottom MBs on separate threads.
+//
+//
+/////////////////////////////////////////////////////////////////////////////////////
+//
+//  $Revision: 1 $
+//  $Date: 10/19/06 5:06p $
+//
+
+// ----------------------------------------------------
+//  AVC_ILDB_ROOT_MBAFF_Y
+// ----------------------------------------------------
+#define AVC_ILDB
+
+.kernel AVC_ILDB_ROOT_MBAFF_Y
+#if defined(COMBINED_KERNEL)
+ILDB_LABEL(AVC_ILDB_ROOT_Y):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+
+#if defined(_DEBUG) 
+
+/////////////////////////////////////////////////////////////////////////////////////
+// Init URB space for running on RTL, so that no unwritten URB entry is read (the init code below is commented out).
+// Will remove it for production release.
+
+
+//mov (8) m1:ud 		0x11111111:ud
+//mov (8) m2:ud 		0x22222222:ud 
+//mov (8) m3:ud 		0x33333333:ud
+//mov (8) m4:ud 		0x44444444:ud 
+
+//mov (1)	Temp1_W:w	0:w
+
+//ILDB_INIT_URB:
+//mul (1)	URBOffset:w				Temp1_W:w		4:w
+//shl (1) URBWriteMsgDescLow:uw 	URBOffset:w		4:w		// Msg descriptor: URB write dest offset (9:4)
+//mov (1) URBWriteMsgDescHigh:uw 	0x0650:uw				// Msg descriptor: URB write 5 MRFs (m0 - m4)
+//#include "writeURB.asm"
+
+//add		(1)		Temp1_W:w	Temp1_W:w	1:w				// Increase block count
+//cmp.l.f0.0 (1) 	null		Temp1_W:w	MBsCntY:w		// Check the block count limit
+//(f0.0) jmpi		ILDB_INIT_URB							// Loop back
+
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+mov		(1)		EntrySignature:w			0xEFF0:w		// Debug-only marker value
+
+#endif
+//----------------------------------------------------------------------------------------------------------------
+
+// Set global variable
+mov (32) 	ChildParam:uw			0:uw								// Reset local variables
+//mul	(1)	 	TotalBlocks:w			MBsCntX:w		MBsCntY:w			// Total # of MB pairs
+//add	(1)	 	GatewayApertureE:w		MBsCntY:w 		GatewayApertureB:w	// Aperture End = aperture Head + BlockCntY
+
+
+// 2 URB entries for Y:
+// Entry 0 - Child thread R0Hdr
+// Entry 1 - input parameter to child kernel (child r1)
+
+#undef 		URB_ENTRIES_PER_MB
+#define 	URB_ENTRIES_PER_MB	 	2
+
+// URB_ENTRIES_PER_MB in a different form; the final desired format is (URB_ENTRIES_PER_MB-1) << 10
+mov (1) 	URB_EntriesPerMB_2:w		URB_ENTRIES_PER_MB-1:w
+shl (1) 	URB_EntriesPerMB_2:w		URB_EntriesPerMB_2:w	10:w
+
+mov	(1)		ChildThreadsID:uw		1:uw					// ChildThreadsID for chroma root
+
+shr (1)		ThreadLimit:w		MaxThreads:w		1:w		// Initial luma thread limit to 50% (MaxThreads / 2)
+mul	(1)	 	TotalBlocks:w		MBsCntX:w		MBsCntY:w	// MBs to be processed count down from TotalBlocks
+
+//***** Init CT_R0Hdr fields that are common to all threads *************************
+mov (8)		CT_R0Hdr.0:ud			r0.0<8;8,1>:ud				// Init to root R0 header
+mov (1)		CT_R0Hdr.7:ud			r0.6:ud						// Copy Parent Thread Cnt; JJ did the change on 06/20/2006
+mov (1) 	CT_R0Hdr.31:ub			0:w							// Reset the highest byte
+mov (1) 	CT_R0Hdr.3:ud 			0x00000000	 // Clear dword 3 of the child R0 header
+mov (1) 	CT_R0Hdr.6:uw 			sr0.0:uw					// sr0.0: state reg contains general thread states, e.g. EUID/TID.
+
+//***** Init ChildParam fields that are common to all threads ***********************
+mov (8) 	ChildParam<1>:ud	RootParam<8;8,1>:ud		// Copy all root parameters
+mov (4)		CurCol<1>:w			0:w						// Reset CurCol, CurRow
+add	(2)		LastCol<1>:w		MBsCntX<2;2,1>:w		-1:w	// Get LastCol and LastRow
+
+mov (1) 	URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud
+
+//===================================================================================
+
+#include "AVC_ILDB_OpenGateway.asm"		// Open root thread gateway for receiving notification 
+
+#if defined(DEV_CL)	
+	mov	(1)		URBOffset:uw		240:uw	// Use chroma URB offset to spawn chroma root
+#else
+	mov	(1)		URBOffset:uw		320:uw	// Use chroma URB offset to spawn chroma root
+#endif
+
+#include "AVC_ILDB_SpawnChromaRoot.asm"	// Spawn chroma root
+
+mov	(1)		URBOffset:uw		0:uw	// Use luma URB offset to spawn luma child 
+mov	(1)		ChildThreadsID:uw	2:uw	// Starting ChildThreadsID for luma child threads
+
+#include "AVC_ILDB_Dep_Check.asm"  	// Check dependency and spawn all MBs
+
+// Wait for UV root thread to finish: spin while ThreadLimit < MaxThreads (presumably ThreadLimit is raised from outside this thread - TODO confirm)
+ILDB_LABEL(WAIT_FOR_UV):
+cmp.l.f0.0 (1) null:w	ThreadLimit:w		MaxThreads:w
+(f0.0) 	jmpi 	ILDB_LABEL(WAIT_FOR_UV)
+
+#include "AVC_ILDB_CloseGateway.asm"	// Close root thread gateway 
+
+END_THREAD								// End of root thread
+
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_UV.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_UV.asm
@@ -0,0 +1,157 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Kernel name: AVC_ILDB_Root_UV.asm
+//
+//  Root kernel serves as a scheduler for child threads
+//
+//  $Revision: 1 $
+//  $Date: 10/19/06 5:06p $
+//
+
+// ----------------------------------------------------
+//  AVC_ILDB_ROOT_UV
+// ----------------------------------------------------
+#define AVC_ILDB						// Flag tested by ILDB_header.inc (#if defined(AVC_ILDB)); presumably reached via the includes below
+
+.kernel AVC_ILDB_ROOT_UV
+#if defined(COMBINED_KERNEL)			// Entry label is emitted only in COMBINED_KERNEL builds
+ILDB_LABEL(AVC_ILDB_ROOT_UV):
+#endif
+ 
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+#if defined(_DEBUG) 					// Debug builds only: signature written on entry to the chroma root kernel
+mov		(1)		EntrySignature:w			0xFF11:w
+#endif
+
+/////////////////////////////////////////////////////////////////////////////////////
+#if defined(_DEBUG) 
+
+// Init URB space for running on RTL.  It satisfies reads of URB entries that have not been written yet.
+// To be removed for production release.
+
+mov (8) m1:ud 		0x55555555:ud		// Recognizable fill patterns for m1..m4
+mov (8) m2:ud 		0x66666666:ud 
+mov (8) m3:ud 		0x77777777:ud
+mov (8) m4:ud 		0x88888888:ud 
+
+mov (1)	Temp1_W:w	MBsCntY:w				// Loop counter starts at MBsCntY
+shl (1)	Temp2_W:w	MBsCntY:w		1:w		// Loop limit = 2 * MBsCntY
+
+ILDB_LABEL(ILDB_INIT_URB_UV):
+mul (1)	URBOffset:uw			Temp1_W:uw		4:w		// Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)
+mov (1) URBWriteMsgDesc:ud		MSG_LEN(4)+URBWMSGDSC:ud			// Msg descriptor: MSG_LEN(4); original note says "URB write msg length = 5" (presumably counting header m0) -- TODO confirm
+#include "writeURB.asm"
+
+add		(1)		Temp1_W:w	Temp1_W:w	1:w				// Increase block count
+cmp.l.f0.0 (1) 	null		Temp1_W:w	Temp2_W:w		// f0.0 set while Temp1_W < Temp2_W (block count limit)
+(f0.0) jmpi		ILDB_LABEL(ILDB_INIT_URB_UV)			// Loop back
+
+mov		(1)		EntrySignature:w			0xFFF0:w	// Debug marker written after the URB init loop
+
+#endif
+/////////////////////////////////////////////////////////////////////////////////////
+
+// Set global variables
+mov (32) 	ChildParam:uw			0:uw								// Reset local variables, 2 GRFs
+//mul	(1)	 	TotalBlocks:w			MBsCntX:w		MBsCntY:w			// Total # of blocks
+//add	(1)	 	GatewayApertureE:w		MBsCntY:w 		GatewayApertureB:w	// Aperture End = aperture Head + BlockCntY
+
+// 4 URB entries for UV:
+// Entry 0 - Child thread R0Hdr
+// Entry 1 - input parameter to child kernel (child r1)
+// Entry 2 - Prev MB data UV 2x8
+// Entry 3 - Unused
+
+#define 	URB_ENTRIES_PER_MB 	4
+
+// URB_ENTRIES_PER_MB in a different form: the final desired value is (URB_ENTRIES_PER_MB-1) << 10, built by the mov/shl below
+mov (1) 	URB_EntriesPerMB_2:w		URB_ENTRIES_PER_MB-1:w
+shl (1) 	URB_EntriesPerMB_2:w		URB_EntriesPerMB_2:w	10:w
+
+#define 	CHROMA_ROOT													// Compiling flag for chroma only
+
+// URB base for UV kernels (same 240/320 values AVC_ILDB_Root_Y.asm uses as URBOffset when spawning the chroma root)
+#if defined(DEV_CL)	
+	mov (1)		URBOffsetUVBase:w		240:w
+#else
+	mov (1)		URBOffsetUVBase:w		320:w
+#endif
+
+
+mov	(1)		ChildThreadsID:uw		3:uw	// Starting ChildThreadsID for chroma child threads (odd IDs; stepped by 2 in AVC_ILDB_SpawnChild.asm)
+
+shr (1)		ThreadLimit:w		MaxThreads:w		1:w		// Initial thread limit = MaxThreads / 2 (50%)
+mul	(1)	 	TotalBlocks:w		MBsCntX:w		MBsCntY:w	// MBs to be processed count down from TotalBlocks
+
+//***** Init CT_R0Hdr fields that are common to all threads *************************
+mov (8)		CT_R0Hdr.0:ud		r0.0<8;8,1>:ud				// Init to root R0 header
+mov (1) 	CT_R0Hdr.7:ud		r0.6:ud						// Copy Parent Thread Cnt; JJ did the change on 06/20/2006
+mov (1) 	CT_R0Hdr.31:ub		0:w							// Reset the highest byte
+mov (1) 	CT_R0Hdr.3:ud 		0x00000000	 				// Clear dword 3 of the child R0 header
+mov (1) 	CT_R0Hdr.6:uw 		sr0.0:uw					// sr0.0: state reg contains general thread states, e.g. EUID/TID.
+
+//***** Init ChildParam fields that are common to all threads ***********************
+mov (8) 	ChildParam<1>:ud	RootParam<8;8,1>:ud		// Copy all root parameters
+mov (4)		CurCol<1>:w			0:w						// Zero 4 consecutive words starting at CurCol (CurCol, CurRow, ...)
+add	(2)		LastCol<1>:w		MBsCntX<2;2,1>:w		-1:w	// LastCol = MBsCntX-1, LastRow = MBsCntY-1 (2-wide add; assumes each pair is adjacent in the GRF)
+
+mov (1) 	URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud	// URB write descriptor with MSG_LEN(2); AVC_ILDB_SpawnChild.asm passes URBWriteMsgDesc to its URB write
+
+//===================================================================================
+
+#include "AVC_ILDB_OpenGateway.asm"		// Open gateway for receiving notification 
+
+#include "AVC_ILDB_Dep_Check.asm"  		// Check dependency and spawn the child threads (presumably the chroma children here, since CHROMA_ROOT is defined)
+
+//#include "AVC_ILDB_LumaThrdLimit.asm"	// Update thread limit in luma root thread via gateway (disabled)
+
+#include "AVC_ILDB_CloseGateway.asm"	// Close root thread gateway 
+
+// Chroma root ends like a child thread (it is spawned by the luma root): child send EOT, Request type = 1
+	END_CHILD_THREAD
+	
+#undef 	CHROMA_ROOT
+
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Y.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Root_Y.asm
@@ -0,0 +1,160 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Kernel name: AVC_ILDB_Root_Y.asm
+//
+//  Root kernel serves as a scheduler for child threads
+//
+//  $Revision: 1 $
+//  $Date: 10/19/06 5:06p $
+//
+
+// ----------------------------------------------------
+//  AVC_ILDB_ROOT_Y
+// ----------------------------------------------------
+#define AVC_ILDB						// Flag tested by ILDB_header.inc (#if defined(AVC_ILDB)); presumably reached via the includes below
+
+.kernel AVC_ILDB_ROOT_Y
+#if defined(COMBINED_KERNEL)			// Entry label is emitted only in COMBINED_KERNEL builds
+ILDB_LABEL(AVC_ILDB_ROOT_Y):
+#endif
+
+#include "SetupVPKernel.asm"
+#include "AVC_ILDB.inc"
+
+/////////////////////////////////////////////////////////////////////////////////////
+#if defined(_DEBUG) 
+
+// Init URB space for running on RTL.  It satisfies reads of URB entries that have not been written yet.
+// To be removed for production release.
+
+mov (8) m1:ud 		0x11111111:ud		// Recognizable fill patterns for m1..m4
+mov (8) m2:ud 		0x22222222:ud 
+mov (8) m3:ud 		0x33333333:ud
+mov (8) m4:ud 		0x44444444:ud 
+
+mov (1)	Temp1_W:w	0:w						// Loop counter starts at 0; the loop runs while Temp1_W < MBsCntY
+
+ILDB_LABEL(ILDB_INIT_URB_Y):
+//mul (1)	Temp2_W:w				Temp1_W:w		4:w		// URBOffset
+//shl (1) URBWriteMsgDescLow:uw 	Temp2_W:w		4:w		// Msg descriptor: URB write dest offset (9:4)
+//mov (1) URBWriteMsgDescHigh:uw 	0x0650:uw				// Msg descriptor: URB write 5 MRFs (m0 - m4)
+
+//mul (1)	URBOffset:uw			Temp1_W:uw		4:w		// Each thread uses 4 URB entries (1 r0 + 1 inline + 2 data)
+mul (1)	URBOffset:uw			Temp1_W:uw		2:w		// Each thread uses 2 URB entries (1 r0 + 1 inline)
+mov (1) URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud			// Msg descriptor: MSG_LEN(2); original note says "URB write msg length = 3" (presumably counting header m0) -- TODO confirm
+#include "writeURB.asm"
+
+add		(1)		Temp1_W:w	Temp1_W:w	1:w				// Increase block count
+cmp.l.f0.0 (1) 	null		Temp1_W:w	MBsCntY:w		// f0.0 set while Temp1_W < MBsCntY (block count limit)
+(f0.0) jmpi		ILDB_LABEL(ILDB_INIT_URB_Y)							// Loop back
+
+mov		(1)		EntrySignature:w			0xFFF0:w	// Debug marker written after the URB init loop
+
+#endif
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+// Set global variables
+mov (32) 	ChildParam:uw			0:uw								// Reset local variables, 2 GRFs
+//mul	(1)	 	TotalBlocks:w			MBsCntX:w		MBsCntY:w			// Total # of blocks
+//add	(1)	 	GatewayApertureE:w		MBsCntY:w 		GatewayApertureB:w	// Aperture End = aperture Head + BlockCntY
+
+// 4 URB entries for Y:
+// Entry 0 - Child thread R0Hdr
+// Entry 1 - input parameter to child kernel (child r1)
+// Entry 2 - Prev MB data Y 4x16, col 1 and col 0
+// Entry 3 - Prev MB data Y 4x16, col 3 and col 2
+
+#undef 		URB_ENTRIES_PER_MB
+#define 	URB_ENTRIES_PER_MB		4
+
+// URB_ENTRIES_PER_MB in a different form: the final desired value is (URB_ENTRIES_PER_MB-1) << 10, built by the mov/shl below
+mov (1) 	URB_EntriesPerMB_2:w		URB_ENTRIES_PER_MB-1:w
+shl (1) 	URB_EntriesPerMB_2:w		URB_EntriesPerMB_2:w	10:w
+
+shr (1)		ThreadLimit:w		MaxThreads:w	1:w			// Initial luma thread limit = MaxThreads / 2 (50%)
+mul	(1)	 	TotalBlocks:w		MBsCntX:w		MBsCntY:w	// MBs to be processed count down from TotalBlocks
+
+//***** Init CT_R0Hdr fields that are common to all threads *************************
+mov (8)		CT_R0Hdr.0:ud		r0.0<8;8,1>:ud				// Init to root R0 header
+mov (1) 	CT_R0Hdr.7:ud		r0.6:ud						// Copy Parent Thread Cnt; JJ did the change on 06/20/2006
+mov (1) 	CT_R0Hdr.31:ub		0:w							// Reset the highest byte
+mov (1) 	CT_R0Hdr.3:ud 		0x00000000	 				// Clear dword 3 of the child R0 header
+mov (1) 	CT_R0Hdr.6:uw 		sr0.0:uw					// sr0.0: state reg contains general thread states, e.g. EUID/TID.
+
+//***** Init ChildParam fields that are common to all threads ***********************
+mov (8) 	ChildParam<1>:ud	RootParam<8;8,1>:ud		// Copy all root parameters
+mov (4)		CurCol<1>:w			0:w						// Zero 4 consecutive words starting at CurCol (CurCol, CurRow, ...)
+add	(2)		LastCol<1>:w		MBsCntX<2;2,1>:w		-1:w	// LastCol = MBsCntX-1, LastRow = MBsCntY-1 (2-wide add; assumes each pair is adjacent in the GRF)
+	
+mov (1) 	URBWriteMsgDesc:ud		MSG_LEN(2)+URBWMSGDSC:ud	// URB write descriptor with MSG_LEN(2); AVC_ILDB_SpawnChild.asm passes URBWriteMsgDesc to its URB write
+
+//===================================================================================
+
+#include "AVC_ILDB_OpenGateway.asm"		// Open gateway for receiving notification 
+
+#if defined(DEV_CL)	
+	mov	(1)		URBOffset:uw		240:uw	// Use chroma URB offset to spawn chroma root
+#else
+	mov	(1)		URBOffset:uw		320:uw	// Use chroma URB offset to spawn chroma root
+#endif
+
+#include "AVC_ILDB_SpawnChromaRoot.asm"	// Spawn chroma root
+
+mov	(1)		URBOffset:uw		0:uw	// Use luma URB offset to spawn luma child 
+mov	(1)		ChildThreadsID:uw	2:uw	// Starting ChildThreadsID for luma child threads (even IDs; ID 1 is given to the chroma root)
+
+#include "AVC_ILDB_Dep_Check.asm"  		// Check dependency and spawn all luma child threads in parallel with chroma root
+
+// Wait for UV root thread to finish.
+// Busy-wait while ThreadLimit < MaxThreads.  Neither operand is written inside this loop, so ThreadLimit is presumably updated from outside (e.g. through the opened gateway) -- TODO confirm.
+ILDB_LABEL(WAIT_FOR_UV):
+cmp.l.f0.0 (1) null:w	ThreadLimit:w		MaxThreads:w
+(f0.0) 	jmpi 	ILDB_LABEL(WAIT_FOR_UV)
+
+
+#include "AVC_ILDB_CloseGateway.asm"	// Close root thread gateway 
+
+END_THREAD								// End of root thread
+
+#if !defined(COMBINED_KERNEL)		// For standalone kernel only
+.end_code
+
+.end_kernel
+#endif
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Spawn.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_Spawn.asm
@@ -0,0 +1,52 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//=============== Spawn a child thread for a vertical child ===============
+
+#if defined(_DEBUG) 
+	mov	(1)		EntrySignature:w	0x6666:w
+#endif
+	
+	mul	(1)		URBOffset:uw		CurRow:uw		2:w // URBOffset = CurRow * 2.  Was 5:w; the old note "5 URB entries per row (R0, child R0, 3 GRFs of data from left MB)" no longer matches the multiplier
+
+	mov (8)		CT_R0Hdr.0:ud		r0.0<8;8,1>:ud				// Init to root R0 header
+	
+	// R0.2: Interface Descriptor Ptr.  Add IDesc_Child_Offset (noted as 16 in the original comment -- confirm) to select the child kernel's Interface Descriptor
+	add (1) 	CT_R0Hdr.2:ud 		r0.2:ud 		IDesc_Child_Offset:w
+	
+	#include "AVC_ILDB_SpawnChild.asm"
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_SpawnChild.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_SpawnChild.asm
@@ -0,0 +1,85 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//=============== Spawn a child thread for Luma or Chroma ===============
+// Reads (set by the including code): URBOffset, ChildThreadsID, ChildParam, URB_EntriesPerMB_2, URBWriteMsgDesc, CT_R0Hdr.  Advances ChildThreadsID by 2.
+	//----- Create child thread R0 header -----
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignature:w			0xAAAA:w
+#endif
+
+	//***** Set CT_R0Hdr fields that change for every thread 
+	
+	// Restore CT_R0Hdr.4:ud to r0.4:ud (the "or" further down overwrites it on every spawn)
+	mov (1) CT_R0Hdr.4:ud		r0.4:ud
+
+	// R0.2: Interface Descriptor Ptr.  Add a child offset for child kernel (disabled here; AVC_ILDB_Spawn.asm adds IDesc_Child_Offset before including this file)
+//	add (1) CT_R0Hdr.2:ud 		r0.2:ud 		CHILD_OFFSET:w
+
+	// Assign a new Thread Count for this child
+	mov (1) CT_R0Hdr.6:ud 		ChildThreadsID:uw
+
+	//----- Prepare URB for launching a child thread -----
+	mov (16) m2.0:w		ChildParam<16;16,1>:w		// Copy 16 words of ChildParam into m2
+
+	shr (1)	 MRF0.0:uw	URBOffset:uw	1:w			// MRF0.0 = URBOffset >> 1 (presumably the URB write offset counts pairs of entries -- TODO confirm)
+
+	add	(1)	ChildThreadsID:uw		ChildThreadsID:uw	2:uw	// Step by 2 so the ID keeps its parity: luma child=even, chroma child=odd
+		
+	//--------------------------------------------------
+//	#include "writeURB.asm"
+	send  null:uw 	MRF0	 null:ud	URBWRITE	URBWriteMsgDesc:ud		// URB write	
+
+	//--------------------------------------------------
+	// Set URB handle for child thread launching:
+	// URB handle Length	 	(bit 15:10) - 0000 0000 0000 0000  yyyy yy00 0000 0000
+	// URB handle offset  		(bit 9:0) 	- 0000 0000 0000 0000  0000 00xx xxxx xxxx
+
+	or  (1) CT_R0Hdr.4:ud		URB_EntriesPerMB_2:w	URBOffset:uw	// CT_R0Hdr.4 = URB_EntriesPerMB_2 | URBOffset (length field | offset field)
+	
+	// 2 URB entries:
+	// Entry 0 - CT_R0Hdr
+	// Entry 1 - input parameter to child kernel
+
+	//----- Spawn a child now -----
+	send (8) null:ud 	CT_R0Hdr	  null:ud    TS	TSMSGDSC
+//	send (8) null:ud 	CT_Spawn_Reg	null:ud    0x07100001
+
+
+	// Restore CT_R0Hdr.4:ud to r0.4:ud for next use (disabled: the restore is done at the top of this file instead)
+//	mov (1) CT_R0Hdr.4:ud		r0.4:ud
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_SpawnChromaRoot.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/AVC_ILDB_SpawnChromaRoot.asm
@@ -0,0 +1,77 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//=============== Spawn a chroma root thread ===============
+// Reads (set by the including code): URBOffset, RootParam, URB_EntriesPerMB_2, CT_R0Hdr.
+	//----- Create chroma root thread R0 header -----
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignature:w			0xAABA:w
+#endif
+
+
+
+	// Restore CT_R0Hdr.4:ud to r0.4:ud (disabled here; the restore is done at the end of this file)
+//	mov (1) CT_R0Hdr.4:ud		r0.4:ud
+
+	// R0.2: Interface Descriptor Ptr.  Add CHROMA_ROOT_OFFSET to select the chroma root kernel's Interface Descriptor
+	add (1) CT_R0Hdr.2:ud 		r0.2:ud 		CHROMA_ROOT_OFFSET:w
+
+	// Assign a new Thread Count for this child
+	mov (1) CT_R0Hdr.6:ud 		1:w		// ThreadID=1 for chroma root
+
+	//----- Copy luma root r1 for launching chroma root thread -----
+	mov (16) m2.0:w		RootParam<16;16,1>:w		// Copy 16 words of RootParam into m2
+
+	#include "writeURB.asm"		// Presumably issues the URB write of the message registers at URBOffset -- see writeURB.asm
+
+	//--------------------------------------------------
+	// Set URB handle for child thread launching:
+	// URB handle Length	 	(bit 15:10) - 0000 0000 0000 0000  yyyy yy00 0000 0000
+	// URB handle offset  		(bit 9:0) 	- 0000 0000 0000 0000  0000 00xx xxxx xxxx
+
+	or  (1) CT_R0Hdr.4:ud		URB_EntriesPerMB_2:w	URBOffset:uw	// CT_R0Hdr.4 = URB_EntriesPerMB_2 | URBOffset (length field | offset field)
+	
+	// 2 URB entries:
+	// Entry 0 - CT_R0Hdr
+	// Entry 1 - input parameter to child kernel
+
+	//----- Spawn a child now -----
+	send (8) null:ud 	CT_R0Hdr	null:ud    TS	TSMSGDSC
+
+	// Restore CT_R0Hdr.4:ud to r0.4:ud for next use (CT_R0Hdr.2 and CT_R0Hdr.6 are left as set above)
+	mov (1) CT_R0Hdr.4:ud		r0.4:ud
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Child_Undefs.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Child_Undefs.inc
@@ -0,0 +1,54 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: Child_Undefs.inc
+//
+// Undefine the global symbols listed below (P1-P8, EDGECNTLMAP, CLIP_NEGATIVE, CLIP_DONE) for a new process in the child thread
+//
+
+#undef		P1	
+#undef		P2	
+#undef		P3	
+#undef		P4
+#undef		P5
+#undef		P6
+#undef		P7
+#undef		P8
+#undef		EDGECNTLMAP
+#undef		CLIP_NEGATIVE
+#undef		CLIP_DONE
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/ILDB_header.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/ILDB_header.inc
@@ -0,0 +1,336 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#if !defined(__ILDB_HEADER__)	// Make sure this file is only included once
+#define __ILDB_HEADER__
+
+// Module name: ILDB_header.inc
+// Shared constants, helper macros and register declarations for the ILDB kernels.
+
+.default_execution_size	(16)		// Default execution size: 16
+.default_register_type	:ub			// Default register type: unsigned byte
+// Drop earlier definitions of these symbols; NULLREG, RETURN_REG and EOTMSGDSC are redefined further down (the others presumably likewise -- confirm)
+#undef NULLREG
+#undef RETURN_REG
+#undef EOTMSGDSC
+#undef MSGSRC
+#undef END_THREAD
+#undef TSMSGDSC
+
+//  ----------- Common constant definitions ------------
+//
+//  Single-bit mask constants: BITn has only bit n set (BITn == 1 << n)
+//
+#define BIT0	0x01
+#define BIT1	0x02
+#define BIT2	0x04
+#define BIT3	0x08
+#define BIT4	0x10
+#define BIT5	0x20
+#define BIT6	0x40
+#define BIT7	0x80
+#define BIT8	0x0100
+#define BIT9	0x0200
+#define BIT10	0x0400
+#define BIT11	0x0800
+#define BIT12	0x1000
+#define BIT13	0x2000
+#define BIT14	0x4000
+#define BIT15	0x8000
+#define BIT16	0x00010000
+#define BIT17	0x00020000
+#define BIT18	0x00040000
+#define BIT19	0x00080000
+#define BIT20	0x00100000
+#define BIT21	0x00200000
+#define BIT22	0x00400000
+#define BIT23	0x00800000
+#define BIT24	0x01000000
+#define BIT25	0x02000000
+#define BIT26	0x04000000
+#define BIT27	0x08000000
+#define BIT28	0x10000000
+#define BIT29	0x20000000
+#define BIT30	0x40000000
+#define BIT31	0x80000000
+
+// Common constants
+//
+#define	INST_SIZE	16	// Instruction size in bytes (used by CALL/PRED_CALL to compute the return address)
+
+#define	GRFWIB	32		// GRF register width in bytes
+#define	GRFWIW	16		// GRF register width in words
+#define	GRFWID	8		// GRF register width in dwords
+
+#define TOP_FIELD	0
+#define BOTTOM_FIELD	1
+
+#define PREVIOUS_FRAME	0	// Previous frame
+#define CURRENT_FRAME	1	// Current frame
+#define NEXT_FRAME	2	// Next frame
+
+#define Y_ROW_WIDTH		16	// in bytes
+#define UV_ROW_WIDTH	8	// presumably in bytes as well, like Y_ROW_WIDTH -- confirm
+
+//  Useful macros
+//
+#define REGION(Width,HStride) <Width*HStride;Width,HStride>	// Region definition when ExecSize = Width: first field is Width*HStride, e.g. REGION(8,1) -> <8*1;8,1>
+
+#define NULLREG		null<1>:d
+#define NULLREGW	null<1>:w
+
+#define RETURN_REG	r62		// Return pointer for all sub-routine calls (type DWORD)
+// CALL: store ip + (1+skipInst)*INST_SIZE in RETURN_REG, then jump to subFunc.  There is a single RETURN_REG, so a nested CALL overwrites it.
+#define CALL(subFunc, skipInst)	add (1) RETURN_REG<1>:ud   ip:ud	(1+skipInst)*INST_SIZE \n\
+				jmpi (1) subFunc
+
+#define	RETURN		mov (1)	ip:ud	RETURN_REG<0;1,0>:ud		// Return to calling module by loading ip from RETURN_REG
+// PRED_CALL: like CALL, but only the jump is predicated on flag; RETURN_REG is written unconditionally.
+#define PRED_CALL(flag, subFunc, skipInst)	add (1) RETURN_REG<1>:ud   ip:ud	(1+skipInst)*INST_SIZE \n\
+			(flag)	jmpi (1) subFunc
+
+
+//  Definitions for surface states, GRF regions, and common data fields
+//
+//  Note: Each kernel needs to define a specific symbol before including this
+//  header file to ensure correct definitions.
+//
+
+
+
+#if defined(AVC_ILDB)	
+
+.reg_count_total    64
+.reg_count_payload  4
+
+    // Binding Table Index
+	#define	BI_CNTRL_DATA		0			// Control data map  
+    #define	BI_SRC_Y			1
+    #define	BI_SRC_UV			2
+    #define	BI_DEST_Y			3
+    #define	BI_DEST_UV			4
+	
+
+	//========== Left MB, 4x16 in r2 and r3 ==========
+	#define		PREV_MB_Y_BASE	64		//2*GRFWIB												// Byte offset to r2
+	.declare    PREV_MB_YD  Base=r2 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+	.declare    PREV_MB_YW  Base=r2 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+    .declare    PREV_MB_YB  Base=r2 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+    
+    #define		PREV_MB_U_BASE	64		//2*GRFWIB		// seperate thread from Y				// Byte offset to r2
+	.declare	PREV_MB_UD	Base=r2 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+    .declare    PREV_MB_UW  Base=r2 	ElementSize=2 SrcRegion=REGION(16,1) Type=uw
+    .declare    PREV_MB_UB  Base=r2 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+    
+	#define		PREV_MB_V_BASE	65		//2*GRFWIB+1		// NV12									// Byte offset to r2.1
+    .declare    PREV_MB_VB	Base=r2.1 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+
+	//========== Top MB, 16x4 in r4 and r5 ==========	
+	#define		TOP_MB_Y_BASE	128		//4*GRFWIB												// Byte offset to r4
+	.declare    TOP_MB_YD	Base=r4		ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+	.declare    TOP_MB_YW	Base=r4		ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+    .declare    TOP_MB_YB	Base=r4		ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+    
+    #define		TOP_MB_U_BASE	128		//4*GRFWIB		// seperate thread from Y				// Byte offset to r4
+	.declare	TOP_MB_UD	Base=r4 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+    .declare    TOP_MB_UW	Base=r4		ElementSize=2 SrcRegion=REGION(16,1) Type=uw
+    .declare    TOP_MB_UB	Base=r4 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+    
+	#define		TOP_MB_V_BASE	129		//4*GRFWIB+1		// NV12									// Byte offset to r4.1
+    .declare    TOP_MB_VB  Base=r4.1 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+
+    
+    //========== Current MB, 16x16 in r6-r13 ==========
+	#define		SRC_MB_Y_BASE	192 	//6*GRFWIB												// Byte offset to r6
+    .declare    SRC_YD  	Base=r6 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud		// For read and write, 8 GRFs
+    .declare    SRC_YW  	Base=r6 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+    .declare    SRC_YB  	Base=r6 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 8 GRFs	
+
+	#define		SRC_MB_U_BASE	192		//6*GRFWIB		// seperate thread from Y				// Byte offset to r6
+    .declare    SRC_UD  	Base=r6 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud		// For read and write, 2 GRFs
+    .declare    SRC_UW  	Base=r6 	ElementSize=2 SrcRegion=REGION(16,1) Type=uw	// For read and write, 4 GRFs
+    .declare    SRC_UB  	Base=r6 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 2 GRFs
+          
+	#define		SRC_MB_V_BASE	193		// 6*GRFWIB+1		// NV12									// Byte offset to r6.1
+    .declare    SRC_VD  	Base=r6.1 	ElementSize=4 SrcRegion=REGION(8,1) Type=ud		// For read and write, 2 GRFs
+    .declare    SRC_VW  	Base=r6.1 	ElementSize=2 SrcRegion=REGION(16,1) Type=uw	// For read and write, 4 GRFs
+    .declare    SRC_VB  	Base=r6.1 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 2 GRFs
+
+#else				// No kernel specified, define nothing.
+
+.reg_count_total    64
+.reg_count_payload  2
+
+	#define	SRCAOFF	r1.0:ud		// Offset into alpha data
+	#define	SRCOFF	r1.1:ud		// Offset into source YUV data
+	#define ORIX	r1.4		// :w, H. origin of the destination block in pel
+	#define ORIY	r1.5		// :w, V. origin of the destination block in pel
+
+#endif
+
+//  ----------- Message Payload Header fields------------
+//  Both names below are :ud views of sub-registers of r0 (r0.2 and r0.4).
+#define IDP		r0.2:ud		// Interface Descriptor Pointer
+#define	BTP		r0.4:ud		// Binding Table Pointer
+
+//  ----------- Common Message Descriptor ------------
+//  Every name is defined twice: DEV_ILK keeps the target (MSG_GW, DAPREAD, ...) as its own number; pre-ILK leaves those macros empty and its descriptor constants carry the same number in bits 27:24.
+#ifdef DEV_ILK
+#define GW_DCN				// Should be enabled only for ILK-B0 and beyond
+#define MSG_GW		0x03		// Message Gateway
+#define MSG_GW_EOT	0x23		// Message Gateway plus EOT bit set (For ILK only); equals MSG_GW + 0x20
+#define DAPREAD		0x04		// Data Port Read Extended Message Descriptor
+#define DAPWRITE	0x05		// Data Port Write Extended Message Descriptor
+#define URBWRITE	0x06		// URB
+#define TS			0x07		// Thread Spawner Extended Message Descriptor
+#define TS_EOT		0x27		// End of Thread Extended Message Descriptor; equals TS + 0x20
+
+#define EOTMSGDSC	0x02000000	// End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel)
+#define CHILD_EOTMSGDSC	0x02000012	// End of Child Thread Message Descriptor w/o URB handle dereferenced
+
+// Data Port Message Descriptor
+#define DWBRMSGDSC_RC	 0x02086000	// DWORD Block Read Message Descriptor, reading from render cache = 6.
+#define DWBRMSGDSC_RC_TF 0x02086600	// DWORD Block Read Message Descriptor, render cache = 6, top field (DWBRMSGDSC_RC + ENMSGDSCTF).
+#define DWBRMSGDSC_RC_BF 0x02086700	// DWORD Block Read Message Descriptor, render cache = 6, bottom field (DWBRMSGDSC_RC + ENMSGDSCBF).
+#define DWBRMSGDSC_SC	 0x0208A000	// DWORD Block Read Message Descriptor, reading from sampler cache = A.
+#define DWBRMSGDSC_SC_TF 0x0208E600	// DWORD Block Read Message Descriptor, reading top field from field mode sampler cache. NOTE(review): not DWBRMSGDSC_SC + 0x600 (E vs A), unlike the pre-ILK value - confirm.
+#define DWBRMSGDSC_SC_BF 0x0208E700	// DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache. NOTE(review): not DWBRMSGDSC_SC + 0x700 (E vs A), unlike the pre-ILK value - confirm.
+
+#define ILDBRMSGDSC		 0x02085800	// AVC ILDB Control Data Read Msg Desc on Bearlake-C
+
+#define DWBWMSGDSC		 0x02082000	// DWORD Block Write Message Descriptor
+#define DWBWMSGDSC_WC 	 0x0218A000	// DWORD Block Write Message Descriptor + write commit (numerically DWBWMSGDSC + RESP_LEN(1) + 0x8000)
+
+// URB Message Descriptor
+#define	URBWMSGDSC	0x02080000		// URB Write Message Descriptor
+
+// Thread Spawner Message Descriptor
+#define	TSMSGDSC	0x02000001
+
+// Message Gateway Message Descriptors
+#define OGWMSGDSC	0x02000000	// OpenGateway Message Descriptor
+#define CGWMSGDSC	0x02000001	// CloseGateway Message Descriptor
+#define FWDMSGDSC	0x02000002	// ForwardMsg Message Descriptor
+#define	NOTIFYMSG	0x00008000	// Send notification with ForwardMsg message
+
+#define	RESP_LEN(len)	0x100000*len	// Response length of len GRFs, scaled into the descriptor; body is unparenthesized, so pass a plain literal
+#define MSG_LEN(len)	0x2000000*len	// Same form, presumably the message length field (confirm); also unparenthesized
+
+#else	// Pre DEV_ILK
+
+#define MSG_GW
+#define MSG_GW_EOT
+#define DAPREAD
+#define DAPWRITE
+#define URBWRITE
+#define TS
+#define TS_EOT
+
+#define EOTMSGDSC	0x87100000		// End of Thread Message Descriptor /w URB handle dereferenced (used by root kernel)
+#define CHILD_EOTMSGDSC	0x87100012	// End of Child Thread Message Descriptor w/o URB handle dereferenced
+
+// Data Port Message Descriptor
+#define DWBRMSGDSC_RC	 0x04106000	// DWORD Block Read Message Descriptor, reading from render cache = 6.
+#define DWBRMSGDSC_RC_TF 0x04106600	// DWORD Block Read Message Descriptor, render cache = 6, top field (DWBRMSGDSC_RC + ENMSGDSCTF).
+#define DWBRMSGDSC_RC_BF 0x04106700	// DWORD Block Read Message Descriptor, render cache = 6, bottom field (DWBRMSGDSC_RC + ENMSGDSCBF).
+#define DWBRMSGDSC_SC	 0x0410A000	// DWORD Block Read Message Descriptor, reading from sampler cache = A.
+#define DWBRMSGDSC_SC_TF 0x0410A600	// DWORD Block Read Message Descriptor, reading top field from field mode sampler cache.
+#define DWBRMSGDSC_SC_BF 0x0410A700	// DWORD Block Read Message Descriptor, reading bottom field from field mode sampler cache.
+
+#define ILDBRMSGDSC		 0x04105800	// AVC ILDB Control Data Read Msg Desc on Bearlake-C
+
+#define DWBWMSGDSC		 0x05102000	// DWORD Block Write Message Descriptor
+#define DWBWMSGDSC_WC 	 0x0511A000	// DWORD Block Write Message Descriptor + write commit (numerically DWBWMSGDSC + RESP_LEN(1) + 0x8000)
+
+// URB Message Descriptor
+#define	URBWMSGDSC	0x06100000		// URB Write Message Descriptor
+
+// Thread Spawner Message Descriptor
+#define	TSMSGDSC	0x07100001
+
+// Message Gateway Message Descriptors
+#define OGWMSGDSC	0x03100000	// OpenGateway Message Descriptor
+#define CGWMSGDSC	0x03100001	// CloseGateway Message Descriptor
+#define FWDMSGDSC	0x03100002	// ForwardMsg Message Descriptor
+#define	NOTIFYMSG	0x00008000	// Send notification with ForwardMsg message
+
+#define	RESP_LEN(len)	0x10000*len	// Response length of len GRFs, scaled into the descriptor; body is unparenthesized, so pass a plain literal
+#define MSG_LEN(len)	0x100000*len	// Same form, presumably the message length field (confirm); also unparenthesized
+
+//	Low 16 bits of ILDBRMSGDSC: bits 15 - 0 = 01 011 000 00000000 = 0101 1000 0000 0000 = 5800
+//	i.e. render cache, AVC loop rd.
+#endif	// DEV_ILK
+				
+//	Enable frame/field selection in message descriptor (values are added to a data port read descriptor)
+#define ENMSGDSCFM	0x400		// Enable MSGDSC to select frame surface
+#define ENMSGDSCTF	0x600		// Enable MSGDSC to select top field surface
+#define ENMSGDSCBF	0x700		// Enable MSGDSC to select bottom field surface
+// Thread-terminating sends. They reference MSGHDR, which is #defined further down; that is fine because macros expand at the point of use.
+#define END_THREAD      send (8) NULLREG MSGHDR r0:ud TS_EOT	EOTMSGDSC
+#define END_CHILD_THREAD	send (8) NULLREG MSGHDR r0:ud TS_EOT	CHILD_EOTMSGDSC
+
+//  ----------- Message related register ------------
+//  Several names alias the same register: MSGHDR/MSGHDRY/MSGHDRC are m1, MSGHDRU/MSGHDRL are m2, MSGHDRV/MSGHDRT are m3.
+#define MSGHDR		m1		// Message Payload Header
+#define MSGHDRY		m1		// Message Payload Header register for Y data
+#define MSGHDRU		m2		// Message Payload Header register for U data
+#define MSGHDRV		m3		// Message Payload Header register for V data
+
+#define MSGHDRC		m1		// Message Payload Header register for CUR MB
+#define MSGHDRL		m2		// Message Payload Header register for LEFT MB
+#define MSGHDRT		m3		// Message Payload Header register for TOP MB
+
+#define MSGHDRYA	m4		// Second Message Payload Header register for Y data
+#define MSGSRC		r63		// Message source register
+#define MSGDSC		a0.0:ud		// Message Descriptor register (type DWORD)
+
+#define MH_ORI		MSGSRC.0	// DWORD block R/W message header block offset (same sub-register as MH_ORIX)
+#define MH_ORIX		MSGSRC.0	// DWORD block R/W message header X offset
+#define MH_ORIY		MSGSRC.1	// DWORD block R/W message header Y offset
+#define MH_SIZE		MSGSRC.2	// DWORD block R/W message header block width & height
+
+
+//  M2 - M9 for message data payload; four typed views that all start at m2 (the register MSGHDRU/MSGHDRL also name)
+.declare    MSGPAYLOADB	Base=m2 ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+.declare    MSGPAYLOADW	Base=m2 ElementSize=2 SrcRegion=REGION(16,1) Type=uw
+.declare    MSGPAYLOADD	Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+.declare    MSGPAYLOADF	Base=m2 ElementSize=4 SrcRegion=REGION(8,1) Type=f
+
+// End of ILDB_header.inc
+
+#endif	// !defined(__ILDB_HEADER__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data.asm
@@ -0,0 +1,110 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_ILDB_Cntrl_Data.asm
+//
+// This module loads AVC ILDB control data for one MB.  
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	CNTRL_DATA_D:	CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_CNTRL_DATA:	Binding table index of control data surface
+//
+//----------------------------------------------------------------
+
+	// Load one MB's ILDB control data into CNTRL_DATA_D (8 GRFs). Two predicates are derived from BitFields first:
+	// f0.1 = 1 if MbaffFlag==1 && BotFieldFlag==1 (bottom MB of an mbaff pair); f0.0 = 1 if CntlDataExpFlag is set.
+	and (1)	CTemp1_W:uw 		BitFields:uw  	MbaffFlag+BotFieldFlag:uw	// Mute all other bits
+
+	and.nz.f0.0	(1)	null:w		BitFields:w		CntlDataExpFlag:w			// Get CntlDataExpFlag
+
+	cmp.e.f0.1 (1) NULLREGW 	CTemp1_W:uw  	MbaffFlag+BotFieldFlag:uw	// Check mbaff and bot flags
+
+	(f0.0)  jmpi	ILDB_LABEL(READ_BLC_CNTL_DATA)	// CntlDataExpFlag set: use the global-offset read below
+
+	// On Crestline, MB control data in memory occupy 64 DWs (expanded).  
+//    mov (1)	MSGSRC.0<1>:ud	0:w						{ NoDDClr }				// Block origin X
+//    mov (1)	MSGSRC.1<1>:ud	CntrlDataOffsetY:ud		{ NoDDClr, NoDDChk }	// Block origin Y
+//    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud			{ NoDDChk }				// Block width and height (16x16=256 bytes)
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:uw			{ NoDDClr }				// Block origin X,Y
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud				{ NoDDChk }				// Block width and height (16x16=256 bytes)
+
+	(f0.1) add (1)  MSGSRC.1:ud		MSGSRC.1:ud		16:w	// Block origin Y +16 for the bottom MB in a pair
+
+    send (8) CNTRL_DATA_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA	// Receive 8 GRFs; RESP_LEN(8) rather than a literal 0x00080000 so the length follows the DEV_ILK / pre-ILK encoding
+	jmpi	ILDB_LABEL(READ_CNTL_DATA_DONE)
+	
+	
+ILDB_LABEL(READ_BLC_CNTL_DATA):
+	// On Bearlake-C, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.
+
+	// Global offset
+	mov (1)	MSGSRC.2:ud		CntrlDataOffsetY:ud	// CntrlDataOffsetY is the global offset
+
+	(f0.1) add (1) MSGSRC.2:ud		MSGSRC.2:ud		64:w	// +64 to the next MB control data (bot MB)
+
+    send (8) CNTRL_DATA_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA	// Receive 8 GRFs
+
+ILDB_LABEL(READ_CNTL_DATA_DONE):
+
+// End of load_ILDB_Cntrl_Data.asm
+
+
+
+
+// AVC ILDB control data message header format
+
+//DWord	Bit	Description
+//M0.7	31:0	Debug 
+//M0.6	31:0	Debug
+//M0.5	31:8	Ignored
+//		7:0		Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread.  It is used to free up resources used by the thread upon thread completion.
+//M0.4	31:0	Ignored
+//M0.3	31:0	Ignored
+//M0.2	31:0	Global Offset. Specifies the global byte offset into the buffer.
+				//	This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]
+//M0.1	31:0	Ignored
+//M0.0	31:0	Ignored
+
+
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_16DW.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_16DW.asm
@@ -0,0 +1,92 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_ILDB_Cntrl_Data_16DW.asm
+//
+// This module loads AVC ILDB control data for one MB on CTG.
+// The data occupy 16DW in memory; the dataport expands them from 16DW to 64DW.
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	CNTRL_DATA_D:	CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_CNTRL_DATA:	Binding table index of control data surface
+//
+//----------------------------------------------------------------
+
+// On CTG, MB control data in memory occupy 16 DWs. Data port returns 8 GRFs with expanded control data.
+
+#if defined(_MBAFF) 
+	// We need to get control data offset for the bottom MB in mbaff mode.
+	// That is, get f0.1=1 if BotFieldFlag is set in BitFields; MbaffFlag is not re-tested (the older two-flag check is kept commented out below).
+//	and (1)	CTemp1_W:uw 		BitFields:uw  	MbaffFlag+BotFieldFlag:uw	// Mute all other bits
+//	cmp.e.f0.1 (1) NULLREGW 	CTemp1_W:uw  	MbaffFlag+BotFieldFlag:uw	// Check mbaff and bot flags
+		
+	and.ne.f0.1 (1)	NULLREGW 	BitFields:uw  	BotFieldFlag:uw	// f0.1 = (BitFields & BotFieldFlag) != 0
+
+	// Global offset
+	mov (1)	MSGSRC.2:ud		CntrlDataOffsetY:ud	// Goes into dword 2 of the message source, the "Global Offset" field described at the end of this file
+		
+	(f0.1) add (1) MSGSRC.2:ud		MSGSRC.2:ud		64:w	// +64 to the next MB control data (bot MB)
+#endif	// _MBAFF. NOTE(review): without _MBAFF nothing in this module writes MSGSRC.2 - presumably the includer sets it; confirm.
+
+    send (8) CNTRL_DATA_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(8)+ILDBRMSGDSC+BI_CNTRL_DATA	// Receive 8 GRFs
+
+// End of load_ILDB_Cntrl_Data_16DW.asm
+
+
+
+// AVC ILDB control data message header format
+
+//DWord	Bit	Description
+//M0.7	31:0	Debug 
+//M0.6	31:0	Debug
+//M0.5	31:8	Ignored
+//		7:0		Dispatch ID. // This ID is assigned by the fixed function unit and is a unique identifier for the thread.  It is used to free up resources used by the thread upon thread completion.
+//M0.4	31:0	Ignored
+//M0.3	31:0	Ignored
+//M0.2	31:0	Global Offset. Specifies the global byte offset into the buffer.
+				//	This offset must be OWord aligned (bits 3:0 MBZ) Format = U32 Range = [0,FFFFFFF0h]
+//M0.1	31:0	Ignored
+//M0.0	31:0	Ignored
+
+
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_22DW.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_22DW.asm
@@ -0,0 +1,66 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_ILDB_Cntrl_Data_22DW.asm
+//
+// ********** Apple only module **********
+//
+// This module loads AVC ILDB 22DW control data for one MB for CLN.
+// The reduced control data set is for progressive picture ONLY.
+//
+// Control data memory layout for each MB is 8x11 = 88 bytes.  
+// It occupies 3 GRFs after reading in.
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	CNTRL_DATA_D:	CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 3 GRFs
+//
+//	Binding table index: 
+//	BI_CNTRL_DATA:	Binding table index of control data surface
+//
+//----------------------------------------------------------------
+
+    mul (1)	MSGSRC.0<1>:ud	ORIX:uw			8:uw		{ NoDDClr }				// Block origin X = ORIX * 8 (each MB's control data is 8 bytes wide)
+    mul (1)	MSGSRC.1<1>:ud	ORIY:uw			11:uw		{ NoDDClr, NoDDChk }	// Block origin Y = ORIY * 11 (each MB's control data is 11 rows high)
+    mov (1)	MSGSRC.2<1>:ud	0x000A0007:ud				{ NoDDChk }				// Block width and height (8x11=88 bytes); the fields hold 7 and 0xA, i.e. each dimension minus one
+
+    send (8) CNTRL_DATA_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(3)+DWBRMSGDSC_SC+BI_CNTRL_DATA	// Receive 3 GRFs (the 88 bytes read above)
+	
+// End of load_ILDB_Cntrl_Data_22DW.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_64DW.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Load_ILDB_Cntrl_Data_64DW.asm
@@ -0,0 +1,72 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_ILDB_Cntrl_Data_64DW.asm
+//
+// This module loads AVC ILDB 64DW control data for one MB for CLN.  
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	CNTRL_DATA_D:	CNTRL_DATA_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_CNTRL_DATA:	Binding table index of control data surface
+//
+//----------------------------------------------------------------
+
+// On CLN, MB control data in memory occupy 64 DWs.
+
+#if defined(_MBAFF) 
+	// We need to get control data offset for the bottom MB in mbaff mode.
+	// That is, set f0.1=1 if MbaffFlag==1 && BotFieldFlag==1
+	and (1)	acc0.0:uw 		BitFields:uw  	MbaffFlag+BotFieldFlag:uw	// Mute all other bits
+	cmp.e.f0.1 (1) NULLREGW 	acc0.0:uw  	MbaffFlag+BotFieldFlag:uw	// Check mbaff and bot flags
+#endif		// _MBAFF (acc0.0 is the scratch operand here; no CTemp1_W is used)
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:uw			{ NoDDClr }				// Block origin X,Y
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud				{ NoDDChk }				// Block width and height (16x16=256 bytes)
+
+#if defined(_MBAFF) 
+	(f0.1) add (1) MSGSRC.1:ud	MSGSRC.1:ud		16:w	// Block origin Y +16 to reach the bottom MB's control data (bot MB)
+#endif		// _MBAFF
+
+    send (8) CNTRL_DATA_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(8)+DWBRMSGDSC_SC+BI_CNTRL_DATA	// Receive 8 GRFs
+	
+// End of load_ILDB_Cntrl_Data_64DW.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Makefile.am
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Makefile.am
@@ -0,0 +1,98 @@
+INTEL_ILDB_INC = \
+	AVC_ILDB.inc				\
+	Child_Undefs.inc			\
+	ILDB_header.inc				\
+	Root_Undefs.inc				\
+	$(NULL)
+## The .inc list above and the .asm list below are only consumed by EXTRA_DIST; this file has no rule that assembles them.
+INTEL_ILDB_ASM = \
+	AVC_ILDB_Child_Field_UV.asm		\
+	AVC_ILDB_Child_Field_Y.asm		\
+	AVC_ILDB_Child_Mbaff_UV.asm		\
+	AVC_ILDB_Child_Mbaff_Y.asm		\
+	AVC_ILDB_Child_UV.asm			\
+	AVC_ILDB_Child_Y.asm			\
+	AVC_ILDB_Chroma_Core.asm		\
+	AVC_ILDB_Chroma_Core_Mbaff.asm		\
+	AVC_ILDB_CloseGateway.asm		\
+	AVC_ILDB_Dep_Check.asm			\
+	AVC_ILDB_Filter_Mbaff_UV_h.asm		\
+	AVC_ILDB_Filter_Mbaff_UV_v.asm		\
+	AVC_ILDB_Filter_Mbaff_Y_h.asm		\
+	AVC_ILDB_Filter_Mbaff_Y_v.asm		\
+	AVC_ILDB_Filter_UV_h.asm		\
+	AVC_ILDB_Filter_UV_v.asm		\
+	AVC_ILDB_Filter_Y_h.asm			\
+	AVC_ILDB_Filter_Y_v.asm			\
+	AVC_ILDB_ForwardMsg.asm			\
+	AVC_ILDB_LumaThrdLimit.asm		\
+	AVC_ILDB_Luma_Core.asm			\
+	AVC_ILDB_Luma_Core_Mbaff.asm		\
+	AVC_ILDB_OpenGateway.asm		\
+	AVC_ILDB_Root_Field_UV.asm		\
+	AVC_ILDB_Root_Field_Y.asm		\
+	AVC_ILDB_Root_Mbaff_UV.asm		\
+	AVC_ILDB_Root_Mbaff_Y.asm		\
+	AVC_ILDB_Root_UV.asm			\
+	AVC_ILDB_Root_Y.asm			\
+	AVC_ILDB_Spawn.asm			\
+	AVC_ILDB_SpawnChild.asm			\
+	AVC_ILDB_SpawnChromaRoot.asm		\
+	Load_ILDB_Cntrl_Data.asm		\
+	Load_ILDB_Cntrl_Data_16DW.asm		\
+	Load_ILDB_Cntrl_Data_22DW.asm		\
+	Load_ILDB_Cntrl_Data_64DW.asm		\
+	SetupVPKernel.asm			\
+	TransposeNV12_16x16.asm			\
+	TransposeNV12_4x16.asm			\
+	Transpose_Cur_UV_2x8.asm		\
+	Transpose_Cur_UV_8x8.asm		\
+	Transpose_Cur_UV_Right_Most_2x8.asm	\
+	Transpose_Cur_Y_16x16.asm		\
+	Transpose_Cur_Y_4x16.asm		\
+	Transpose_Cur_Y_Right_Most_4x16.asm	\
+	Transpose_Left_UV_2x8.asm		\
+	Transpose_Left_Y_4x16.asm		\
+	loadNV12_16x16T.asm			\
+	loadNV12_16x4.asm			\
+	load_Cur_UV_8x8T.asm			\
+	load_Cur_UV_8x8T_Mbaff.asm		\
+	load_Cur_UV_Right_Most_2x8.asm		\
+	load_Cur_Y_16x16T.asm			\
+	load_Cur_Y_16x16T_Mbaff.asm		\
+	load_Cur_Y_Right_Most_4x16.asm		\
+	load_Left_UV_2x8T.asm			\
+	load_Left_UV_2x8T_Mbaff.asm		\
+	load_Left_Y_4x16T.asm			\
+	load_Left_Y_4x16T_Mbaff.asm		\
+	load_Top_UV_8x2.asm			\
+	load_Top_UV_8x2_Mbaff.asm		\
+	load_Top_Y_16x4.asm			\
+	load_Top_Y_16x4_Mbaff.asm		\
+	saveNV12_16x16.asm			\
+	saveNV12_16x4.asm			\
+	saveNV12_16x4T.asm			\
+	save_Cur_UV_8x8.asm			\
+	save_Cur_UV_8x8_Mbaff.asm		\
+	save_Cur_Y_16x16.asm			\
+	save_Cur_Y_16x16_Mbaff.asm		\
+	save_Left_UV_8x2T.asm			\
+	save_Left_UV_8x2T_Mbaff.asm		\
+	save_Left_Y_16x4T.asm			\
+	save_Left_Y_16x4T_Mbaff.asm		\
+	save_Top_UV_8x2.asm			\
+	save_Top_UV_8x2_Mbaff.asm		\
+	save_Top_Y_16x4.asm			\
+	save_Top_Y_16x4_Mbaff.asm		\
+	writeURB.asm				\
+	writeURB_UV_Child.asm			\
+	writeURB_Y_Child.asm			\
+	$(NULL)
+## Ship both lists with the distribution; $(NULL) is only a terminator so every real entry can end in a backslash.
+EXTRA_DIST = \
+	$(INTEL_ILDB_ASM)			\
+	$(INTEL_ILDB_INC)			\
+	$(NULL)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Makefile.in
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Makefile.in
@@ -0,0 +1,548 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/shaders/h264/ildb
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/src/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DRM_CFLAGS = @DRM_CFLAGS@
+DRM_LIBS = @DRM_LIBS@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_LIBS = @EGL_LIBS@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GEN4ASM = @GEN4ASM@
+GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
+GEN4ASM_LIBS = @GEN4ASM_LIBS@
+GIT = @GIT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBDRM_VERSION = @LIBDRM_VERSION@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
+LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
+LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
+LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
+LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
+LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
+LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
+LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
+LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
+LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+PYTHON2 = @PYTHON2@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
+WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+wayland_protocoldir = @wayland_protocoldir@
+wayland_scanner = @wayland_scanner@
+INTEL_ILDB_INC = \
+	AVC_ILDB.inc				\
+	Child_Undefs.inc			\
+	ILDB_header.inc				\
+	Root_Undefs.inc				\
+	$(NULL)
+# INTEL_ILDB_INC (above) names the shared .inc include files; INTEL_ILDB_ASM (below) names the .asm shader sources.
+INTEL_ILDB_ASM = \
+	AVC_ILDB_Child_Field_UV.asm		\
+	AVC_ILDB_Child_Field_Y.asm		\
+	AVC_ILDB_Child_Mbaff_UV.asm		\
+	AVC_ILDB_Child_Mbaff_Y.asm		\
+	AVC_ILDB_Child_UV.asm			\
+	AVC_ILDB_Child_Y.asm			\
+	AVC_ILDB_Chroma_Core.asm		\
+	AVC_ILDB_Chroma_Core_Mbaff.asm		\
+	AVC_ILDB_CloseGateway.asm		\
+	AVC_ILDB_Dep_Check.asm			\
+	AVC_ILDB_Filter_Mbaff_UV_h.asm		\
+	AVC_ILDB_Filter_Mbaff_UV_v.asm		\
+	AVC_ILDB_Filter_Mbaff_Y_h.asm		\
+	AVC_ILDB_Filter_Mbaff_Y_v.asm		\
+	AVC_ILDB_Filter_UV_h.asm		\
+	AVC_ILDB_Filter_UV_v.asm		\
+	AVC_ILDB_Filter_Y_h.asm			\
+	AVC_ILDB_Filter_Y_v.asm			\
+	AVC_ILDB_ForwardMsg.asm			\
+	AVC_ILDB_LumaThrdLimit.asm		\
+	AVC_ILDB_Luma_Core.asm			\
+	AVC_ILDB_Luma_Core_Mbaff.asm		\
+	AVC_ILDB_OpenGateway.asm		\
+	AVC_ILDB_Root_Field_UV.asm		\
+	AVC_ILDB_Root_Field_Y.asm		\
+	AVC_ILDB_Root_Mbaff_UV.asm		\
+	AVC_ILDB_Root_Mbaff_Y.asm		\
+	AVC_ILDB_Root_UV.asm			\
+	AVC_ILDB_Root_Y.asm			\
+	AVC_ILDB_Spawn.asm			\
+	AVC_ILDB_SpawnChild.asm			\
+	AVC_ILDB_SpawnChromaRoot.asm		\
+	Load_ILDB_Cntrl_Data.asm		\
+	Load_ILDB_Cntrl_Data_16DW.asm		\
+	Load_ILDB_Cntrl_Data_22DW.asm		\
+	Load_ILDB_Cntrl_Data_64DW.asm		\
+	SetupVPKernel.asm			\
+	TransposeNV12_16x16.asm			\
+	TransposeNV12_4x16.asm			\
+	Transpose_Cur_UV_2x8.asm		\
+	Transpose_Cur_UV_8x8.asm		\
+	Transpose_Cur_UV_Right_Most_2x8.asm	\
+	Transpose_Cur_Y_16x16.asm		\
+	Transpose_Cur_Y_4x16.asm		\
+	Transpose_Cur_Y_Right_Most_4x16.asm	\
+	Transpose_Left_UV_2x8.asm		\
+	Transpose_Left_Y_4x16.asm		\
+	loadNV12_16x16T.asm			\
+	loadNV12_16x4.asm			\
+	load_Cur_UV_8x8T.asm			\
+	load_Cur_UV_8x8T_Mbaff.asm		\
+	load_Cur_UV_Right_Most_2x8.asm		\
+	load_Cur_Y_16x16T.asm			\
+	load_Cur_Y_16x16T_Mbaff.asm		\
+	load_Cur_Y_Right_Most_4x16.asm		\
+	load_Left_UV_2x8T.asm			\
+	load_Left_UV_2x8T_Mbaff.asm		\
+	load_Left_Y_4x16T.asm			\
+	load_Left_Y_4x16T_Mbaff.asm		\
+	load_Top_UV_8x2.asm			\
+	load_Top_UV_8x2_Mbaff.asm		\
+	load_Top_Y_16x4.asm			\
+	load_Top_Y_16x4_Mbaff.asm		\
+	saveNV12_16x16.asm			\
+	saveNV12_16x4.asm			\
+	saveNV12_16x4T.asm			\
+	save_Cur_UV_8x8.asm			\
+	save_Cur_UV_8x8_Mbaff.asm		\
+	save_Cur_Y_16x16.asm			\
+	save_Cur_Y_16x16_Mbaff.asm		\
+	save_Left_UV_8x2T.asm			\
+	save_Left_UV_8x2T_Mbaff.asm		\
+	save_Left_Y_16x4T.asm			\
+	save_Left_Y_16x4T_Mbaff.asm		\
+	save_Top_UV_8x2.asm			\
+	save_Top_UV_8x2_Mbaff.asm		\
+	save_Top_Y_16x4.asm			\
+	save_Top_Y_16x4_Mbaff.asm		\
+	writeURB.asm				\
+	writeURB_UV_Child.asm			\
+	writeURB_Y_Child.asm			\
+	$(NULL)
+# EXTRA_DIST is automake's list of extra files to distribute; in the visible part of this file both lists are used only here. $(NULL) is not defined in view (presumably empty) and just terminates each list.
+EXTRA_DIST = \
+	$(INTEL_ILDB_ASM)			\
+	$(INTEL_ILDB_INC)			\
+	$(NULL)
+
+
+# Extra clean files so that maintainer-clean removes *everything*: Makefile.in is deleted by maintainer-clean-generic, the Makefile itself by maintainer-clean.
+MAINTAINERCLEANFILES = Makefile.in
+all: all-am
+# An empty .SUFFIXES list clears the suffix rules. The next rule regenerates Makefile.in: a changed configure dependency triggers the top-level am--refresh, anything else reruns automake for this directory only.
+.SUFFIXES:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/shaders/h264/ildb/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu src/shaders/h264/ildb/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+# Above: Makefile is recreated by config.status, or by the top-level am--refresh when config.status itself is newer. Below: config.status, configure and aclocal.m4 are all rebuilt through am--refresh.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+# libtool cleanup (the leading '-' tells make to ignore rm failures), followed by tag targets that have no recipe in this directory.
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+# distdir: copy every entry of $(DISTFILES) into $(distdir), creating subdirectories first; each entry is taken from the build directory if it exists there, otherwise from $(srcdir).
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+# install-am re-invokes $(MAKE) for the exec and data halves of the install.
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+# install-strip (below) reruns `install` with INSTALL_PROGRAM set to $(INSTALL_STRIP_PROGRAM); STRIPPROG is passed along only when $(STRIP) is non-empty.
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+# distclean-generic deletes $(CONFIG_CLEAN_FILES); the VPATH list is left alone when srcdir is "." (an in-tree build). The leading '-' makes make ignore failures.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+# maintainer-clean-generic prints a warning and then deletes $(MAINTAINERCLEANFILES).
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+# Standard automake targets. Of the targets below only maintainer-clean has a recipe (it deletes Makefile); the rest either chain to another target or are empty in this directory.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+# install-am and install-strip, listed under .MAKE, are the two targets in this file whose recipes call $(MAKE).
+.MAKE: install-am install-strip
+# Names declared phony; a few of them (cscopelist-am, ctags-am, tags-am, distclean-libtool) have no rule in the visible part of this file.
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+	cscopelist-am ctags-am distclean distclean-generic \
+	distclean-libtool distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags-am uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Root_Undefs.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Root_Undefs.inc
@@ -0,0 +1,57 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: Root_Undefs.inc
+//
+// Undefine global symbols for new process in root thread
+// (this file contains only #undef directives: READ_BI, WRITE_BI and the ILDB_H_* / ILDB_V_* symbols)
+
+#undef 		READ_BI
+#undef 		WRITE_BI
+
+#undef		ILDB_H_INDEPENDENT
+#undef		ILDB_H_INDEPENDENT_CONT
+#undef		ILDB_H_DEPENDENT
+#undef		ILDB_H_DEPENDENT_SCAN
+#undef		ILDB_H_NO_DEPENDENT
+
+#undef		ILDB_V_INDEPENDENT
+#undef		ILDB_V_INDEPENDENT_CONT
+#undef		ILDB_V_DEPENDENT
+#undef		ILDB_V_DEPENDENT_SCAN
+#undef		ILDB_V_NO_DEPENDENT
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/SetupVPKernel.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/SetupVPKernel.asm
@@ -0,0 +1,54 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: SetupVPKernel.asm
+//
+// Initial setup for running video-processing kernels
+// (the only instruction is one 8-dword mov; MSGSRC is presumably declared via ILDB_header.inc)
+
+#include "ILDB_header.inc"
+
+//
+//  Now, begin source code....
+//
+
+.code
+
+    mov (8)	MSGSRC.0<1>:ud	r0.0<8;8,1>:ud	// Initialize message payload header with R0
+
+// End of SetupVPKernel
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/TransposeNV12_16x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/TransposeNV12_16x16.asm
@@ -0,0 +1,165 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: TransposeNV12_16x16.asm
+//	
+//	Transpose a 16x16 NV12 MB.  The output is also in NV12
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 8 GRFs
+//	SRC_UW:			SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw	// 4 GRFs
+//
+//  Temp buffer:
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 8 GRFs
+//	BUF_W:			BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// 4 GRFs
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDA:w
+#endif
+
+
+// Transpose Y (16x16 bytes)
+
+// The first step: each mov gathers one 4x4 byte tile of SRC_YB (region <16;4,1>: 4 rows, 4 bytes per row) into 16 consecutive bytes of BUF_B
+mov (16)	BUF_B(0,0)<1>		SRC_YB(0,0)<16;4,1>
+mov (16)	BUF_B(0,16)<1>		SRC_YB(2,0)<16;4,1>
+mov (16)	BUF_B(1,0)<1>		SRC_YB(4,0)<16;4,1>
+mov (16)	BUF_B(1,16)<1>		SRC_YB(6,0)<16;4,1>
+
+mov (16)	BUF_B(2,0)<1>		SRC_YB(0,4)<16;4,1>
+mov (16)	BUF_B(2,16)<1>		SRC_YB(2,4)<16;4,1>
+mov (16)	BUF_B(3,0)<1>		SRC_YB(4,4)<16;4,1>
+mov (16)	BUF_B(3,16)<1>		SRC_YB(6,4)<16;4,1>
+
+mov (16)	BUF_B(4,0)<1>		SRC_YB(0,8)<16;4,1>
+mov (16)	BUF_B(4,16)<1>		SRC_YB(2,8)<16;4,1>
+mov (16)	BUF_B(5,0)<1>		SRC_YB(4,8)<16;4,1>
+mov (16)	BUF_B(5,16)<1>		SRC_YB(6,8)<16;4,1>
+
+mov (16)	BUF_B(6,0)<1>		SRC_YB(0,12)<16;4,1>
+mov (16)	BUF_B(6,16)<1>		SRC_YB(2,12)<16;4,1>
+mov (16)	BUF_B(7,0)<1>		SRC_YB(4,12)<16;4,1>
+mov (16)	BUF_B(7,16)<1>		SRC_YB(6,12)<16;4,1>
+
+// The second step: each mov picks every 4th byte from two BUF_B registers (region <32;8,4>), i.e. one source column, and writes it as one 16-byte row of SRC_YB
+mov (16)	SRC_YB(0,0)<1>		BUF_B(0,0)<32;8,4>
+mov (16)	SRC_YB(0,16)<1>		BUF_B(0,1)<32;8,4>
+mov (16)	SRC_YB(1,0)<1>		BUF_B(0,2)<32;8,4>
+mov (16)	SRC_YB(1,16)<1>		BUF_B(0,3)<32;8,4>
+
+mov (16)	SRC_YB(2,0)<1>		BUF_B(2,0)<32;8,4>
+mov (16)	SRC_YB(2,16)<1>		BUF_B(2,1)<32;8,4>
+mov (16)	SRC_YB(3,0)<1>		BUF_B(2,2)<32;8,4>
+mov (16)	SRC_YB(3,16)<1>		BUF_B(2,3)<32;8,4>
+
+mov (16)	SRC_YB(4,0)<1>		BUF_B(4,0)<32;8,4>
+mov (16)	SRC_YB(4,16)<1>		BUF_B(4,1)<32;8,4>
+mov (16)	SRC_YB(5,0)<1>		BUF_B(4,2)<32;8,4>
+mov (16)	SRC_YB(5,16)<1>		BUF_B(4,3)<32;8,4>
+
+mov (16)	SRC_YB(6,0)<1>		BUF_B(6,0)<32;8,4>
+mov (16)	SRC_YB(6,16)<1>		BUF_B(6,1)<32;8,4>
+mov (16)	SRC_YB(7,0)<1>		BUF_B(6,2)<32;8,4>
+mov (16)	SRC_YB(7,16)<1>		BUF_B(6,3)<32;8,4>
+
+// Y is transposed in place: the result is back in SRC_YB.
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.
+// Transpose by treating UV pair as a word.
+
+
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//  First step 		(16)	<1>:w <==== <8;4,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+// Transpose UV (8x8 words), the first step: each mov packs a 4x4 tile of UV words into one 16-word row of BUF_W
+mov (16)	BUF_W(0,0)<1>		SRC_UW(0,0)<8;4,1>
+mov (16)	BUF_W(1,0)<1>		SRC_UW(2,0)<8;4,1>
+mov (16)	BUF_W(2,0)<1>		SRC_UW(0,4)<8;4,1>
+mov (16)	BUF_W(3,0)<1>		SRC_UW(2,4)<8;4,1>
+
+
+//	Second step		(8)	<1>:w <=== <16;4,4>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+// Transpose UV (8x8 words), the second step: each mov (8) picks every 4th word from two BUF_W rows, giving one transposed 8-word row of SRC_UW
+mov (8)		SRC_UW(0,0)<1>		BUF_W(0,0)<16;4,4>
+mov (8)		SRC_UW(0,8)<1>		BUF_W(0,1)<16;4,4>
+mov (8)		SRC_UW(1,0)<1>		BUF_W(0,2)<16;4,4>
+mov (8)		SRC_UW(1,8)<1>		BUF_W(0,3)<16;4,4>
+mov (8)		SRC_UW(2,0)<1>		BUF_W(2,0)<16;4,4>
+mov (8)		SRC_UW(2,8)<1>		BUF_W(2,1)<16;4,4>
+mov (8)		SRC_UW(3,0)<1>		BUF_W(2,2)<16;4,4>
+mov (8)		SRC_UW(3,8)<1>		BUF_W(2,3)<16;4,4>
+
+// The UV word pairs are now transposed; U and V stay interleaved (NV12) in SRC_UW.
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/TransposeNV12_4x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/TransposeNV12_4x16.asm
@@ -0,0 +1,124 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: TransposeNV12_4x16.asm
+//	
+//	Transpose a 4x16 internal planar to 16x4 internal planar block
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 8 GRFs
+//	SRC_UW:			SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// 4 GRFs
+//
+//  Temp buffer:
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 8 GRFs
+//	BUF_W:			BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// 4 GRFs
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDB:w
+#endif
+
+// Transpose Y (4x16) right most 4 columns
+
+// The first step: gather four 4x4 byte tiles of SRC_YB into BUF_B(0) and BUF_B(1)
+mov (16)	BUF_B(0,0)<1>		SRC_YB(0,0)<16;4,1>		// Read 2 rows, write 1 row
+mov (16)	BUF_B(0,16)<1>		SRC_YB(2,0)<16;4,1>
+mov (16)	BUF_B(1,0)<1>		SRC_YB(4,0)<16;4,1>
+mov (16)	BUF_B(1,16)<1>		SRC_YB(6,0)<16;4,1>
+
+// The second step: pick every 4th byte of those tiles, so that each 16-byte output row holds one source column
+mov (16)	BUF_B(2,0)<1>		BUF_B(0,0)<32;8,4> 		// Read 2 rows, write 1 row
+mov (16)	BUF_B(2,16)<1>		BUF_B(0,1)<32;8,4>
+mov (16)	BUF_B(3,0)<1>		BUF_B(0,2)<32;8,4>
+mov (16)	BUF_B(3,16)<1>		BUF_B(0,3)<32;8,4>
+
+// Y is now transposed. the result is in BUF_B(2) and BUF_B(3).
+
+
+
+// Transpose UV (4x8),  right most 2 columns in word
+// Use BUF_W(0) as temp buf
+
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//  First step 		(8)	<1>:w <==== <8;2,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+mov (8)		BUF_W(0,0)<1>		SRC_UW(0,0)<8;2,1>
+mov (8)		BUF_W(0,8)<1>		SRC_UW(2,0)<8;2,1>
+// The second step de-interleaves BUF_W(0): region <1;8,2> reads the 8 even-indexed words, then (vertical stride 1) the 8 odd-indexed words.
+//	Second step		(16) <1>:w <==== <1;8,2>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+mov (16)	BUF_W(1,0)<1>		BUF_W(0,0)<1;8,2>
+
+// UV are now transposed.  the result is in BUF_W(1).
+
+
+
+//The first step
+//mov (16)	BUF_B(0,0)<1>		SRC_UW(0,0)<8;2,1>		// Read 2 rows, write 1 row
+// The second step
+//mov (8)		SRC_UB(4,0)<1>		BUF_B(0,0)<16;8,2> 		// Read 1 row, write 1 row
+//mov (8)		SRC_UB(4,8)<1>		BUF_B(0,1)<16;8,2> 		// Read 1 row, write 1 row
+
+// Transpose V (8x8),  right most 2 columns
+// The first step
+//mov (16)	BUF_B(0,0)<1>		SRC_VB(0,1)<8;2,1>		// Read 2 rows, write 1 row
+// The second step
+//mov (8)		SRC_UB(4,16)<1>		BUF_B(0,0)<16;8,2> 		// Read 1 row, write 1 row
+//mov (8)		SRC_UB(4,24)<1>		BUF_B(0,1)<16;8,2> 		// Read 1 row, write 1 row
+
+// (Refers to the commented-out code above:) U and V are now transposed.  The result is in BUF_B(4).
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_2x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_2x8.asm
@@ -0,0 +1,86 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: Transpose_Cur_UV_2x8.asm
+//	
+//	Transpose UV 2x8 to 8x2 block (2x8U + 2x8V in NV12)
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_UW:			SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// 4 GRFs
+//
+//  Temp buffer:
+//	LEFT_TEMP_W:		word (uw) scratch region written by the code below; the original header listed BUF_W here, and LEFT_TEMP_W itself is declared outside this file
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDB:w
+#endif
+
+// Transpose UV (4x8),  right most 2 columns in word
+// Use LEFT_TEMP_W(0) as temp buf
+
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//  First step 		(8)	<1>:w <==== <8;2,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 67 67 66 66 57 57 56 56 47 47 46 46 37 37 36 36 27 27 26 26 17 17 16 16 07 07 06 06|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+mov (8)		LEFT_TEMP_W(0,0)<1>		SRC_UW(0,6)<8;2,1>		{ NoDDClr }
+mov (8)		LEFT_TEMP_W(0,8)<1>		SRC_UW(2,6)<8;2,1>		{ NoDDChk }
+// NoDDClr / NoDDChk above: the two movs fill different halves of LEFT_TEMP_W(0); presumably the destination-dependency handling is relaxed for that reason (confirm against the Gen ISA docs).
+//	Second step		(16) <1>:w <==== <1;8,2>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+mov (16)	LEFT_TEMP_W(1,0)<1>		LEFT_TEMP_W(0,0)<1;8,2>
+
+// UV are now transposed.  the result is in LEFT_TEMP_W(1)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_8x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_8x8.asm
@@ -0,0 +1,115 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: Transpose_Cur_UV_8x8.asm
+//	
+//	Transpose a 8x8 UV block. (8x8U + 8x8V)  The output is also in NV12
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_UW:			SRC_UW Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw	// 4 GRFs
+//
+//  Temp buffer:
+//	CUR_TEMP_W:		CUR_TEMP_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw		// 4 GRFs
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+// Debug builds only (_DEBUG defined): store the marker word 0xDDDA in EntrySignatureC.
+// NOTE(review): presumably an entry signature for tracing which module ran -- confirm where EntrySignatureC is declared.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDA:w
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Src U and V are mixed in NV12 format. U on even bytes, V on odd bytes.
+// Transpose by treating UV pair as a word.
+
+
+// Src U 8x8 and V 8x8 are mixed. (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|17 17 16 16 15 15 14 14 13 13 12 12 11 11 10 10 07 07 06 06 05 05 04 04 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 33 33 32 32 31 31 30 30 27 27 26 26 25 25 24 24 23 23 22 22 21 21 20 20|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|57 57 56 56 55 55 54 54 53 53 52 52 51 51 50 50 47 47 46 46 45 45 44 44 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 73 73 72 72 71 71 70 70 67 67 66 66 65 65 64 64 63 63 62 62 61 61 60 60|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//  First step 		(16)	<1>:w <==== <8;4,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|33 33 32 32 31 31 30 30 23 23 22 22 21 21 20 20 13 13 12 12 11 11 10 10 03 03 02 02 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 73 72 72 71 71 70 70 63 63 62 62 61 61 60 60 53 53 52 52 51 51 50 50 43 43 42 42 41 41 40 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|37 37 36 36 35 35 34 34 27 27 26 26 25 25 24 24 17 17 16 16 15 15 14 14 07 07 06 06 05 05 04 04|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 76 76 75 75 74 74 67 67 66 66 65 65 64 64 57 57 56 56 55 55 54 54 47 47 46 46 45 45 44 44|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+// Transpose UV (8x8 words), the first step:
+// split the 8x8 word block into four 4x4 quadrants, one per CUR_TEMP_W register.
+// <8;4,1> reads 4 consecutive words of a row, then advances 8 words (one row) to the next row,
+// so each 16-wide mov collects 4 words from each of 4 rows.
+// In order: rows 0-3 / cols 0-3, rows 4-7 / cols 0-3, rows 0-3 / cols 4-7, rows 4-7 / cols 4-7.
+mov (16)	CUR_TEMP_W(0,0)<1>		SRC_UW(0,0)<8;4,1>
+mov (16)	CUR_TEMP_W(1,0)<1>		SRC_UW(2,0)<8;4,1>
+mov (16)	CUR_TEMP_W(2,0)<1>		SRC_UW(0,4)<8;4,1>
+mov (16)	CUR_TEMP_W(3,0)<1>		SRC_UW(2,4)<8;4,1>
+
+
+//	Second step		(8)	<1>:w <=== <16;4,4>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 73 63 63 53 53 43 43 33 33 23 23 13 13 03 03 72 72 62 62 52 52 42 42 32 32 22 22 12 12 02 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|75 75 65 65 55 55 45 45 35 35 25 25 15 15 05 05 74 74 64 64 54 54 44 44 34 34 24 24 14 14 04 04|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|77 77 67 67 57 57 47 47 37 37 27 27 17 17 07 07 76 76 66 66 56 56 46 46 36 36 26 26 16 16 06 06|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+// Transpose UV (8x8 words), the second step:
+// <16;4,4> reads 4 words at a stride of 4 (one column of a 4x4 quadrant), then advances 16 words
+// (one register) to the quadrant holding rows 4-7, so each 8-wide mov gathers one complete source
+// column, i.e. one transposed row.  The rows are written back over SRC_UW, two per register.
+// NOTE(review): each NoDDClr / NoDDChk pair covers the two half-register writes to one SRC_UW
+// register -- confirm against the GEN ISA destination dependency control rules.
+mov (8)		SRC_UW(0,0)<1>		CUR_TEMP_W(0,0)<16;4,4>		{ NoDDClr }
+mov (8)		SRC_UW(0,8)<1>		CUR_TEMP_W(0,1)<16;4,4>		{ NoDDChk }
+mov (8)		SRC_UW(1,0)<1>		CUR_TEMP_W(0,2)<16;4,4>		{ NoDDClr }
+mov (8)		SRC_UW(1,8)<1>		CUR_TEMP_W(0,3)<16;4,4>		{ NoDDChk }
+mov (8)		SRC_UW(2,0)<1>		CUR_TEMP_W(2,0)<16;4,4>		{ NoDDClr }
+mov (8)		SRC_UW(2,8)<1>		CUR_TEMP_W(2,1)<16;4,4>		{ NoDDChk }
+mov (8)		SRC_UW(3,0)<1>		CUR_TEMP_W(2,2)<16;4,4>		{ NoDDClr }
+mov (8)		SRC_UW(3,8)<1>		CUR_TEMP_W(2,3)<16;4,4>		{ NoDDChk }
+
+// The UV pairs are now transposed and the result is back in SRC_UW.
+// U and V stay interleaved (NV12) because each UV pair is moved as one word.
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_Right_Most_2x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_UV_Right_Most_2x8.asm
@@ -0,0 +1,55 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	Transpose Cur MB Right Most 2x8 to 8x2
+//  Assume source is LEFT_TEMP_W(0), and destination is LEFT_TEMP_W(1)
+
+//	Input from dport for transpose:	
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	Output of transpose:	(16) <1>:w <=== <1;8,2>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Two-instruction form, kept for reference only (even words first, then odd words):
+//	mov (8)	LEFT_TEMP_W(1,0)<1>		LEFT_TEMP_W(0,0)<16;8,2>		{ NoDDClr }
+//	mov (8)	LEFT_TEMP_W(1,8)<1>		LEFT_TEMP_W(0,1)<16;8,2>		{ NoDDChk }
+
+//	Single-instruction form in use: <1;8,2> reads 8 words at a stride of 2, then restarts one word
+//	further on, so the column-0 words land in the low half of LEFT_TEMP_W(1) and the column-1 words in the high half.
+	mov (16)	LEFT_TEMP_W(1,0)<1>		LEFT_TEMP_W(0,0)<1;8,2>
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_16x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_16x16.asm
@@ -0,0 +1,104 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: Transpose_Cur_Y_16x16.asm
+//	
+//	Transpose Y 16x16 block.
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 8 GRFs
+//
+//  Temp buffer:
+//	CUR_TEMP_B:		BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 8 GRFs
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+// Debug builds only (_DEBUG defined): store the marker word 0xDDDA in EntrySignatureC.
+// NOTE(review): presumably an entry signature for tracing which module ran -- confirm where EntrySignatureC is declared.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDA:w
+#endif
+
+
+// Transpose Y (16x16 bytes)
+
+// The first step: regroup the 16x16 byte block into four 4-column strips.
+// <16;4,1> reads 4 consecutive bytes of a row, then advances 16 bytes (one row) to the next row,
+// so each 16-wide mov collects a 4-byte-wide slice of 4 rows (rows 0-3, 4-7, 8-11, 12-15 in turn).
+
+// Columns 0-3 of all 16 rows -> CUR_TEMP_B(0) and CUR_TEMP_B(1)
+mov (16)	CUR_TEMP_B(0,0)<1>		SRC_YB(0,0)<16;4,1>		{ NoDDClr } 
+mov (16)	CUR_TEMP_B(0,16)<1>		SRC_YB(2,0)<16;4,1>		{ NoDDChk }
+mov (16)	CUR_TEMP_B(1,0)<1>		SRC_YB(4,0)<16;4,1>		{ NoDDClr }
+mov (16)	CUR_TEMP_B(1,16)<1>		SRC_YB(6,0)<16;4,1>		{ NoDDChk }
+
+// Columns 4-7 -> CUR_TEMP_B(2) and CUR_TEMP_B(3)
+mov (16)	CUR_TEMP_B(2,0)<1>		SRC_YB(0,4)<16;4,1>		{ NoDDClr }
+mov (16)	CUR_TEMP_B(2,16)<1>		SRC_YB(2,4)<16;4,1>		{ NoDDChk }
+mov (16)	CUR_TEMP_B(3,0)<1>		SRC_YB(4,4)<16;4,1>		{ NoDDClr }
+mov (16)	CUR_TEMP_B(3,16)<1>		SRC_YB(6,4)<16;4,1>		{ NoDDChk }
+
+// Columns 8-11 -> CUR_TEMP_B(4) and CUR_TEMP_B(5)
+mov (16)	CUR_TEMP_B(4,0)<1>		SRC_YB(0,8)<16;4,1>		{ NoDDClr }
+mov (16)	CUR_TEMP_B(4,16)<1>		SRC_YB(2,8)<16;4,1>		{ NoDDChk }
+mov (16)	CUR_TEMP_B(5,0)<1>		SRC_YB(4,8)<16;4,1>		{ NoDDClr }
+mov (16)	CUR_TEMP_B(5,16)<1>		SRC_YB(6,8)<16;4,1>		{ NoDDChk }
+
+// Columns 12-15 -> CUR_TEMP_B(6) and CUR_TEMP_B(7)
+mov (16)	CUR_TEMP_B(6,0)<1>		SRC_YB(0,12)<16;4,1>	{ NoDDClr }
+mov (16)	CUR_TEMP_B(6,16)<1>		SRC_YB(2,12)<16;4,1>	{ NoDDChk }
+mov (16)	CUR_TEMP_B(7,0)<1>		SRC_YB(4,12)<16;4,1>	{ NoDDClr }
+mov (16)	CUR_TEMP_B(7,16)<1>		SRC_YB(6,12)<16;4,1>	{ NoDDChk }
+
+// The second step: turn every source column into one 16-byte output row.
+// <32;8,4> reads 8 bytes at a stride of 4 (one column across the 8 rows held in a register), then
+// advances 32 bytes (one register) for the remaining 8 rows.  The starting byte offset 0..3 selects
+// the column inside the 4-column strip.  Output rows are written back over SRC_YB, two per register.
+
+// Source columns 0-3 -> SRC_YB(0), SRC_YB(1)
+mov (16)	SRC_YB(0,0)<1>		CUR_TEMP_B(0,0)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(0,16)<1>		CUR_TEMP_B(0,1)<32;8,4>		{ NoDDChk }
+mov (16)	SRC_YB(1,0)<1>		CUR_TEMP_B(0,2)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(1,16)<1>		CUR_TEMP_B(0,3)<32;8,4>		{ NoDDChk }
+
+// Source columns 4-7 -> SRC_YB(2), SRC_YB(3)
+mov (16)	SRC_YB(2,0)<1>		CUR_TEMP_B(2,0)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(2,16)<1>		CUR_TEMP_B(2,1)<32;8,4>		{ NoDDChk }
+mov (16)	SRC_YB(3,0)<1>		CUR_TEMP_B(2,2)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(3,16)<1>		CUR_TEMP_B(2,3)<32;8,4>		{ NoDDChk }
+
+// Source columns 8-11 -> SRC_YB(4), SRC_YB(5)
+mov (16)	SRC_YB(4,0)<1>		CUR_TEMP_B(4,0)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(4,16)<1>		CUR_TEMP_B(4,1)<32;8,4>		{ NoDDChk }
+mov (16)	SRC_YB(5,0)<1>		CUR_TEMP_B(4,2)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(5,16)<1>		CUR_TEMP_B(4,3)<32;8,4>		{ NoDDChk }
+
+// Source columns 12-15 -> SRC_YB(6), SRC_YB(7)
+mov (16)	SRC_YB(6,0)<1>		CUR_TEMP_B(6,0)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(6,16)<1>		CUR_TEMP_B(6,1)<32;8,4>		{ NoDDChk }
+mov (16)	SRC_YB(7,0)<1>		CUR_TEMP_B(6,2)<32;8,4>		{ NoDDClr }
+mov (16)	SRC_YB(7,16)<1>		CUR_TEMP_B(6,3)<32;8,4>		{ NoDDChk }
+
+// Y is transposed; the result is back in SRC_YB.
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_4x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_4x16.asm
@@ -0,0 +1,105 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//////////////////////////////////////////////////////////////////////////////////////////
+//	Module name: Transpose_Cur_Y_4x16.asm
+//	
+//	Transpose a 4x16 internal planar to 16x4 internal planar block.
+//	The src block is 16x16.  Right most 4 columns are transposed.
+//
+//----------------------------------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region is :ub
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// 8 GRFs
+//
+//  Temp buffer:
+//	LEFT_TEMP_B:	LEFT_TEMP_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub	// registers 0-3 are used here
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+// Debug builds only (_DEBUG defined): store the marker word 0xDDDB in EntrySignatureC.
+// NOTE(review): presumably an entry signature for tracing which module ran -- confirm where EntrySignatureC is declared.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDDB:w
+#endif
+
+// Transpose Y (4x16) right most 4 columns
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|9f 9e 9d 9c 9b 9a 99 98 97 96 95 94 93 92 91 90 8f 8e 8d 8c 8b 8a 89 88 87 86 85 84 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|bf be bd bc bb ba b9 b8 b7 b6 b5 b4 b3 b2 b1 b0 af ae ad ac ab aa a9 a8 a7 a6 a5 a4 a3 a2 a1 a0|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|df de dd dc db da d9 d8 d7 d6 d5 d4 d3 d2 d1 d0 cf ce cd cc cb ca c9 c8 c7 c6 c5 c4 c3 c2 c1 c0|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|ff fe fd fc fb fa f9 f8 f7 f6 f5 f4 f3 f2 f1 f0 ef ee ed ec eb ea e9 e8 e7 e6 e5 e4 e3 e2 e1 e0|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+// The first step
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|7f 7e 7d 7c 6f 6e 6d 6c 5f 5e 5d 5c 4f 4e 4d 4c 3f 3e 3d 3c 2f 2e 2d 2c 1f 1e 1d 1c 0f 0e 0d 0c|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|ff fe fd fc ef ee ed ec df de dd dc cf ce cd cc bf be bd bc af ae ad ac 9f 9e 9d 9c 8f 8e 8d 8c|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+// The second step
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|fd ed dd cd bd ad 9d 8d 7d 6d 5d 4d 3d 2d 1d 0d fc ec dc cc bc ac 9c 8c 7c 6c 5c 4c 3c 2c 1c 0c|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|ff ef df cf bf af 9f 8f 7f 6f 5f 4f 3f 2f 1f 0f fe ee de ce be ae 9e 8e 7e 6e 5e 4e 3e 2e 1e 0e|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+
+// The first step: gather the right-most 4 columns (12-15) of all 16 rows into LEFT_TEMP_B(0) and LEFT_TEMP_B(1).
+// <16;4,1> reads 4 consecutive bytes of a row, then advances 16 bytes (one row) to the next row,
+// so each 16-wide mov covers 4 rows (rows 0-3, 4-7, 8-11, 12-15 in turn).
+mov (16)	LEFT_TEMP_B(0,0)<1>		SRC_YB(0,12)<16;4,1>		{ NoDDClr }	
+mov (16)	LEFT_TEMP_B(0,16)<1>	SRC_YB(2,12)<16;4,1>		{ NoDDChk }
+mov (16)	LEFT_TEMP_B(1,0)<1>		SRC_YB(4,12)<16;4,1>		{ NoDDClr }
+mov (16)	LEFT_TEMP_B(1,16)<1>	SRC_YB(6,12)<16;4,1>		{ NoDDChk }
+
+// The second step: turn each of the 4 gathered columns into one 16-byte row.
+// <32;8,4> reads 8 bytes at a stride of 4 (one column across the 8 rows held in a register), then
+// advances 32 bytes (one register) for the remaining 8 rows.  The starting byte offset 0..3 selects the column.
+mov (16)	LEFT_TEMP_B(2,0)<1>		LEFT_TEMP_B(0,0)<32;8,4> 		{ NoDDClr }	
+mov (16)	LEFT_TEMP_B(2,16)<1>	LEFT_TEMP_B(0,1)<32;8,4>		{ NoDDChk }
+mov (16)	LEFT_TEMP_B(3,0)<1>		LEFT_TEMP_B(0,2)<32;8,4>		{ NoDDClr }
+mov (16)	LEFT_TEMP_B(3,16)<1>	LEFT_TEMP_B(0,3)<32;8,4>		{ NoDDChk }
+
+// Y is now transposed.  The result is in LEFT_TEMP_B(2) and LEFT_TEMP_B(3).
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_Right_Most_4x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Cur_Y_Right_Most_4x16.asm
@@ -0,0 +1,61 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	Transpose cur Y right most 4x16 to 16x4
+//  Assume source is LEFT_TEMP_B(0), and destination is LEFT_TEMP_B(2)
+
+
+//	Input received from dport:
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Output of transpose:		<1>	<= <32;8,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+	// Transpose the data; the output also occupies 2 GRFs (LEFT_TEMP_B(2) and LEFT_TEMP_B(3)).
+	// <32;8,4> reads 8 bytes at a stride of 4 (one column across the 8 rows held in a register), then
+	// advances 32 bytes (one register) for the remaining 8 rows, so each mov emits one 16-byte
+	// transposed row.  The starting byte offset 0..3 selects the source column.
+	mov (16)	LEFT_TEMP_B(2)<1>			LEFT_TEMP_B(0, 0)<32;8,4>		{ NoDDClr }
+	mov (16)	LEFT_TEMP_B(2, 16)<1>		LEFT_TEMP_B(0, 1)<32;8,4>		{ NoDDChk }
+	mov (16)	LEFT_TEMP_B(3)<1>			LEFT_TEMP_B(0, 2)<32;8,4>		{ NoDDClr }
+	mov (16)	LEFT_TEMP_B(3, 16)<1>		LEFT_TEMP_B(0, 3)<32;8,4>		{ NoDDChk }
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Left_UV_2x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Left_UV_2x8.asm
@@ -0,0 +1,58 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	Transpose left MB 2x8 to 8x2
+//  Assume source is LEFT_TEMP_W, and destination is PREV_MB_UW
+
+//	Input from dport for transpose:	
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	Output of transpose:	(16) <1>:w <=== <1;8,2>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Earlier two-instruction forms, kept for reference only (even words first, then odd words).
+//	Variant reading from BUF_W:
+//	mov (8)	PREV_MB_UW(0,0)<1>		BUF_W(0,0)<16;8,2>		{ NoDDClr }
+//	mov (8)	PREV_MB_UW(0,8)<1>		BUF_W(0,1)<16;8,2>		{ NoDDChk }
+	
+//	Variant reading from LEFT_TEMP_W:
+//	mov (8)	PREV_MB_UW(0,0)<1>		LEFT_TEMP_W(0,0)<16;8,2>		{ NoDDClr }
+//	mov (8)	PREV_MB_UW(0,8)<1>		LEFT_TEMP_W(0,1)<16;8,2>		{ NoDDChk }
+
+//	Single-instruction form in use: <1;8,2> reads 8 words at a stride of 2, then restarts one word
+//	further on, so the column-0 words land in the low half of PREV_MB_UW(0) and the column-1 words in the high half.
+	mov (16)	PREV_MB_UW(0,0)<1>		LEFT_TEMP_W(0,0)<1;8,2>
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Left_Y_4x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/Transpose_Left_Y_4x16.asm
@@ -0,0 +1,61 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	Transpose left MB 4x16 to 16x4
+//  Assume source is LEFT_TEMP_B, and destination is PREV_MB_YB
+
+
+//	Input received from dport:
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Output of transpose:		<1>	<= <32;8,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+	// Transpose the data; the output also occupies 2 GRFs (PREV_MB_YB(0) and PREV_MB_YB(1)).
+	// <32;8,4> reads 8 bytes at a stride of 4 (one column across the 8 rows held in a register), then
+	// advances 32 bytes (one register) for the remaining 8 rows, so each mov emits one 16-byte
+	// transposed row.  The starting byte offset 0..3 selects the source column.
+	mov (16)	PREV_MB_YB(0)<1>			LEFT_TEMP_B(0, 0)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(0, 16)<1>		LEFT_TEMP_B(0, 1)<32;8,4>		{ NoDDChk }
+	mov (16)	PREV_MB_YB(1)<1>			LEFT_TEMP_B(0, 2)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(1, 16)<1>		LEFT_TEMP_B(0, 3)<32;8,4>		{ NoDDChk }
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/loadNV12_16x16T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/loadNV12_16x16T.asm
@@ -0,0 +1,83 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: loadNV12_16x16T.asm
+//
+// Load and transpose an NV12 16x16 block (16x16 Y plus the 16x8-byte interleaved U+V)
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12) 	// 4 GRFs
+//
+//	Source region is :ub.  The same region as :ud region
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 8 GRFs
+//	SRC_UB:			SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 2 GRFs
+//	SRC_VB:			SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 2 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//	BI_SRC_UV:		Binding table index of UV surface (NV12)
+//
+//	Temp buffer (not referenced directly in this file; presumably used by the included transpose -- TODO confirm):
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD1:w	// Debug builds only: record an entry marker for this module
+#endif
+
+	// Read Y (16x16 bytes into SRC_YD)
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w		// Block origin: two words starting at ORIX_CUR go to MSGSRC.0 (x) and MSGSRC.1 (y)
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud		// Block width and height (16x16), encoded as (height-1)<<16 | (width-1)
+    send (8) SRC_YD(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(8)+DWBRMSGDSC_RC+BI_SRC_Y	// Read 8 GRFs
+
+	// Read U+V (reuses MSGSRC; only y and the block size change)
+    asr (1)	MSGSRC.1:ud		MSGSRC.1:ud			1:w						// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2<1>:ud	0x0007000F:ud		// NV12 U+V block width and height (16x8)
+    send (8) SRC_UD(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(4)+DWBRMSGDSC_RC+BI_SRC_UV	// Read 4 GRFs
+
+	#include "TransposeNV12_16x16.asm"
+
+//	#include "Transpose_Y_16x16.asm"	
+//	#include "Transpose_NV12_UV_16x8.asm"	
+		
+// End of loadNV12_16x16T
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/loadNV12_16x4.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/loadNV12_16x4.asm
@@ -0,0 +1,84 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: loadNV12_16x4.asm
+//
+// Load NV12 16x4 block (16x4 Y plus the 16x2-byte interleaved U+V)
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 3 GRFs (2 for Y, 1 for U+V)
+//
+//	Source region is :ub.  The same region as :ud region
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 2 GRFs
+//	SRC_UB:			SRC_UB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 0.5 GRF
+//	SRC_VB:			SRC_VB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 0.5 GRF
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//	BI_SRC_UV:		Binding table index of UV surface (NV12)
+//
+//	Temp buffer:
+//	BUF_D:			BUF_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//	NOTE(review): the active code below writes Y to PREV_MB_YD and U+V to BUF_D; SRC_YD/SRC_YB are not referenced and SRC_UB/SRC_VB/BUF_B appear only in the commented-out conversion -- confirm this list is current
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD2:w	// Debug builds only: record an entry marker for this module
+#endif
+
+	// Read Y (16x4 bytes into PREV_MB_YD)
+    mov (2)	MSGSRC.0<1>:ud	ORIX<2;2,1>:w		// Block origin: two words starting at ORIX go to MSGSRC.0 (x) and MSGSRC.1 (y)
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud		// Block width and height (16x4), encoded as (height-1)<<16 | (width-1)
+    send (8) PREV_MB_YD(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(2)+DWBRMSGDSC_RC+BI_SRC_Y	// Read 2 GRFs
+
+	// Read U+V (reuses MSGSRC; only y and the block size change)
+    asr (1)	MSGSRC.1:ud		MSGSRC.1:ud			1:w						// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2<1>:ud	0x0001000F:ud		// NV12 U+V block width and height (16x2)
+
+	// Load NV12 U+V to a temp buf
+	send (8) BUF_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(1)+DWBRMSGDSC_RC+BI_SRC_UV	// Read 1 GRF
+
+	// Convert NV12 U+V to internal planar U and V and place them right after Y (currently disabled: both movs are commented out).
+//	mov (16)	SRC_UB(0,0)<1>		BUF_B(0,0)<32;16,2>
+//	mov (16)	SRC_VB(0,0)<1>		BUF_B(0,1)<32;16,2>	
+	
+// End of loadNV12_16x4.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_8x8T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_8x8T.asm
@@ -0,0 +1,95 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_UV_8x8T.asm
+//
+// Load UV 8x8 block (NV12: 8x8U and 8x8V mixed); the transpose include at the end of this file is commented out
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12) 	// 4 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD1:w	// Debug builds only: record an entry marker for this module
+#endif
+
+	// Read U+V blk
+#if defined(_PROGRESSIVE) 
+    mov (1)	MSGSRC.0:ud		ORIX_CUR:w				{ NoDDClr } 		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w		1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0007000F:ud			{ NoDDChk }			// NV12 U+V block width and height (16x8 bytes)
+
+    //send (8) SRC_UD(0)<1>	MSGHDRU		MSGSRC<8;8,1>:ud	DWBRMSGDSC_SC+0x00040000+BI_SRC_UV
+    mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud	// Read 4 GRFs from SRC_UV
+#endif
+
+#if defined(_FIELD)
+//    cmp.z.f0.0 (1)  NULLREGW 	PicTypeC:w  	0:w						// Get pic type flag
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag
+	// f0.1 is used below to pick the bottom-field or top-field message descriptor
+
+    mov (1)	MSGSRC.0:ud		ORIX_CUR:w				{ NoDDClr } 		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w		1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0007000F:ud			{ NoDDChk }			// NV12 U+V block width and height (16x8 bytes)
+
+    // Set message descriptor
+
+    // Frame picture (disabled: this path is commented out)
+//    (f0.0) mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud			// Read 4 GRFs from SRC_UV
+//	(f0.0) jmpi		load_UV_8x8T
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV top field
+
+//load_UV_8x8T:
+
+#endif
+// MSGSRC and MSGDSC are only written inside the _PROGRESSIVE and _FIELD sections above
+    send (8) SRC_UD(0)<1>	MSGHDRU		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+
+//	#include "Transpose_Cur_UV_8x8.asm"
+
+// End of load_Cur_UV_8x8T
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_8x8T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_8x8T_Mbaff.asm
@@ -0,0 +1,92 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_UV_8x8T_Mbaff.asm
+//
+// Load UV 8x8 block (NV12: 8x8U and 8x8V mixed), MBAFF variant; the transpose include at the end of this file is commented out
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud   (U+V for NV12) 	// 4 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD1:w	// Debug builds only: record an entry marker for this module
+#endif
+    // FieldModeCurrentMbFlag selects frame vs. field access for this load of the current MB
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 is set when the flag bit is clear (frame branch below)
+
+    and.nz.f0.1 (1)	NULLREGW 	BitFields:w  	BotFieldFlag:w					// Get bottom field flag
+
+	// Read U+V
+    mov (1)	MSGSRC.0:ud		ORIX_CUR:w						{ NoDDClr } 		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w			1:w			{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0007000F:ud					{ NoDDChk }			// NV12 U+V block width and height (16x8 bytes)
+
+    // Set message descriptor
+
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_UV_8X8T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC+BI_SRC_UV:ud			// Read 4 GRFs from SRC_UV
+
+	(f0.1) add (1)	MSGSRC.1:d	MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+    
+ILDB_LABEL(ELSE_UV_8X8T): 
+	else 	(1)		ILDB_LABEL(ENDIF_UV_8X8T)
+
+	// Field picture: f0.1 picks the bottom-field or top-field descriptor
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC_BF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(4)+DWBRMSGDSC_SC_TF+BI_SRC_UV:ud  // Read 4 GRFs from SRC_UV top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ILDB_LABEL(ENDIF_UV_8X8T):
+
+    send (8) SRC_UD(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+
+//	#include "Transpose_Cur_UV_8x8.asm"
+
+// End of load_Cur_UV_8x8T_Mbaff
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_Right_Most_2x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_UV_Right_Most_2x8.asm
@@ -0,0 +1,91 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_UV_Right_Most_2x8.asm
+// Load the right-most 4x8-byte U+V strip of the current MB (x origin + 12) into LEFT_TEMP_D
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w	// Debug builds only: record an entry marker for this module
+#endif
+
+#if defined(_PROGRESSIVE) 
+	// Read U+V, (UV MB size = 16x8)
+    add (1)	MSGSRC.0:ud		ORIX_CUR:w			12:w			{ NoDDClr }		// Block origin, move right 12 bytes
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w			1:w				{ NoDDClr, NoDDChk }		// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud						{ NoDDChk }		// NV12 U+V block width and height (4x8)
+	send (8) LEFT_TEMP_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV	// Read 1 GRF from DEST_UV
+#endif
+
+#if defined(_FIELD) || defined(_MBAFF)
+
+    // FieldModeCurrentMbFlag selects frame vs. field access for this load; f0.0 is set when the flag bit is clear
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w				// Get bottom field flag
+
+	// Read U+V
+    add (1)	MSGSRC.0:ud		ORIX_CUR:w			12:w				{ NoDDClr }		// Block origin, move right 12 bytes
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w			1:w				{ NoDDClr, NoDDChk }		// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud						{ NoDDChk }		// NV12 U+V block width and height (4x8)
+
+	// Load NV12 U+V 
+	
+    // Set message descriptor (the labels below say "Y" but this is the U+V path)
+
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_2x8T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud			// Read 1 GRF from DEST_UV
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+
+ILDB_LABEL(ELSE_Y_2x8T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_2x8T)
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field
+	// NOTE(review): unlike load_Cur_UV_8x8T_Mbaff.asm, MSGSRC.1 (y) is not halved here for field access -- confirm this is intended
+	endif
+ILDB_LABEL(ENDIF_Y_2x8T):
+
+	// Read 1 GRF from DEST surface as the above MB has been deblocked.
+//	send (8) BUF_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	MSGDSC	
+	send (8) LEFT_TEMP_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	
+
+#endif
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_16x16T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_16x16T.asm
@@ -0,0 +1,93 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_Y_16x16T.asm
+//
+// Load Y 16x16 block; the transpose include at the end of this file is commented out
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD1:w	// Debug builds only: record an entry marker for this module
+#endif
+	// Read Y
+	
+#if defined(_PROGRESSIVE) 
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w		{ NoDDClr }			// Block origin: two words starting at ORIX_CUR go to MSGSRC.0 (x) and MSGSRC.1 (y)
+	mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud			{ NoDDChk }			// Block width and height (16x16), encoded as (height-1)<<16 | (width-1)
+
+    //send (8) SRC_YD(0)<1>	MSGHDRC		MSGSRC<8;8,1>:ud	DWBRMSGDSC_SMPLR+0x00080000+BI_SRC_Y
+	mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud    	// Read 8 GRFs from SRC_Y
+#endif
+
+    
+#if defined(_FIELD)
+//    cmp.z.f0.0 (1)  NULLREGW 	PicTypeC:w  	0:w						// Get pic type flag
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag
+	// f0.1 is used below to pick the bottom-field or top-field message descriptor
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w		{ NoDDClr }			// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud			{ NoDDChk }			// Block width and height (16x16)
+    
+    // Set message descriptor
+    // Frame picture (disabled: this path is commented out)
+//	(f0.0) mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud			// Read 8 GRFs from SRC_Y
+//	(f0.0) jmpi		load_Y_16x16T
+
+	// Non frame picture
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y top field
+
+//load_Y_16x16T:
+
+#endif
+// MSGSRC and MSGDSC are only written inside the _PROGRESSIVE and _FIELD sections above
+    send (8) SRC_YD(0)<1>	MSGHDRC		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+    	
+//	#include "Transpose_Cur_Y_16x16.asm"
+
+// End of load_Cur_Y_16x16T
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_16x16T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_16x16T_Mbaff.asm
@@ -0,0 +1,92 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_Y_16x16T_Mbaff.asm
+//
+// Load Y 16x16 block, MBAFF variant; the transpose include at the end of this file is commented out
+//
+//----------------------------------------------------------------
+//  Symbols that need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD1:w	// Debug builds only: record an entry marker for this module
+#endif
+
+    // FieldModeCurrentMbFlag selects frame vs. field access for this load of the current MB
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 is set when the flag bit is clear (frame branch below)
+	
+    and.nz.f0.1 (1)	NULLREGW 	BitFields:w  	BotFieldFlag:w		// Get bottom field flag
+
+	// Read Y
+    mov (2)	MSGSRC.0<1>:d	ORIX_CUR<2;2,1>:w		{ NoDDClr }		// Block origin: two words starting at ORIX_CUR go to MSGSRC.0 (x) and MSGSRC.1 (y)
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud			{ NoDDChk }		// Block width and height (16x16), encoded as (height-1)<<16 | (width-1)
+    
+    // Set message descriptor, etc.
+    
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_16x16T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC+BI_SRC_Y:ud			// Read 8 GRFs from SRC_Y
+    
+	(f0.1) add (1)	MSGSRC.1:d	MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+    
+ILDB_LABEL(ELSE_Y_16x16T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_16x16T)
+
+	// Field picture: f0.1 picks the bottom-field or top-field descriptor
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC_BF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(8)+DWBRMSGDSC_SC_TF+BI_SRC_Y:ud  // Read 8 GRFs from SRC_Y top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ILDB_LABEL(ENDIF_Y_16x16T):
+
+    send (8) SRC_YD(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+
+//	#include "Transpose_Cur_Y_16x16.asm"
+
+// End of load_Cur_Y_16x16T_Mbaff
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_Right_Most_4x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Cur_Y_Right_Most_4x16.asm
@@ -0,0 +1,115 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Cur_Y_Right_Most_4x16.asm
+//
+// Load luma cur MB right most 4x16 into LEFT_TEMP_B (written below through LEFT_TEMP_D)
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w	// Debug builds only: record an entry marker for this module
+#endif
+
+
+#if defined(_PROGRESSIVE) 
+	// Read Y 
+    add (1)	MSGSRC.0<1>:ud	ORIX_CUR:w		12:w	{ NoDDClr }				// Block origin, move right 12 bytes
+    mov (1)	MSGSRC.1<1>:ud	ORIY_CUR:w				{ NoDDClr, NoDDChk }	// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }				// Block width and height (4x16), encoded as (height-1)<<16 | (width-1)
+    send (8) LEFT_TEMP_D(0)<1>	MSGHDRL		MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y	// Read 2 GRFs from DEST_Y
+#endif
+
+
+#if defined(_FIELD) || defined(_MBAFF)
+
+    // FieldModeCurrentMbFlag selects frame vs. field access for this load; f0.0 is set when the flag bit is clear
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag
+
+	// Read Y
+    add (1)	MSGSRC.0<1>:ud	ORIX_CUR:w		12:w	{ NoDDClr }				// Block origin, move right 12 bytes
+    mov (1)	MSGSRC.1<1>:ud	ORIY_CUR:w				{ NoDDClr, NoDDChk }	// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }				// Block width and height (4x16)
+    
+    // Set message descriptor, etc.
+    
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_4x16T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud			// Read 2 GRFs from DEST_Y
+    
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+    
+ILDB_LABEL(ELSE_Y_4x16T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_4x16T)
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field
+	// NOTE(review): unlike load_Cur_Y_16x16T_Mbaff.asm, MSGSRC.1 (y) is not halved here for field access -- confirm this is intended
+	endif
+ILDB_LABEL(ENDIF_Y_4x16T):
+
+//    send (8) BUF_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	MSGDSC
+    send (8) LEFT_TEMP_D(0)<1>	MSGHDRL		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+#endif
+
+//	Transpose 4x16 to 16x4 (reference only: the transpose code at the end of this file is commented out)
+
+//	Input received from dport:
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Output of transpose:		<1>	<= <32;8,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+/*
+	// Transpose the data, also occupy 2 GRFs
+	mov (16)	PREV_MB_YB(0)<1>			BUF_B(0, 0)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(0, 16)<1>		BUF_B(0, 1)<32;8,4>		{ NoDDChk }
+	mov (16)	PREV_MB_YB(1)<1>			BUF_B(0, 2)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(1, 16)<1>		BUF_B(0, 3)<32;8,4>		{ NoDDChk }
+*/
+// End of load_Cur_Y_Right_Most_4x16
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_UV_2x8T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_UV_2x8T.asm
@@ -0,0 +1,106 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module Name: Load_Left_UV_2X8T.Asm
+//
+// Load the left MB's U+V 2x8 block (4 bytes wide x 8 rows of NV12 data)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source Region Is :UB
+//	BUF_D:			BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
+
+//	Binding Table Index: 
+//	BI_SRC_UV:		Binding Table Index Of UV Surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w		// Debug builds only: write signature 0xDDD0 to EntrySignatureC
+#endif
+
+#if defined(_PROGRESSIVE) 
+	// Read U+V: fill the block-read payload (MSGSRC.0-2), then send with an immediate descriptor
+    mov (1)	MSGSRC.0:ud		ORIX_LEFT:w							{ NoDDClr }		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_LEFT:w			1:w				{ NoDDClr, NoDDChk }		// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud						{ NoDDChk }		// NV12 U+V block width and height (4x8)
+
+	send (8) LEFT_TEMP_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV	// Read 1 GRF from DEST_UV into LEFT_TEMP_D
+#endif
+
+#if defined(_FIELD) || defined(_MBAFF)
+
+    // FieldModeCurrentMbFlag determines how to access left MB: f0.0 is set when the flag bit is clear (frame access)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w				// Get bottom field flag into f0.1
+
+	// Read U+V
+    mov (1)	MSGSRC.0:ud		ORIX_LEFT:w							{ NoDDClr }		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_LEFT:w			1:w				{ NoDDClr, NoDDChk }		// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud						{ NoDDChk }		// NV12 U+V block width and height (4x8)
+
+	// Load NV12 U+V 
+	
+    // Set message descriptor (MSGDSC) according to frame/field access
+
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_2x8T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud			// Read 1 GRF from DEST_UV (frame access)
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+
+ILDB_LABEL(ELSE_Y_2x8T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_2x8T)
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field
+
+	endif
+ILDB_LABEL(ENDIF_Y_2x8T):
+
+	// Read 1 GRF from DEST surface as the left MB has already been deblocked.
+//	send (8) BUF_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	MSGDSC	
+	send (8) LEFT_TEMP_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	
+
+#endif
+
+// End of load_Left_UV_2x8T.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_UV_2x8T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_UV_2x8T_Mbaff.asm
@@ -0,0 +1,109 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module Name: Load_Left_UV_2X8T_Mbaff.Asm
+//
+// Load the left MB's U+V 2x8 block (4 bytes wide x 8 rows of NV12 data), MBAFF variant
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source Region Is :UB
+//	BUF_D:			BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
+
+//	Binding Table Index: 
+//	BI_SRC_UV:		Binding Table Index Of UV Surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w		// Debug builds only: write signature 0xDDD0 to EntrySignatureC
+#endif
+
+    // FieldModeCurrentMbFlag determines how to access left MB: f0.0 is set when the flag bit is clear (frame access)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w				// Get bottom field flag into f0.1
+
+	// Read U+V
+    mov (1)	MSGSRC.0:ud		ORIX_LEFT:w							{ NoDDClr }		// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_LEFT:w			1:w				{ NoDDClr, NoDDChk }		// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud						{ NoDDChk }		// NV12 U+V block width and height (4x8)
+
+	// Load NV12 U+V 
+	
+    // Set message descriptor (MSGDSC) according to frame/field access
+
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_2x8T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud			// Read 1 GRF from DEST_UV (frame access)
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+
+ILDB_LABEL(ELSE_Y_2x8T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_2x8T)
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ILDB_LABEL(ENDIF_Y_2x8T):
+
+	// Read 1 GRF from DEST surface as the left MB has already been deblocked.
+//	send (8) BUF_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	MSGDSC	
+	send (8) LEFT_TEMP_D(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	
+
+
+//	Input from dport for transpose:	
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	Output of transpose:	<1>	<=== <16;8,2>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+/*
+	mov (8)	PREV_MB_UW(0,0)<1>		BUF_W(0,0)<16;8,2>		{ NoDDClr }
+	mov (8)	PREV_MB_UW(0,8)<1>		BUF_W(0,1)<16;8,2>		{ NoDDChk }
+*/
+// End of load_Left_UV_2x8T_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_Y_4x16T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_Y_4x16T.asm
@@ -0,0 +1,126 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Left_Y_4x16T.asm
+//
+// Load luma left MB 4x16 and transpose 4x16 to 16x4.
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	PREV_MB_YD:		PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 2 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w		// Debug builds only: write signature 0xDDD0 to EntrySignatureC
+#endif
+
+
+#if defined(_PROGRESSIVE) 
+	// Read Y: fill the block-read payload (MSGSRC.0-2), then send with an immediate descriptor
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		{ NoDDClr }		// Block origin (x and y words copied together)
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }		// Block width and height (4x16)
+    
+//    mov (1)	MSGDSC	DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud			// Read 2 GRFs from DEST_Y
+    send (8) LEFT_TEMP_D(0)<1>	MSGHDRL		MSGSRC<8;8,1>:ud	DAPREAD	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y	// Read 2 GRFs from DEST_Y into LEFT_TEMP_D
+#endif
+
+
+#if defined(_FIELD) || defined(_MBAFF)
+
+    // FieldModeCurrentMbFlag determines how to access left MB: f0.0 is set when the flag bit is clear (frame access)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag into f0.1
+
+	// Read Y
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		{ NoDDClr }		// Block origin (x and y words copied together)
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }		// Block width and height (4x16)
+    
+    // Set message descriptor (MSGDSC) according to frame/field access, and adjust the y origin
+    
+	(f0.0)	if	(1)		ILDB_LABEL(ELSE_Y_4x16T)
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud			// Read 2 GRFs from DEST_Y
+    
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+    
+ILDB_LABEL(ELSE_Y_4x16T): 
+	else 	(1)		ILDB_LABEL(ENDIF_Y_4x16T)
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field
+
+	endif
+ILDB_LABEL(ENDIF_Y_4x16T):
+
+//    send (8) BUF_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	MSGDSC
+    send (8) LEFT_TEMP_D(0)<1>	MSGHDRL		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	// Read into LEFT_TEMP_D with the descriptor selected above
+#endif
+
+//	Transpose 4x16 to 16x4
+
+//	Input received from dport:
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Output of transpose:		<1>	<= <32;8,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+/*
+	// Transpose the data, also occupy 2 GRFs
+	mov (16)	PREV_MB_YB(0)<1>			BUF_B(0, 0)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(0, 16)<1>		BUF_B(0, 1)<32;8,4>		{ NoDDChk }
+	mov (16)	PREV_MB_YB(1)<1>			BUF_B(0, 2)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(1, 16)<1>		BUF_B(0, 3)<32;8,4>		{ NoDDChk }
+*/
+// End of load_Left_Y_4x16T.asm
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_Y_4x16T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Left_Y_4x16T_Mbaff.asm
@@ -0,0 +1,114 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Left_Y_4x16T_Mbaff.asm
+//
+// Load luma left MB 4x16 and transpose 4x16 to 16x4.
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	PREV_MB_YD:		PREV_MB_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 2 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD0:w		// Debug builds only: write signature 0xDDD0 to EntrySignatureC
+#endif
+
+    // FieldModeCurrentMbFlag determines how to access left MB: f0.0 is set when the flag bit is clear (frame access)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag into f0.1
+
+	// Read Y
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		{ NoDDClr }		// Block origin (x and y words copied together)
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }		// Block width and height (4x16)
+    
+    // Set message descriptor (MSGDSC) according to frame/field access, and adjust the y origin
+    
+	(f0.0)	if	(1)		ELSE_Y_4x16T
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud			// Read 2 GRFs from DEST_Y
+    
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+    
+ELSE_Y_4x16T: 
+	else 	(1)		ENDIF_Y_4x16T
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ENDIF_Y_4x16T:
+
+//    send (8) BUF_D(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	MSGDSC
+    send (8) LEFT_TEMP_D(0)<1>	MSGHDRL		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	// Read into LEFT_TEMP_D with the descriptor selected above
+
+
+//	Transpose 4x16 to 16x4
+
+//	Input received from dport:
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+//	Output of transpose:		<1>	<= <32;8,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+/*
+	// Transpose the data, also occupy 2 GRFs
+	mov (16)	PREV_MB_YB(0)<1>			BUF_B(0, 0)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(0, 16)<1>		BUF_B(0, 1)<32;8,4>		{ NoDDChk }
+	mov (16)	PREV_MB_YB(1)<1>			BUF_B(0, 2)<32;8,4>		{ NoDDClr }
+	mov (16)	PREV_MB_YB(1, 16)<1>		BUF_B(0, 3)<32;8,4>		{ NoDDChk }
+*/
+// End of load_Left_Y_4x16T_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_UV_8x2.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_UV_8x2.asm
@@ -0,0 +1,100 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module Name: Load_Top_UV_8X2.Asm
+//
+// Load UV 8X2 Block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source Region Is :UB
+//	BUF_D:			BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
+
+//	Binding Table Index: 
+//	BI_SRC_UV:		Binding Table Index Of UV Surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD2:w		// Debug builds only: write signature 0xDDD2 to EntrySignatureC
+#endif
+
+#if defined(_PROGRESSIVE) 
+	// Read U+V: fill the block-read payload (MSGSRC.0-2) and the descriptor; the send is issued after the #if sections
+    mov (1)	MSGSRC.0:ud		ORIX_TOP:w						{ NoDDClr }			// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_TOP:w			1:w			{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0001000F:ud					{ NoDDChk }			// NV12 U+V block width and height (16x2)
+
+	// Read 1 GRF from DEST surface as the above MB has been deblocked.
+	//send (8) TOP_MB_UD(0)<1>	MSGHDRU		MSGSRC<8;8,1>:ud	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV	
+	mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud	
+#endif
+
+#if defined(_FIELD)
+
+//    cmp.z.f0.0 (1)  NULLREGW PicTypeC:w  	0:w							// Get pic type flag
+    and.nz.f0.1 (1)  NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag into f0.1
+	// f0.1 is used later in this file (the pic type test into f0.0 above is commented out)
+
+	// Read U+V
+    mov (1)	MSGSRC.0:ud		ORIX_TOP:w						{ NoDDClr }			// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_TOP:w			1:w			{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0001000F:ud					{ NoDDChk }			// NV12 U+V block width and height (16x2)
+
+	// Load NV12 U+V 
+	
+    // Set message descriptor
+    // Frame picture (disabled; the two instructions below are commented out)
+//    (f0.0) mov (1)	MSGDSC	DWBRMSGDSC_RC+0x00010000+BI_DEST_UV:ud			// Read 1 GRF from SRC_UV
+//	(f0.0) jmpi		Load_Top_UV_8x2
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field
+
+//Load_Top_UV_8x2:
+
+	// Read 1 GRF from DEST surface as the above MB has been deblocked.
+//	send (8) PREV_MB_UD(0)<1>	MSGHDRU		MSGSRC<8;8,1>:ud	MSGDSC	
+
+#endif
+
+	send (8) TOP_MB_UD(0)<1>	MSGHDRU		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	// Uses the MSGSRC/MSGDSC prepared by whichever section above was compiled in
+		
+// End of load_Top_UV_8x2.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_UV_8x2_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_UV_8x2_Mbaff.asm
@@ -0,0 +1,109 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module Name: Load_Top_UV_8X2_Mbaff.Asm
+//
+// Load UV 8X2 Block of the above MB (MBAFF variant: frame or field access is selected per MB)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source Region Is :UB
+//	BUF_D:			BUF_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=UD
+
+//	Binding Table Index: 
+//	BI_SRC_UV:		Binding Table Index Of UV Surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD2:w		// Debug builds only: write signature 0xDDD2 to EntrySignatureC
+#endif
+
+    // FieldModeCurrentMbFlag determines how to access above MB: f0.0 is set when the flag bit is clear (frame access)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		
+
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w   	BotFieldFlag:w			// Get bottom field flag into f0.1
+
+	// Read U+V
+    mov (1)	MSGSRC.0:ud		ORIX_TOP:w						{ NoDDClr }			// Block origin
+    asr (1)	MSGSRC.1:d		ORIY_TOP:w			1:w			{ NoDDClr, NoDDChk } 	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0001000F:ud					{ NoDDChk }			// NV12 U+V block width and height (16x2)
+
+	// Load NV12 U+V 
+	
+    // Set message descriptor (MSGDSC) according to frame/field access
+    
+	(f0.0)	if	(1)		ELSE_UV_8X2
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC+BI_DEST_UV:ud			// Read 1 GRF from DEST_UV (frame access)
+
+	// Add vertical offset 8 for bot MB in MBAFF mode
+	(f0.1) add (1)	MSGSRC.1:d	MSGSRC.1:d		8:w		
+    
+	// Dual field mode setup: f0.1 is overwritten here and is set when bit 0 of DualFieldMode is clear
+	and.z.f0.1 (1) NULLREGW		DualFieldMode:w		1:w
+	(f0.1) jmpi NOT_DUAL_FIELD_UV
+
+    add (1)	MSGSRC.1:d		MSGSRC.1:d		-2:w			{ NoDDClr }			// Load 4 lines instead of 2
+	mov (1)	MSGSRC.2:ud		0x0003000F:ud					{ NoDDChk }			// New block width and height (16x4)
+
+	add (1) MSGDSC			MSGDSC			RESP_LEN(1):ud	// 1 more GRF to receive
+
+NOT_DUAL_FIELD_UV:    
+    
+ELSE_UV_8X2: 
+	else 	(1)		ENDIF_UV_8X2
+
+	// Field picture: y origin is recomputed from ORIY_CUR; f0.1 still holds the bottom field flag on this path
+	asr (1)	MSGSRC.1:d		ORIY_CUR:w		2:w			// asr 1: NV12 U+V block origin y = half of Y comp
+														// asr 1: Reduce y by half in field access mode
+	
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_BF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(1)+DWBRMSGDSC_RC_TF+BI_DEST_UV:ud  // Read 1 GRF from DEST_UV top field
+
+	add (1) MSGSRC.1:d		MSGSRC.1:d		-2:w				// for last 2 rows of above MB
+
+	endif
+ENDIF_UV_8X2:
+
+	// Read 1 GRF (2 GRFs in dual field mode) from DEST surface as the above MB has been deblocked.
+	send (8) PREV_MB_UD(0)<1>	MSGHDRU	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	
+
+// End of load_Top_UV_8x2_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_Y_16x4.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_Y_16x4.asm
@@ -0,0 +1,100 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module Name: Load_Top_Y_16X4.asm
+//
+// Load Y 16X4 Block of the above MB to TOP_MB_YD
+//
+//----------------------------------------------------------------
+//  Symbols Need To Be Defined Before Including This Module
+//
+//	Source Region In :Ud
+//	Src_YD:			Src_Yd Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud			// 3 Grfs (2 For Y, 1 For U+V)
+//
+//	Source Region Is :Ub.  The Same Region As :Ud Region
+//	Src_YB:			Src_Yb Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub		// 2 Grfs
+//
+//	Binding Table Index: 
+//	Bi_Src_Y:		Binding Table Index Of Y Surface
+//
+//	Temp Buffer:
+//	Buf_D:			Buf_D Base=Rxx Elementsize=4 Srcregion=Region(8,1) Type=Ud
+//	Buf_B:			Buf_B Base=Rxx Elementsize=1 Srcregion=Region(16,1) Type=Ub
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD2:w		// Debug builds only: write signature 0xDDD2 to EntrySignatureC
+#endif
+
+#if defined(_PROGRESSIVE) 
+	// Read Y: fill the block-read payload (MSGSRC.0-2) and the descriptor; the send is issued after the #if sections
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w		{ NoDDClr }		// Block origin (x and y words copied together)
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud			{ NoDDChk }		// Block width and height (16x4)
+
+    mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud			// Read 2 GRFs from DEST_Y
+#endif
+
+#if defined(_FIELD)
+
+//    cmp.z.f0.0 (1)  NULLREGW 	PicTypeC:w  	0:w						// Get pic type flag
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag into f0.1
+	// f0.1 is used later in this file (the pic type test into f0.0 above is commented out)
+	
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w		{ NoDDClr }		// Block origin (x and y words copied together)
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud			{ NoDDChk }		// Block width and height (16x4)
+   
+    // Set message descriptor
+
+    // Frame picture (disabled; the two instructions below are commented out)
+//	(f0.0) mov (1)	MSGDSC	DWBRMSGDSC_RC+0x00020000+BI_DEST_Y:ud			// Read 2 GRFs from SRC_Y
+//	(f0.0) jmpi		load_Y_16x4
+
+	// Field picture: pick the bottom-field or top-field descriptor from f0.1
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from DEST_Y top field
+
+//load_Y_16x4:
+    // Read 2 GRFs from DEST surface, as the above MB has been deblocked
+//    send (8) PREV_MB_YD(0)<1>	MSGHDRY		MSGSRC<8;8,1>:ud	MSGDSC
+    
+#endif
+    
+    send (8) TOP_MB_YD(0)<1>	MSGHDRT		MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC	// Uses the MSGSRC/MSGDSC prepared by whichever section above was compiled in
+    	    
+// End of load_Top_Y_16x4.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_Y_16x4_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/load_Top_Y_16x4_Mbaff.asm
@@ -0,0 +1,111 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: load_Top_Y_16x4_Mbaff.asm
+//
+// Load the Y 16x4 block of the above MB into PREV_MB_YD (MBAFF variant)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 3 GRFs (2 for Y, 1 for U+V)
+//
+//	Source region is :ub.  The same region as :ud region
+//	SRC_YB:			SRC_YB Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub		// 2 GRFs
+//
+//	Binding table index: 
+//	BI_SRC_Y:		Binding table index of Y surface
+//
+//	Temp buffer:
+//	BUF_D:			BUF_D Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD2:w		// Debug builds only: store the constant 0xDDD2 in EntrySignatureC
+#endif
+    // FieldModeCurrentMbFlag selects how the above MB is accessed (frame path vs. field path below)
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 is set when the flag bit is clear -> frame path
+
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w   BotFieldFlag:w		// f0.1 is set when BitFields & BotFieldFlag is non-zero
+
+	// Read Y
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w		{ NoDDClr }		// Block origin: the word pair starting at ORIX_TOP -> MSGSRC.0 and MSGSRC.1
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud			{ NoDDChk }		// Block width and height (16x4), stored as size-1: 0x000F wide, 0x0003 high
+   
+    // Set message descriptor
+
+	(f0.0)	if	(1)		ELSE_Y_16x4		// Taken when FieldModeCurrentMbFlag is clear
+
+    // Frame picture
+    mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC+BI_DEST_Y:ud			// Read 2 GRFs through the BI_DEST_Y binding
+
+	// Add vertical offset 16 for bot MB in MBAFF mode (predicated on the BotFieldFlag test held in f0.1)
+	(f0.1) add (1)	MSGSRC.1:d	MSGSRC.1:d		16:w		
+	
+	// Dual field mode setup: f0.1 is reused here and becomes set when bit 0 of DualFieldMode is clear
+	and.z.f0.1 (1) NULLREGW		DualFieldMode:w		1:w
+	(f0.1) jmpi NOT_DUAL_FIELD		// Not dual field: keep the 16x4 request built above
+
+    add (1)	MSGSRC.1:d		MSGSRC.1:d		-4:w	{ NoDDClr }		// Load 8 lines in above MB: start 4 rows higher
+	mov (1)	MSGSRC.2:ud		0x0007000F:ud			{ NoDDChk }		// New block width and height (16x8)
+	
+	add (1) MSGDSC			MSGDSC			RESP_LEN(2):ud	// 2 more GRF to receive (on top of the RESP_LEN(2) set above)
+
+NOT_DUAL_FIELD:
+
+ELSE_Y_16x4: 
+	else 	(1)		ENDIF_Y_16x4
+
+	asr (1)	MSGSRC.1:d		ORIY_CUR:w		1:w		// Reduce y by half in field access mode (recomputed from ORIY_CUR, replacing the y loaded above)
+
+	// Field picture: f0.1 (BotFieldFlag test) picks the bottom- or top-field descriptor
+    (f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_BF+BI_DEST_Y:ud  // Read 2 GRFs from SRC_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	RESP_LEN(2)+DWBRMSGDSC_RC_TF+BI_DEST_Y:ud  // Read 2 GRFs from SRC_Y top field
+
+	add (1)	MSGSRC.1:d		MSGSRC.1:d		-4:w	// for last 4 rows of above MB
+
+	endif
+ENDIF_Y_16x4:
+        
+    // Read 2 GRFs (4 when the dual-field path added RESP_LEN(2)) from DEST surface, as the above MB has been deblocked
+    send (8) PREV_MB_YD(0)<1>	MSGHDRY	MSGSRC<8;8,1>:ud	DAPREAD	MSGDSC
+
+// End of load_Top_Y_16x4_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x16.asm
@@ -0,0 +1,83 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: saveNV12_16x16.asm
+//
+// Save a NV12 16x16 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 4 GRF
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD4:w		// Debug builds only: store the constant 0xDDD4 in EntrySignatureC
+#endif
+// Two block writes: 16x16 Y from SRC_YD via BI_DEST_Y, then 16x8 interleaved U+V from SRC_UD via BI_DEST_UV.
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w		// Block origin: the word pair starting at ORIX_CUR -> MSGSRC.0 and MSGSRC.1
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud		// Block width and height (16x16), stored as size-1
+
+	// Pack Y: four 16-dword moves copy SRC_YD(0..7) into the message payload
+	mov	(16)	MSGPAYLOADD(0)<1>		SRC_YD(0)		// Compressed inst: one mov (16) of dwords covers 2 GRFs
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_YD(2)
+	mov (16)	MSGPAYLOADD(4)<1>		SRC_YD(4)
+	mov (16)	MSGPAYLOADD(6)<1>		SRC_YD(6)
+    
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y		// Write 8 GRFs
+
+
+// U+V pass: MSGSRC.0 (x origin) is reused unchanged; only y and the block size are updated.
+    asr (1)	MSGSRC.1:ud		MSGSRC.1:ud			1:w						// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2<1>:ud	0x0007000F:ud								// NV12 U+V block width and height (16x8)
+
+	mov (16)	MSGPAYLOADD(0)<1>		SRC_UD(0)		// Compressed inst: one mov (16) of dwords covers 2 GRFs
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_UD(2)
+
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV		// Write 4 GRFs
+
+
+// End of saveNV12_16x16.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x4.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x4.asm
@@ -0,0 +1,80 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: saveNV12_16x4.asm
+//
+// Save a NV12 16x4 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 2 GRFs
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 1 GRF
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD5:w		// Debug builds only: store the constant 0xDDD5 in EntrySignatureC
+#endif
+// Two block writes at the ORIX_TOP origin: 16x4 Y from SRC_YD, then 16x2 interleaved U+V from SRC_UD.
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w							// Block origin: the word pair starting at ORIX_TOP -> MSGSRC.0 and MSGSRC.1
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud								// Block width and height (16x4), stored as size-1
+
+	// Pack Y: a single 16-dword move fills the 2-GRF payload
+	mov	(16)	MSGPAYLOADD(0)<1>		SRC_YD(0)						// Compressed inst: one mov (16) of dwords covers 2 GRFs
+    
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y		// Write 2 GRFs
+
+
+    asr (1)	MSGSRC.1:ud		MSGSRC.1:ud			1:w						// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2<1>:ud	0x0001000F:ud								// NV12 U+V block width and height (16x2)
+
+	// Pack U and V (the two byte-interleaving movs below are disabled; SRC_UD is copied as-is instead)
+//	mov (16)	MSGPAYLOADB(0,0)<2>		SRC_UB(0,0)
+//	mov (16)	MSGPAYLOADB(0,1)<2>		SRC_VB(0,0)
+	
+	mov (8)	MSGPAYLOADD(0,0)<1>		SRC_UD(0)	// 8 dwords = 1 GRF of U+V data
+	
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV	// Write 1 GRF
+
+// End of saveNV12_16x4.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x4T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/saveNV12_16x4T.asm
@@ -0,0 +1,143 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: saveNV12_16x4T.asm
+//
+// Transpose a 16x4 Y block to 4x16 (and the matching NV12 U+V data to 4x8) and write both to memory
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Left MB region:
+//	PREV_MB_YB:	  	Base=rxx 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//	PREV_MB_UW: 	Base=ryy 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//	Temp buffer:
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//	BUF_W:			BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+//
+//
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD6:w		// Debug builds only: store the constant 0xDDD6 in EntrySignatureC
+#endif
+// Transpose PREV_MB_YB / PREV_MB_UW in two region-shuffle steps each, then write them at the ORIX_LEFT origin.
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		// Block origin: the word pair starting at ORIX_LEFT -> MSGSRC.0 and MSGSRC.1
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			// Block width and height (4x16), stored as size-1: 0x0003 wide, 0x000F high
+    
+// Transpose Y, save them to MRFs
+
+//	16x4 Y src in GRF (each pix is specified as xy: first hex digit = column, second = row)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//  First step		(16)	<1>	<=== <16;4,1>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+	// The first step: each mov gathers a 4-byte-wide strip (columns 0-3, 4-7, 8-11, 12-15) from all 4 source rows
+	mov (16)	BUF_B(0,0)<1>			PREV_MB_YB(0,0)<16;4,1>
+	mov (16)	BUF_B(0,16)<1>			PREV_MB_YB(0,4)<16;4,1>
+	mov (16)	BUF_B(1,0)<1>			PREV_MB_YB(0,8)<16;4,1>
+	mov (16)	BUF_B(1,16)<1>			PREV_MB_YB(0,12)<16;4,1>
+
+//
+//  Second step		(16)	<1>	<=== <1;4,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+	// The second step (the four commented-out movs are a disabled <32;8,4> variant; the <1;4,4> form below is the one in use)
+//	mov	(16)	MSGPAYLOADB(0,0)<1>		BUF_B(0,0)<32;8,4> 			// Read 2 rows, write 1 row
+//	mov (16)	MSGPAYLOADB(0,16)<1>	BUF_B(0,1)<32;8,4>
+//	mov (16)	MSGPAYLOADB(1,0)<1>		BUF_B(0,2)<32;8,4>
+//	mov (16)	MSGPAYLOADB(1,16)<1>	BUF_B(0,3)<32;8,4>
+
+	mov	(16)	MSGPAYLOADB(0,0)<1>		BUF_B(0,0)<1;4,4>		// Reads every 4th byte of each 16-byte strip, so each 4x4 tile comes out transposed
+	mov (16)	MSGPAYLOADB(0,16)<1>	BUF_B(0,16)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,0)<1>		BUF_B(1,0)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,16)<1>	BUF_B(1,16)<1;4,4>
+
+//  Transposed Y in 4x16 is ready for writing to dataport.
+//
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y				// Write 2 GRFs
+
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	// Transpose U/V, save them to MRFs in NV12 format
+    asr (1)	MSGSRC.1:ud		MSGSRC.1:ud			1:w						// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2<1>:ud	0x00070003:ud								// NV12 U+V block width and height (4x8)
+
+
+//	16x2 UV src in GRF (each pix is specified as xy: first hex digit = column, second = row; every U+V pair is one word)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	First step		(8)		<1>	<=== <8;4,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	BUF_W(0,0)<1>		PREV_MB_UW(0,0)<8;4,1>
+	mov (8)	BUF_W(0,8)<1>		PREV_MB_UW(0,4)<8;4,1>
+
+//	Second step		(8)		<1>	<=== <1;2,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	MSGPAYLOADW(0,0)<1>		BUF_W(0,0)<1;2,4>
+	mov (8)	MSGPAYLOADW(0,8)<1>		BUF_W(0,8)<1;2,4>
+
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
+ 
+    send (8)	NULLREG	MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV		// Write 1 GRF
+    
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_UV_8x8.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_UV_8x8.asm
@@ -0,0 +1,83 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Cur_UV_8x8.asm
+//
+// Save UV 8x8 block (8x8U + 8x8V in NV12)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 4 GRF
+//
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD4:w		// Debug builds only: store the constant 0xDDD4 in EntrySignatureC
+#endif
+// Write the 16x8 NV12 U+V block held in SRC_UD: build MSGSRC, copy the payload, choose MSGDSC, then send.
+#if defined(_FIELD)
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// f0.1 is set when BitFields & BotFieldFlag is non-zero (bottom field)
+#endif
+
+    mov (1)	MSGSRC.0:ud		ORIX_CUR:w					{ NoDDClr } 	// Block origin x
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w			1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0007000F:ud				{ NoDDChk }		// NV12 U+V block width and height (16x8), stored as size-1
+
+	mov (16)	MSGPAYLOADD(0)<1>		SRC_UD(0) 		// Compressed inst: one mov (16) of dwords covers 2 GRFs
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_UD(2) 
+	
+#if defined(_PROGRESSIVE) 
+	mov (1)		MSGDSC	MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00400000+BI_DEST_UV
+#endif
+
+#if defined(_FIELD)
+	// Field picture: exactly one of the two predicated moves below executes
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV top field
+
+#endif
+
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC		// Issue the write with the descriptor chosen above
+
+// End of save_Cur_UV_8x8.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_UV_8x8_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_UV_8x8_Mbaff.asm
@@ -0,0 +1,92 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Cur_UV_8x8_Mbaff.asm
+//
+// Save UV 8x8 block (8x8U + 8x8V in NV12)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_UD:			SRC_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 4 GRF
+//
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD4:w		// Debug builds only: store the constant 0xDDD4 in EntrySignatureC
+#endif
+// Write the 16x8 NV12 U+V block held in SRC_UD; the frame or field descriptor and y origin are picked at run time.
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 is set when the flag bit is clear -> frame path
+
+    and.nz.f0.1 (1)	NULLREGW 	BitFields:w  	BotFieldFlag:w		// f0.1 is set when BitFields & BotFieldFlag is non-zero
+
+    mov (1)	MSGSRC.0:ud		ORIX_CUR:w					{ NoDDClr } 	// Block origin x
+    asr (1)	MSGSRC.1:ud		ORIY_CUR:w			1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0007000F:ud				{ NoDDChk }		// NV12 U+V block width and height (16x8), stored as size-1
+
+	mov (16)	MSGPAYLOADD(0)<1>		SRC_UD(0)		// Compressed inst: one mov (16) of dwords covers 2 GRFs
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_UD(2)
+
+    // Set message descriptor
+    
+	(f0.0)	if	(1)		ELSE_UV_8X8		// Taken when FieldModeCurrentMbFlag is clear
+    
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(4)+DWBWMSGDSC+BI_DEST_UV:ud			// Write 4 GRFs to DEST_UV
+
+	(f0.1) add (1)	MSGSRC.1:d	MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+
+ELSE_UV_8X8: 
+	else 	(1)		ENDIF_UV_8X8
+
+	// Field picture: f0.1 (BotFieldFlag test) picks the bottom- or top-field descriptor
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(4)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 4 GRFs to DEST_UV top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode (on top of the halving done above)
+
+	endif
+ENDIF_UV_8X8:
+    
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC		// Issue the write with the descriptor chosen above
+
+// End of save_Cur_UV_8x8_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_Y_16x16.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_Y_16x16.asm
@@ -0,0 +1,86 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Cur_Y_16x16.asm
+//
+// Save a Y 16x16 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD4:w		// Debug builds only: store the constant 0xDDD4 in EntrySignatureC
+#endif
+// Write the 16x16 Y block held in SRC_YD: build MSGSRC, copy the payload, choose MSGDSC, then send.
+#if defined(_FIELD)
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// f0.1 is set when BitFields & BotFieldFlag is non-zero (bottom field)
+#endif
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w	{ NoDDClr }		// Block origin: the word pair starting at ORIX_CUR -> MSGSRC.0 and MSGSRC.1
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud		{ NoDDChk }		// Block width and height (16x16), stored as size-1
+
+	// Pack Y: four 16-dword moves copy SRC_YD(0..7) into the message payload
+	mov	(16)	MSGPAYLOADD(0)<1>		SRC_YD(0)		// Compressed inst: one mov (16) of dwords covers 2 GRFs
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_YD(2)       
+	mov (16)	MSGPAYLOADD(4)<1>		SRC_YD(4)       
+	mov (16)	MSGPAYLOADD(6)<1>		SRC_YD(6)       
+    
+
+#if defined(_PROGRESSIVE) 
+	mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud	
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00800000+BI_DEST_Y
+#endif
+
+#if defined(_FIELD)
+	// Field picture: exactly one of the two predicated moves below executes
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y top field
+
+#endif
+
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC		// Issue the write with the descriptor chosen above
+    	
+// End of save_Cur_Y_16x16.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_Y_16x16_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Cur_Y_16x16_Mbaff.asm
@@ -0,0 +1,94 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Cur_Y_16x16_Mbaff.asm
+//
+// Save a Y 16x16 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 8 GRFs
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+// MBAFF variant: build the 16x16 Y write message, choosing frame or field access per MB, and send it.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD4:w	// Debug builds only: record this module's entry signature
+#endif
+
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 set when the masked field-mode bit is zero; the (f0.0) branch below is the frame case
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag into f0.1
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_CUR<2;2,1>:w		{ NoDDClr }		// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F000F:ud			{ NoDDChk }		// Block width and height (16x16 or 12x16)
+
+	// Pack Y    
+	mov	(16)	MSGPAYLOADD(0)<1>		SRC_YD(0)		// Compressed inst
+	mov (16)	MSGPAYLOADD(2)<1>		SRC_YD(2)	// 16 dwords per mov; the four movs presumably cover the 8-GRF source
+	mov (16)	MSGPAYLOADD(4)<1>		SRC_YD(4)
+	mov (16)	MSGPAYLOADD(6)<1>		SRC_YD(6)
+    
+    // Set message descriptor
+
+	(f0.0)	if	(1)		ELSE_Y_16x16	// Frame case when f0.0 is set; otherwise continue at ELSE_Y_16x16
+    
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+BI_DEST_Y:ud			// Write 8 GRFs to DEST_Y
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+
+ELSE_Y_16x16: 
+	else 	(1)		ENDIF_Y_16x16
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(8)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 8 GRFs to DEST_Y top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ENDIF_Y_16x16:
+    
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write using the descriptor chosen in the branch above
+
+// End of save_Cur_Y_16x16_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_UV_8x2T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_UV_8x2T.asm
@@ -0,0 +1,102 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Left_UV_8x2T.asm
+//
+// Transpose 8x2 to 2x8 UV data and write to memory 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Left MB region:
+//	PREV_MB_UW: 	Base=ryy 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//	Temp buffer:
+//	LEFT_TEMP_W:	LEFT_TEMP_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+//
+// Code below: transpose the left MB's U/V data into the message payload and send the write.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD6:w	// Debug builds only: record this module's entry signature
+#endif
+
+#if defined(_FIELD)
+    and.nz.f0.1 (1)  NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag
+#endif
+
+	// Transpose U/V, save them to MRFs in NV12 format
+    mov (1)	MSGSRC.0:ud		ORIX_LEFT:w						{ NoDDClr }			// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_LEFT:w			1:w			{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud					{ NoDDChk }			// NV12 U+V block width and height (4x8)
+
+
+//	16x2 UV src in GRF (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	First step		(8)		<1>	<=== <8;4,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	LEFT_TEMP_W(0,0)<1>		PREV_MB_UW(0,0)<8;4,1>		{ NoDDClr }
+	mov (8)	LEFT_TEMP_W(0,8)<1>		PREV_MB_UW(0,4)<8;4,1>		{ NoDDChk }
+
+//	Second step		(8)		<1>	<=== <1;2,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	MSGPAYLOADW(0,0)<1>		LEFT_TEMP_W(0,0)<1;2,4>
+	mov (8)	MSGPAYLOADW(0,8)<1>		LEFT_TEMP_W(0,8)<1;2,4>
+
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
+ 
+#if defined(_PROGRESSIVE) 
+	mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud	// Progressive picture: write 1 GRF to DEST_UV
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00100000+BI_DEST_UV
+#endif
+
+#if defined(_FIELD)
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field
+
+#endif
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write using the descriptor placed in MSGDSC above
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_UV_8x2T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_UV_8x2T_Mbaff.asm
@@ -0,0 +1,112 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Left_UV_8x2T_Mbaff.asm
+//
+// Transpose 8x2 to 2x8 UV data and write to memory 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Left MB region:
+//	PREV_MB_UW: 	Base=ryy 	ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//	Temp buffer:
+//	BUF_W:			BUF_W Base=rxx ElementSize=2 SrcRegion=REGION(8,1) Type=uw
+//
+// MBAFF variant: transpose the left MB's U/V data into the payload, pick frame or field access, and send the write.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD6:w	// Debug builds only: record this module's entry signature
+#endif
+
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 set when the masked field-mode bit is zero; the (f0.0) branch below is the frame case
+   
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag into f0.1
+
+	// Transpose U/V, save them to MRFs in NV12 format
+    mov (1)	MSGSRC.0:ud		ORIX_LEFT:w					{ NoDDClr } 			// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_LEFT:w			1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x00070003:ud				{ NoDDChk }				// NV12 U+V block width and height (4x8)
+
+
+//	16x2 UV src in GRF (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 31 31 21 21 11 11 01 01 70 70 60 60 50 50 40 40 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//	First step		(8)		<1>	<=== <8;4,1>:w
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 61 61 51 51 41 41 70 70 60 60 50 50 40 40 31 31 21 21 11 11 01 01 30 30 20 20 10 10 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	BUF_W(0,0)<1>		PREV_MB_UW(0,0)<8;4,1>	{ NoDDClr }
+	mov (8)	BUF_W(0,8)<1>		PREV_MB_UW(0,4)<8;4,1>	{ NoDDChk }
+
+//	Second step		(8)		<1>	<=== <1;2,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|71 71 70 70 61 61 60 60 51 51 50 50 41 41 40 40 31 31 30 30 21 21 20 20 11 11 10 10 01 01 00 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+	mov (8)	MSGPAYLOADW(0,0)<1>		BUF_W(0,0)<1;2,4>
+	mov (8)	MSGPAYLOADW(0,8)<1>		BUF_W(0,8)<1;2,4>
+
+//  Transposed U+V in NV12 in 4x8 is ready for writing to dataport.
+ 
+    // Set message descriptor
+    
+	(f0.0)	if	(1)		ELSE_UV_8X2T	// Frame case when f0.0 is set; otherwise continue at ELSE_UV_8X2T
+    
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+BI_DEST_UV:ud			// Write 1 GRF to DEST_UV
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		8:w		// Add vertical offset 8 for bot MB in MBAFF mode
+
+ELSE_UV_8X2T: 
+	else 	(1)		ENDIF_UV_8X2T
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ENDIF_UV_8X2T:
+
+    send (8)	null:ud		MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write using the descriptor chosen in the branch above
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_Y_16x4T.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_Y_16x4T.asm
@@ -0,0 +1,119 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Left_Y_16x4T.asm
+//
+// Transpose 16x4 to 4x16 Y data and write to memory 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Left MB region:
+//	PREV_MB_YB:	  	Base=rxx 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//	Temp buffer:
+//	LEFT_TEMP_B:	LEFT_TEMP_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//
+// Code below: transpose the left MB's Y data into the message payload and send the write.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD6:w	// Debug builds only: record this module's entry signature
+#endif
+
+#if defined(_FIELD)
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag
+#endif
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		{ NoDDClr }		// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }		// 4x16
+    
+// Transpose Y, save them to MRFs
+
+//	16x4 Y src in GRF (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//  First step		(16)	<1>	<=== <16;4,1>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+	// The first step
+	mov (16)	LEFT_TEMP_B(0,0)<1>			PREV_MB_YB(0,0)<16;4,1>		{ NoDDClr }	
+	mov (16)	LEFT_TEMP_B(0,16)<1>		PREV_MB_YB(0,4)<16;4,1>		{ NoDDChk }
+	mov (16)	LEFT_TEMP_B(1,0)<1>			PREV_MB_YB(0,8)<16;4,1>		{ NoDDClr }
+	mov (16)	LEFT_TEMP_B(1,16)<1>		PREV_MB_YB(0,12)<16;4,1>	{ NoDDChk }
+
+//
+//  Second step		(16)	<1>	<=== <1;4,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+	// The second step
+	mov	(16)	MSGPAYLOADB(0,0)<1>		LEFT_TEMP_B(0,0)<1;4,4>
+	mov (16)	MSGPAYLOADB(0,16)<1>	LEFT_TEMP_B(0,16)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,0)<1>		LEFT_TEMP_B(1,0)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,16)<1>	LEFT_TEMP_B(1,16)<1;4,4>
+
+//  Transposed Y in 4x16 is ready for writing to dataport.
+
+
+#if defined(_PROGRESSIVE) 
+	mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud	// Progressive picture: write 2 GRFs to DEST_Y
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00200000+BI_DEST_Y
+#endif
+
+#if defined(_FIELD)
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field
+
+#endif
+
+    send (8)	null:ud		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write using the descriptor placed in MSGDSC above
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_Y_16x4T_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Left_Y_16x4T_Mbaff.asm
@@ -0,0 +1,131 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Left_Y_16x4T_Mbaff.asm
+//
+// Transpose 16x4 to 4x16 Y data and write to memory 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Left MB region:
+//	PREV_MB_YB:	  	Base=rxx 	ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//	Temp buffer:
+//	BUF_B:			BUF_B Base=rxx ElementSize=1 SrcRegion=REGION(16,1) Type=ub
+//
+// MBAFF variant: transpose the left MB's Y data into the payload, pick frame or field access, and send the write.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD6:w	// Debug builds only: record this module's entry signature
+#endif
+
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 set when the masked field-mode bit is zero; the (f0.0) branch below is the frame case
+
+    and.nz.f0.1 (1)	NULLREGW 		BitFields:w  	BotFieldFlag:w	// Get bottom field flag into f0.1
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_LEFT<2;2,1>:w		{ NoDDClr }		// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x000F0003:ud			{ NoDDChk }		// 4x16
+    
+// Transpose Y, save them to MRFs
+
+//	16x4 Y src in GRF (each pix is specified as yx)
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+//  First step		(16)	<1>	<=== <16;4,1>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+
+	// The first step
+	mov (16)	BUF_B(0,0)<1>			PREV_MB_YB(0,0)<16;4,1>		{ NoDDClr }
+	mov (16)	BUF_B(0,16)<1>			PREV_MB_YB(0,4)<16;4,1>		{ NoDDChk }
+	mov (16)	BUF_B(1,0)<1>			PREV_MB_YB(0,8)<16;4,1>		{ NoDDClr }
+	mov (16)	BUF_B(1,16)<1>			PREV_MB_YB(0,12)<16;4,1>	{ NoDDChk }	
+
+//
+//  Second step		(16)	<1>	<=== <1;4,4>
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//	|f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80|
+//	+-----------------------+-----------------------+-----------------------+-----------------------+
+//
+	// The second step
+	mov	(16)	MSGPAYLOADB(0,0)<1>		BUF_B(0,0)<1;4,4>
+	mov (16)	MSGPAYLOADB(0,16)<1>	BUF_B(0,16)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,0)<1>		BUF_B(1,0)<1;4,4>
+	mov (16)	MSGPAYLOADB(1,16)<1>	BUF_B(1,16)<1;4,4>
+
+//  Transposed Y in 4x16 is ready for writing to dataport.
+
+	//***** Left MB is loaded the same as indicated by FieldModeCurrentMbFlag.
+
+    // Set message descriptor
+ 
+	(f0.0)	if	(1)		ELSE_Y_16x4T	// Frame case when f0.0 is set; otherwise continue at ELSE_Y_16x4T
+    
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+BI_DEST_Y:ud			// Write 2 GRFs to DEST_Y
+
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		// Add vertical offset 16 for bot MB in MBAFF mode
+
+ELSE_Y_16x4T: 
+	else 	(1)		ENDIF_Y_16x4T
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field
+
+	asr (1)	MSGSRC.1:d		MSGSRC.1:d		1:w					// Reduce y by half in field access mode
+
+	endif
+ENDIF_Y_16x4T:
+
+    send (8)	null:ud		MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write using the descriptor chosen in the branch above
+
+
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_UV_8x2.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_UV_8x2.asm
@@ -0,0 +1,82 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Top_UV_8x2.asm
+//
+// Save UV 8x2 block (8x2U + 8x2V in NV12)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	TOP_MB_UD:		TOP_MB_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 1 GRF
+//
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+// Build the NV12 U+V write message (origin, size, 1-GRF payload, descriptor) and send it.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD5:w	// Debug builds only: record this module's entry signature
+#endif
+	
+#if defined(_FIELD)
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag
+#endif
+
+    mov (1)	MSGSRC.0:ud		ORIX_TOP:w					{ NoDDClr }				// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_TOP:w			1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0001000F:ud				{ NoDDChk }				// NV12 U+V block width and height (16x2)
+
+	mov (8)	MSGPAYLOADD(0,0)<1>		TOP_MB_UD(0)	// Copy 8 dwords of U/V data from TOP_MB_UD into the payload
+	
+
+#if defined(_PROGRESSIVE) 
+	mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud	// Progressive picture: write 1 GRF to DEST_UV (_WC presumably requests a write-commit reply - confirm)
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00100000+BI_DEST_UV
+#endif
+
+#if defined(_FIELD)
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field
+
+#endif
+
+    send (8)	WritebackResponse(0)<1>		MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write; the send's destination is WritebackResponse(0) rather than null
+// End of save_Top_UV_8x2.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_UV_8x2_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_UV_8x2_Mbaff.asm
@@ -0,0 +1,99 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Top_UV_8x2_Mbaff.asm
+//
+// Save UV 8x2 block (8x2U + 8x2V in NV12)
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	PREV_MB_UD:		PREV_MB_UD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud		// indices 0 and 1 are read
+//
+//	Binding table index: 
+//	BI_DEST_UV:		Binding table index of UV surface (NV12)
+//
+//----------------------------------------------------------------
+// MBAFF variant: build the NV12 U+V write message, choosing payload rows and frame/field access, and send it.
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD5:w	// Debug builds only: record this module's entry signature
+#endif
+	and.z.f0.1 (8) NULLREGW		DualFieldMode<0;1,0>:w		1:w	// f0.1 set (8 channels) when bit 0 of DualFieldMode is clear
+
+    // FieldModeCurrentMbFlag determines how to access above MB
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 set when the masked field-mode bit is zero; the (f0.0) branch below is the frame case
+    
+	// Pack U and V
+    mov (1)	MSGSRC.0:ud		ORIX_TOP:w					{ NoDDClr } 			// Block origin
+    asr (1)	MSGSRC.1:ud		ORIY_TOP:w			1:w		{ NoDDClr, NoDDChk }	// NV12 U+V block origin y = half of Y comp
+    mov (1)	MSGSRC.2:ud		0x0001000F:ud				{ NoDDChk }				// NV12 U+V block width and height (16x2)
+
+	// Dual field mode
+	(f0.1) mov (8)	MSGPAYLOADD(0)<1>		PREV_MB_UD(0)	// not dual field mode: take PREV_MB_UD(0)
+    (-f0.1) mov (8) MSGPAYLOADD(0)<1>		PREV_MB_UD(1)	// for dual field mode, write last 2 rows
+	
+    // Set message descriptor
+
+    and.nz.f0.1 (1) NULLREGW 		BitFields:w   BotFieldFlag:w	// f0.1 is reused from here on: it now holds the bottom field flag
+    
+	(f0.0)	if	(1)		ELSE_UV_8X2_SAVE	// Frame case when f0.0 is set; otherwise continue at ELSE_UV_8X2_SAVE
+
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+BI_DEST_UV:ud			// Write 1 GRFs to DEST_UV
+
+	// Add vertical offset 8 for bot MB in MBAFF mode
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		8:w		
+
+ELSE_UV_8X2_SAVE: 
+	else 	(1)		ENDIF_UV_8X2_SAVE
+	// NOTE(review): y is recomputed from ORIY_CUR here, replacing the ORIY_TOP-based value set above - confirm intended
+	asr (1)	MSGSRC.1:d		ORIY_CUR:w		2:w			// asr 1: NV12 U+V block origin y = half of Y comp
+														// asr 1: Reduce y by half in field access mode
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(1)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_UV:ud  // Write 1 GRF to DEST_UV top field
+
+	add (1)	MSGSRC.1:d		MSGSRC.1:d		-2:w		// for last 4 rows of above MB
+
+	endif
+ENDIF_UV_8X2_SAVE:
+
+    send (8)	WritebackResponse(0)<1>		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write; the send's destination is WritebackResponse(0) rather than null
+
+// End of save_Top_UV_8x2_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_Y_16x4.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_Y_16x4.asm
@@ -0,0 +1,82 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Top_Y_16x4.asm
+//
+// Save a Y 16x4 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 2 GRFs
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD5:w	// Only assembled when _DEBUG is defined: store marker 0xDDD5
+#endif
+
+#if defined(_FIELD)
+    and.nz.f0.1 (1) NULLREGW 	BitFields:w  	BotFieldFlag:w			// Get bottom field flag: f0.1 set when BotFieldFlag is set
+#endif
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w			{ NoDDClr }		// Block origin: two consecutive words starting at ORIX_TOP
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud				{ NoDDChk }		// Block width and height (16x4)
+
+	// Pack Y    
+	mov	(16)	MSGPAYLOADD(0)<1>		TOP_MB_YD(0)					// Compressed inst
+    
+
+#if defined(_PROGRESSIVE) 
+	mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud	// Progressive build: write 2 GRFs to DEST_Y, no field select
+//    send (8)	NULLREG		MSGHDR		MSGSRC<8;8,1>:ud	DWBWMSGDSC+0x00200000+BI_DEST_Y
+#endif
+
+#if defined(_FIELD)
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field
+
+#endif
+
+    send (8)	WritebackResponse(0)<1>		MSGHDR	MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// NOTE(review): MSGDSC is only assigned in this module when _PROGRESSIVE or _FIELD is defined
+// End of save_Top_Y_16x4.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_Y_16x4_Mbaff.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/save_Top_Y_16x4_Mbaff.asm
@@ -0,0 +1,99 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: save_Top_Y_16x4_Mbaff.asm
+//
+// Save a Y 16x4 block 
+//
+//----------------------------------------------------------------
+//  Symbols need to be defined before including this module
+//
+//	Source region in :ud
+//	SRC_YD:			SRC_YD Base=rxx ElementSize=4 SrcRegion=REGION(8,1) Type=ud			// 2 GRFs
+//
+//	Binding table index: 
+//	BI_DEST_Y:		Binding table index of Y surface
+//
+//----------------------------------------------------------------
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0xDDD5:w	// Only assembled when _DEBUG is defined: store marker 0xDDD5
+#endif
+
+	and.z.f0.1 (16) NULLREGW		DualFieldMode<0;1,0>:w		1:w	// f0.1 set (16 channels) when bit 0 of DualFieldMode is clear
+
+    // FieldModeCurrentMbFlag determines how to access above MB
+	and.z.f0.0 (1) 	null:w		r[ECM_AddrReg, BitFlags]:ub		FieldModeCurrentMbFlag:w		// f0.0 set when the flag bit is clear
+
+    mov (2)	MSGSRC.0<1>:ud	ORIX_TOP<2;2,1>:w		{ NoDDClr }			// Block origin
+    mov (1)	MSGSRC.2<1>:ud	0x0003000F:ud			{ NoDDChk }			// Block width and height (16x4)
+
+	// Pack Y
+	// Dual field mode
+	(f0.1) mov	(16) MSGPAYLOADD(0)<1>		PREV_MB_YD(0)				// Compressed inst; used when DualFieldMode bit 0 is clear
+    (-f0.1)  mov (16) MSGPAYLOADD(0)<1>		PREV_MB_YD(2)				// for dual field mode, write last 4 rows
+    
+    // Set message descriptor
+
+    and.nz.f0.1 (1) NULLREGW 		BitFields:w   BotFieldFlag:w	// f0.1 is reused from here on: set when BotFieldFlag is set in BitFields
+
+	(f0.0)	if	(1)		ELSE_Y_16x4_SAVE	// Taken when the FieldModeCurrentMbFlag bit is clear
+    
+    // Frame picture
+    mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+BI_DEST_Y:ud			// Write 2 GRFs to DEST_Y
+
+	// Add vertical offset 16 for bot MB in MBAFF mode
+	(f0.1) add (1)	MSGSRC.1:d		MSGSRC.1:d		16:w		
+
+ELSE_Y_16x4_SAVE: 
+	else 	(1)		ENDIF_Y_16x4_SAVE
+
+	asr (1)	MSGSRC.1:d		ORIY_CUR:w		1:w					// Reduce y by half in field access mode
+
+	// Field picture
+    (f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCBF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y bottom field
+    (-f0.1) mov (1)	MSGDSC	MSG_LEN(2)+DWBWMSGDSC_WC+ENMSGDSCTF+BI_DEST_Y:ud  // Write 2 GRFs to DEST_Y top field
+
+	add (1)	MSGSRC.1:d		MSGSRC.1:d		-4:w	// for last 4 rows of above MB
+
+	endif
+ENDIF_Y_16x4_SAVE:
+    
+    send (8)	WritebackResponse(0)<1>		MSGHDR		MSGSRC<8;8,1>:ud	DAPWRITE	MSGDSC	// Issue the write with the descriptor chosen above
+
+// End of save_Top_Y_16x4_Mbaff.asm
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB.asm
@@ -0,0 +1,68 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: WriteURB.asm
+//
+// General purpose module to write data to URB using the URB handle/offset in r0
+//
+//----------------------------------------------------------------
+//	Assume:
+//	- a0.0 and a0.1 hold the msg desc and have been assigned the URB offset and msg size
+//	- MRFs are already assigned with data.
+//----------------------------------------------------------------
+//
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignature:w			0x3535:w	// Only assembled when _DEBUG is defined: store marker 0x3535
+#endif
+
+// URB write header:
+//mov (8) MSGSRC.0:ud 	r0.0<8;8,1>:ud			// Copy parent R0 header
+
+//shr (1)	Temp2_W:uw	URBOffset:uw	1:w	// divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.
+//add (1) MSGSRC.0:uw		r0.0:uw		Temp2_W:uw	
+
+
+shr (1)	MSGSRC.0:uw		URBOffset:uw	1:w	// Divide by 2: URB entries are counted in 512-bit units, the offset in 256-bit units.
+//mov (1)	MSGSRC.0:uw		URBOffset_2:uw
+
+//mov (1) MSGSRC.1:ud 	0:ud					// Reset Handle 1
+
+send  null:uw 	m0	  MSGSRC<8;8,1>:uw		URBWRITE	URBWriteMsgDesc:ud // URB write; descriptor is taken from URBWriteMsgDesc
+//send  null:ud 	MRF0	 null:ud	URBWriteMsgDesc:ud		// URB write	
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB_UV_Child.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB_UV_Child.asm
@@ -0,0 +1,69 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: writeURB_UV_Child.asm
+//
+// General purpose module to write data to URB using the URB handle/offset in r0
+//
+//----------------------------------------------------------------
+//	Assume:
+//	- a0.0 and a0.1 hold the msg desc and have been assigned the URB offset and msg size
+//	- MRFs are already assigned with data.
+//----------------------------------------------------------------
+//
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x3535:w	// Only assembled when _DEBUG is defined: store marker 0x3535
+#endif
+
+// URB write header:
+//mov (8) MSGSRC.0:ud 	r0.0<8;8,1>:ud			// Copy parent R0 header
+
+//shr (1)	Temp2_W:uw	URBOffsetC:uw	1:w	// divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.
+//add (1) MSGSRC.0:uw		r0.0:uw		Temp2_W:uw	
+
+shr (1)	MSGSRC.0:uw		URBOffsetC:uw	1:w	// Divide by 2: URB entries are counted in 512-bit units, the offset in 256-bit units.
+
+//mov (1) MSGSRC.1:ud 	0:ud					// Reset Handle 1
+
+	// URB write 1 MRF,
+	// Current MB offset is in URBOffsetC, use it as write origin
+	// Add 2 to offset to store data to be passed to the right MB (presumably the +0x20 in the descriptor below - TODO confirm)
+
+send  null:uw 	m0	  MSGSRC<8;8,1>:uw		URBWRITE	MSG_LEN(1)+URBWMSGDSC+0x20 // URB write
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB_Y_Child.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/ildb/writeURB_Y_Child.asm
@@ -0,0 +1,70 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Module name: writeURB_Y_Child.asm
+//
+// General purpose module to write data to URB using the URB handle/offset in r0
+//
+//----------------------------------------------------------------
+//	Assume:
+//	- a0.0 and a0.1 hold the msg desc and have been assigned the URB offset and msg size
+//	- MRFs are already assigned with data.
+//----------------------------------------------------------------
+//
+//  16x16 byte pixel block can be saved using just 1 "send" instruction.
+
+#if defined(_DEBUG) 
+	mov		(1)		EntrySignatureC:w			0x3535:w	// Only assembled when _DEBUG is defined: store marker 0x3535
+#endif
+
+// URB write header:
+//mov (8) MSGSRC.0:ud 	r0.0<8;8,1>:ud			// Copy parent R0 header
+
+//shr (1)	Temp2_W:uw	URBOffsetC:uw	1:w	// divide by 2, because URB entry is counted by 512bits.  Offset is counted by 256bits.
+//add (1) MSGSRC.0:uw		r0.0:uw		Temp2_W:uw	
+
+shr (1)	MSGSRC.0:uw		URBOffsetC:uw	1:w	// Divide by 2: URB entries are counted in 512-bit units, the offset in 256-bit units.
+
+//mov (1) MSGSRC.1:ud 	0:ud					// Reset Handle 1
+
+	// URB write 2 MRFs, 
+	// Current MB offset is in URBOffsetC, use it as write origin
+	// Add 2 to offset to store data to be passed to the right MB (presumably the +0x20 in the descriptor below - TODO confirm)
+	//mov (1) URBWriteMsgDesc:ud		0x06300020:ud
+
+send  null:uw 	m0	  MSGSRC<8;8,1>:uw		URBWRITE	MSG_LEN(2)+URBWMSGDSC+0x20 // URB write
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AVCMCInter.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AVCMCInter.asm
@@ -0,0 +1,284 @@
+/*
+ * All inter-prediction macroblock kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+// Kernel name: AVCMCInter.asm
+
+#ifdef INTERLABEL
+#undef INTERLABEL
+#endif	// Drop any earlier definition so the macro can be redefined below
+
+#if defined(MBAFF)
+//	< MBaff_Motion >   INTERLABEL(x) expands to x_MBF
+#define INTERLABEL(x)	x##_##MBF
+#elif defined(FIELD)
+//	< FieldMB_Motion > INTERLABEL(x) expands to x_FLD
+#define INTERLABEL(x)	x##_##FLD
+#else // FRAME
+//	< FrameMB_Motion > INTERLABEL(x) expands to x_FRM
+#define INTERLABEL(x)	x##_##FRM
+#endif
+//
+// Decoding an inter-prediction macroblock (conditional compile)
+//	-DMBAFF : MBAff picture MB
+//	-DFRAME : Frame picture MB
+//	-DFIELD : Field picture MB
+//	-DMBAFF -DMONO : MBAff mono picture MB
+//	-DFRAME -DMONO : Frame mono picture MB
+//	-DFIELD -DMONO : Field mono picture MB
+
+
+//#if !defined(__AVCMCInter__)		// Make sure this is only included once
+//#define __AVCMCInter__
+
+
+// TODO: header files need to be in sync with intra prediction
+#include "header.inc"
+#include "inter_Header.inc"
+
+// TODO: Kernel names for mono cases
+#if defined(MBAFF)
+.kernel MBAff_Motion
+MBAFF_MB:
+#elif defined(FIELD)
+.kernel FieldMB_Motion
+FIELD_MB:
+#else // Frame
+.kernel FrameMB_Motion
+FRAME_MB:
+#endif
+
+#ifdef _DEBUG
+// WA for FULSIM so we'll know which kernel is being debugged: a per-variant constant is written to acc0
+#if defined(MBAFF)
+mov (1) acc0:ud 0x0aaa55a5:ud
+#elif defined(FIELD)
+mov (1) acc0:ud 0x0baa55a5:ud
+#else // Frame
+mov (1) acc0:ud 0x0caa55a5:ud
+#endif
+#endif
+
+
+#ifdef SW_SCOREBOARD
+    CALL(scoreboard_start_inter,1)
+#endif
+
+	mov (8)		gMSGSRC<1>:ud		r0.0<8;8,1>:ud		// Initialize message header payload with R0
+	
+	and (1)		gwMBTYPE<1>			gMBTYPE:ub						nMBTYPE_MASK:w		// MB type: gMBTYPE byte masked with nMBTYPE_MASK
+	shl (2)		gX<1>:w				gORIX<2;2,1>:ub					4:w // Convert MB origin to pixel unit (two bytes at gORIX, each shifted left by 4, i.e. x16)
+	
+//	#include "process_inter16x16.asm"					// Handle B_L0_16x16 case with zero MVs and weighted pred off.
+	// In the case of B_L0_16x16 with zero MVs and weighted pred off, the kernel jumps to INTERLABEL(EXIT_LOOP).
+	
+INTERLABEL(INIT_MBPARA):
+	#include "initialize_MBPara.asm"
+	
+	
+    //========================= BEGIN - LOOP_SUBMB ===========================
+	mov (1)		gLOOP_SUBMB:uw		0:uw				// 0, 2, 4, 6
+INTERLABEL(LOOP_SUBMB):
+
+	//========================== BEGIN - LOOP_DIR ============================
+	// Prediction flag (gPREDFLAG - 0:Pred_L0, 1:Pred_L1, 2:BiPred)
+	asr (1)		gPREDFLAG:w			gSUBMB_MODE:ub					gLOOP_SUBMB:uw	// Shift the field for the current sub-MB down to bit 0
+	mov (1)		gLOOP_DIR:uw		1:uw				// 1, 0
+	and (1)		gPREDFLAG:w			gPREDFLAG:w						0x3:w	// Keep only the low 2 bits
+INTERLABEL(LOOP_DIR):
+
+	cmp.e.f0.0 (1) null:w			gLOOP_DIR:w						gPREDFLAG:w	// Skip this pass when gLOOP_DIR equals gPREDFLAG
+	(f0.0) jmpi	INTERLABEL(LOOP_DIR_CONTINUE)	// (pass 1 skipped for Pred_L1, pass 0 for Pred_L0; BiPred=2 never matches)
+	
+    // Get binding table index 
+    // & reference picture parity (gREFPARITY - 0:top, 0x100:bottom, x:frame)
+    // & address of interpolation result
+    cmp.e.f0.1 (1) null:w			gLOOP_DIR:w						1:w
+    (f0.1) mov (1)		gpINTP:ud			nOFFSET_INTP0:ud						{NoDDClr} // Pass 1: results go to nOFFSET_INTP0
+    (f0.1) and (1)		gBIDX:w				r[pBIDX]:ub						0x7f:w	{NoDDChk} // Pass 1: low 7 bits of the byte at pBIDX
+    (-f0.1) mov (1)		gpINTP:ud			nOFFSET_INTP1:ud						{NoDDClr} // Pass 0: results go to nOFFSET_INTP1
+	(-f0.1) and (1)		gBIDX:w				r[pBIDX,4]:ub					0x7f:w	{NoDDChk} // Pass 0: low 7 bits of the byte at pBIDX+4
+#if defined(MBAFF) || defined(FIELD)
+    (f0.1) and (1)		gREFPARITY:w				r[pBIDX]:ub						0x80:w	// Bit 7 of the same byte
+    (-f0.1) and (1)		gREFPARITY:w				r[pBIDX,4]:ub					0x80:w
+    shl (1)		gREFPARITY:w		gREFPARITY<0;1,0>:w				1:w	// 0x80 -> 0x100, matching the parity encoding noted above
+#endif
+
+	// Sub MB shape
+	asr (1)		gSHAPETEMP:w		gSUBMB_SHAPE:ub					gLOOP_SUBMB:w	// Shift the field for the current sub-MB down to bit 0
+	
+    // Chroma MV adjustment & Set message descriptor for frame/field read
+#if defined(MBAFF)
+	#include "chromaMVAdjust.asm" 
+    and.nz.f0.0 (1) null:uw			gFIELDMBFLAG:ub					nFIELDMB_MASK:uw	// f0.0 set when the masked field-MB flag is non-zero
+    (f0.0) add (1) gD0:ud			gBIDX:uw						nDWBRMSGDSC_SC_TF:ud	// Field MB: descriptor base + binding index, kept in gD0
+    (-f0.0) add (1)	gMSGDSC_R:ud	gBIDX:uw						nDWBRMSGDSC_SC:ud	// Otherwise: descriptor base + binding index
+    (f0.0) add (1) gMSGDSC_R:ud		gD0:ud							gREFPARITY:uw	// Field MB: add reference parity
+#elif defined(FIELD)
+	#include "chromaMVAdjust.asm" 
+    add (1)		gMSGDSC_R:ud		gBIDX:uw						nDWBRMSGDSC_SC_TF:ud
+    add (1)		gMSGDSC_R:ud		gMSGDSC_R:ud					gREFPARITY:uw	// Add reference parity
+#else // FRAME
+	add (1)		gMSGDSC_R:ud		gBIDX:uw						nDWBRMSGDSC_SC:ud
+#endif
+
+	and.nz.f0.1 (1) null:w			gSHAPETEMP:w					3:w	// Non-zero low 2 bits select the 4x4 path
+	(f0.1) jmpi INTERLABEL(PROCESS4x4)
+	
+	//======================== BEGIN - PROCESS 8x8 ===========================
+	
+	// Reference block load
+	#include "loadRef_Y_16x13.asm"
+#ifndef MONO
+#if defined(MBAFF) || defined(FIELD)
+	add (1)		r[pMV,2]:w			r[pMV,2]:w						gCHRMVADJ:w	// Apply gCHRMVADJ to the word at pMV+2 before the chroma load (presumably MV y - TODO confirm)
+#endif
+	#include "loadRef_C_10x5.asm"
+#endif
+
+	// Interpolation
+	//CALL_INTER(INTERLABEL(Interpolate_Y_8x8_Func), 1)
+	#include "interpolate_Y_8x8.asm"
+#ifndef MONO
+	//CALL_INTER(INTERLABEL(Interpolate_C_4x4_Func), 1)
+	#include "interpolate_C_4x4.asm"
+#endif
+
+	jmpi INTERLABEL(ROUND_SHIFT_C)	// Skip the 4x4 path
+	//========================= END - PROCESS 8x8 ============================
+	
+	//======================== BEGIN - LOOP_SUBMBPT ==========================
+INTERLABEL(PROCESS4x4):
+
+	mov (1)		gLOOP_SUBMBPT:uw	4:uw				// 4, 3, 2, 1
+INTERLABEL(LOOP_SUBMBPT):
+
+	// Reference block load
+	#include "loadRef_Y_16x9.asm"
+#ifndef MONO
+#if defined(MBAFF) || defined(FIELD)
+	add (1)		r[pMV,2]:w			r[pMV,2]:w						gCHRMVADJ:w	// Apply gCHRMVADJ to the word at pMV+2 before the chroma load (presumably MV y - TODO confirm)
+#endif
+	#include "loadRef_C_6x3.asm"
+#endif
+
+	// Interpolation
+	#include "interpolate_Y_4x4.asm"
+#ifndef MONO
+	#include "interpolate_C_2x2.asm"
+#endif
+	
+	cmp.e.f0.0 (1) null:w			gLOOP_SUBMBPT:uw				3:w	// f0.0: counter is 3 before the decrement (2nd iteration)
+	add.z.f0.1 (1) gLOOP_SUBMBPT:uw gLOOP_SUBMBPT:uw				-1:w	// Decrement; f0.1 set when the counter reaches 0
+	add (1)		pMV:w				pMV:w							8:w	// Advance pMV by 8 per iteration (4 x 8 = 32, undone below)
+	(-f0.0) add (1)	gpINTP:ud		gpINTP:ud						0x00080008:ud	// 8 & 8
+	(f0.0) add (1) gpINTP:ud		gpINTP:ud						0x00180038:ud	// 24 & 56
+	(-f0.1) jmpi INTERLABEL(LOOP_SUBMBPT)
+    
+    cmp.e.f0.1	null:w				gLOOP_DIR:w						1:w	// NOTE(review): no explicit exec size here, unlike the other cmp instructions
+    add (1)		pMV:w				pMV:w							-32:w	// Undo the four +8 steps taken in the loop
+    (f0.1) mov (1) gpINTP:ud		nOFFSET_INTP0:ud	// Pass 1: reset gpINTP to nOFFSET_INTP0
+    (-f0.1) mov (1) gpINTP:ud		nOFFSET_INTP1:ud	// Pass 0: reset gpINTP to nOFFSET_INTP1
+
+	mov (1)		pRESULT:uw					gpINTPC:uw
+    
+	//========================= END - LOOP_SUBMBPT ===========================
+    
+INTERLABEL(ROUND_SHIFT_C):
+    
+#ifndef MONO
+	#include "roundShift_C_4x4.asm"
+#endif
+
+INTERLABEL(LOOP_DIR_CONTINUE):
+	
+	add.nz.f0.1 (1) gLOOP_DIR:uw	gLOOP_DIR:uw					-1:w	// Decrement; f0.1 set when the result is non-zero
+	add (1)		pMV:w				pMV:w							4:w
+    (-f0.1) jmpi INTERLABEL(LOOP_DIR)	// Loop again only when gLOOP_DIR just became 0 (passes run as 1, then 0)
+    //=========================== END - LOOP_DIR =============================
+
+INTERLABEL(Weighted_Prediction):
+	#include "weightedPred.asm"
+	
+	and.z.f0.1 (16)	null<1>:w		gLOOP_SUBMB<0;1,0>:uw			2:w	// f0.1 set (16 channels) when bit 1 of gLOOP_SUBMB is clear (values 0 and 4)
+
+	#include "recon_Y_8x8.asm"
+#ifndef MONO
+	#include "recon_C_4x4.asm"
+
+	(-f0.1) add (1)	pERRORC:w		pERRORC:w						48:w	// Relies on f0.1 from the and.z above surviving the recon includes - TODO confirm
+#endif
+
+	cmp.e.f0.1 (1) null:w			gLOOP_SUBMB:uw					6:w	// Compare before the increment: 6 marks the last sub-MB
+	add (1)		gLOOP_SUBMB:uw		gLOOP_SUBMB:uw					2:w
+
+	add (1)		pWGT_BIDX:ud		pWGT_BIDX:ud					0x00100001:ud	// 16 & 1 (high word 0x0010, low word 0x0001)
+	add (1)		pMV:w				pMV:w							gMVSTEP:w
+
+    (-f0.1) jmpi INTERLABEL(LOOP_SUBMB)
+    //========================== END - LOOP_SUBMB ============================
+    
+INTERLABEL(EXIT_LOOP):   
+	#include "writeRecon_YC.asm"    
+
+#ifdef SW_SCOREBOARD    
+	wait	n0:ud		//	Now wait for the scoreboard to respond
+    #include "Soreboard_update.asm"	// scoreboard update function. NOTE(review): include name is spelled "Soreboard" - confirm it matches the file on disk
+#else
+// Check for write commit first if SW scoreboard is disabled
+	mov	(1)	gREG_WRITE_COMMIT_Y<1>:ud	gREG_WRITE_COMMIT_Y<0;1,0>:ud		// Make sure Y write is committed
+	mov	(1)	gREG_WRITE_COMMIT_UV<1>:ud	gREG_WRITE_COMMIT_UV<0;1,0>:ud		// Make sure U/V write is committed
+#endif
+
+// Terminate the thread
+//
+    END_THREAD
+
+
+//#include "Interpolate_Y_8x8_Func.asm"
+//#include "Interpolate_C_4x4_Func.asm"
+//#include "WeightedPred_Y_Func.asm"	
+//#include "WeightedPred_C_Func.asm"	
+
+
+.end_code
+
+.end_kernel
+
+        
+//#endif	// !defined(__AVCMCInter__)
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC.asm
@@ -0,0 +1,469 @@
+/*
+ * All HWMC kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+// Kernel name: AllAVC.asm
+//
+// All HWMC kernels merged into this file
+//
+//  $Revision: 2 $
+//  $Date: 9/10/06 2:02a $
+//
+
+// Note: To enable SW scoreboard for ILK AVC kernels, simply toggle the HW_SCOREBOARD 
+//		 and SW_SCOREBOARD definition as described below.
+//
+// ----------------------------------------------------
+//  Main: ALLINTRA
+// ----------------------------------------------------
+
+#define	COMBINED_KERNEL
+#define	ENABLE_ILDB
+
+//	WA for *Stim tool issue, should be removed later
+
+#ifdef DEV_ILK
+#define INSTFACTOR	2	// 128-bit count as 2 instructions
+#else
+#define INSTFACTOR	1	// 128-bit is 1 instruction
+#endif	// DEV_ILK
+
+#ifdef DEV_CTG
+  #define SW_SCOREBOARD		// SW Scoreboard should be enabled for CTG and earlier
+  #undef HW_SCOREBOARD		// HW Scoreboard should be disabled for CTG and earlier
+#else
+  #define HW_SCOREBOARD		// HW Scoreboard should be enabled for ILK and beyond
+  #undef SW_SCOREBOARD		// SW Scoreboard should be disabled for ILK and beyond
+#endif	// DEV_CTG
+#ifdef BOOTSTRAP
+#  ifdef ENABLE_ILDB
+#    define ALL_SPAWNED_UV_ILDB_FRAME_IP	0
+#    define SLEEP_ENTRY_UV_ILDB_FRAME_IP	0
+#    define POST_SLEEP_UV_ILDB_FRAME_IP	        0
+#    define ALL_SPAWNED_Y_ILDB_FRAME_IP	        0
+#    define SLEEP_ENTRY_Y_ILDB_FRAME_IP	        0
+#    define POST_SLEEP_Y_ILDB_FRAME_IP	        0
+#  endif	// ENABLE_ILDB: under BOOTSTRAP the ILDB *_IP symbols are all defined as 0
+#elif defined(DEV_ILK)
+# include "export.inc.gen5"
+#elif defined(DEV_CTG)
+# include "export.inc"
+#endif	// BOOTSTRAP / DEV_ILK / DEV_CTG
+#if defined(_EXPORT)
+	#include "AllAVC_Export.inc"
+#elif defined(_BUILD)
+	#include "AllAVC.ich"			// ISAasm dumped .exports
+	#include "AllAVC_Export.inc"	// Keep jumping targets aligned, only for CTG and beyond
+	#include "AllAVC_Build.inc"
+#else
+#endif	// _EXPORT / _BUILD: nothing is included when neither is defined
+
+.kernel AllAVC
+
+// Build all intra prediction kernels
+//
+#ifdef INTRA_16x16_PAD_NENOP
+    $for(0; <INTRA_16x16_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef INTRA_16x16_PAD_NOP
+    $for(0; <INTRA_16x16_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "Intra_16x16.asm"
+
+#ifdef INTRA_8x8_PAD_NENOP
+    $for(0; <INTRA_8x8_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef INTRA_8x8_PAD_NOP
+    $for(0; <INTRA_8x8_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "Intra_8x8.asm"
+
+#ifdef INTRA_4x4_PAD_NENOP
+    $for(0; <INTRA_4x4_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef INTRA_4x4_PAD_NOP
+    $for(0; <INTRA_4x4_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "Intra_4x4.asm"
+
+#ifdef INTRA_PCM_PAD_NENOP
+    $for(0; <INTRA_PCM_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef INTRA_PCM_PAD_NOP
+    $for(0; <INTRA_PCM_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "Intra_PCM.asm"
+
+// Build FrameMB_Motion kernel
+// AVCMCInter.asm is included three times below; FRAME, FIELD or MBAFF is defined only around its own include.
+#define FRAME
+
+  #ifdef FRAME_MB_PAD_NENOP
+    $for(0; <FRAME_MB_PAD_NENOP; 1) {
+	nenop
+	}
+  #endif
+  #ifdef FRAME_MB_PAD_NOP
+    $for(0; <FRAME_MB_PAD_NOP; 1) {
+	nop
+	}
+  #endif
+    #include "AVCMCInter.asm"
+#undef  FRAME
+
+// Build FieldMB_Motion kernel
+// Same source file as the frame kernel, assembled with FIELD defined instead of FRAME.
+#define FIELD
+
+  #ifdef FIELD_MB_PAD_NENOP
+    $for(0; <FIELD_MB_PAD_NENOP; 1) {
+	nenop
+	}
+  #endif
+  #ifdef FIELD_MB_PAD_NOP
+    $for(0; <FIELD_MB_PAD_NOP; 1) {
+	nop
+	}
+  #endif
+    #include "AVCMCInter.asm"
+#undef  FIELD
+
+// Build MBAff_Motion kernel
+// Same source file again, assembled with MBAFF defined.
+#define MBAFF
+
+  #ifdef MBAFF_MB_PAD_NENOP
+    $for(0; <MBAFF_MB_PAD_NENOP; 1) {
+	nenop
+	}
+  #endif
+  #ifdef MBAFF_MB_PAD_NOP
+    $for(0; <MBAFF_MB_PAD_NOP; 1) {
+	nop
+	}
+  #endif
+    #include "AVCMCInter.asm"
+#undef  MBAFF
+
+#ifdef SW_SCOREBOARD    
+
+// SW scoreboard kernel for non-MBAFF
+// At most one scoreboard flavour is assembled: SW_SCOREBOARD wins over HW_SCOREBOARD (#ifdef / #elif); with neither, none is built.
+#ifdef SCOREBOARD_PAD_NENOP
+    $for(0; <SCOREBOARD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef SCOREBOARD_PAD_NOP
+    $for(0; <SCOREBOARD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "scoreboard.asm"
+
+//	SW scoreboard kernel for MBAFF
+
+#ifdef SCOREBOARD_MBAFF_PAD_NENOP
+    $for(0; <SCOREBOARD_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef SCOREBOARD_MBAFF_PAD_NOP
+    $for(0; <SCOREBOARD_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "scoreboard_MBAFF.asm"
+
+#elif defined(HW_SCOREBOARD)
+ 
+// SetHWscoreboard kernel for non-MBAFF
+// This branch is reached only when SW_SCOREBOARD is not defined and HW_SCOREBOARD is.
+#ifdef SETHWSCOREBOARD_PAD_NENOP
+    $for(0; <SETHWSCOREBOARD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef SETHWSCOREBOARD_PAD_NOP
+    $for(0; <SETHWSCOREBOARD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "SetHWScoreboard.asm"
+
+//	SetHWscoreboard kernel for MBAFF
+
+#ifdef SETHWSCOREBOARD_MBAFF_PAD_NENOP
+    $for(0; <SETHWSCOREBOARD_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef SETHWSCOREBOARD_MBAFF_PAD_NOP
+    $for(0; <SETHWSCOREBOARD_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "SetHWScoreboard_MBAFF.asm"
+
+#endif	// SW_SCOREBOARD / HW_SCOREBOARD
+
+#ifdef BSDRESET_PAD_NENOP
+    $for(0; <BSDRESET_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef BSDRESET_PAD_NOP
+    $for(0; <BSDRESET_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "BSDReset.asm"	// Not guarded by a feature macro: always assembled into AllAVC
+
+#ifdef DCRESETDUMMY_PAD_NENOP
+    $for(0; <DCRESETDUMMY_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef DCRESETDUMMY_PAD_NOP
+    $for(0; <DCRESETDUMMY_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "DCResetDummy.asm"	// Likewise unconditional; only its padding depends on the *_PAD_* macros
+
+#ifdef ENABLE_ILDB
+
+// Build all ILDB kernels: Root/Child x Y/UV, once each for the frame, field and MBAFF variants (12 includes)
+// ILDB_LABEL(x) pastes a per-variant suffix (_ILDB_FRAME / _ILDB_FIELD / _ILDB_MBAFF) onto x and is #undef'd after each variant.
+//	Undefine some previously defined symbols since they will be re-defined/re-declared in ILDB kernels
+#undef	A
+#undef	B
+#undef	p0
+#undef	p1
+// Redirect the MSGPAYLOAD* names to their *_ILDB counterparts for everything included below
+#define MSGPAYLOADB MSGPAYLOADB_ILDB
+#define MSGPAYLOADW MSGPAYLOADW_ILDB
+#define MSGPAYLOADD MSGPAYLOADD_ILDB
+#define MSGPAYLOADF MSGPAYLOADF_ILDB
+
+//				< Frame ILDB > (_PROGRESSIVE defined, labels suffixed _ILDB_FRAME)
+#define _PROGRESSIVE
+#define ILDB_LABEL(x)	x##_ILDB_FRAME
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FRAME_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_Y.asm"
+
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FRAME_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_Y.asm"
+
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FRAME_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_UV.asm"
+
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FRAME_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_UV.asm"
+#undef ILDB_LABEL
+#undef _PROGRESSIVE
+
+//				< Field ILDB > (_FIELD defined, labels suffixed _ILDB_FIELD)
+#define _FIELD
+#define ILDB_LABEL(x)	x##_ILDB_FIELD
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_FIELD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_Field_Y.asm"
+
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_FIELD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_Field_Y.asm"
+
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_FIELD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_Field_UV.asm"
+
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_FIELD_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_Field_UV.asm"
+#undef ILDB_LABEL
+#undef _FIELD
+
+//				< MBAFF Frame ILDB > (_MBAFF defined, labels suffixed _ILDB_MBAFF)
+#define _MBAFF
+#define ILDB_LABEL(x)	x##_ILDB_MBAFF
+#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_Y_ILDB_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_Mbaff_Y.asm"
+
+#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_Y_ILDB_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_Mbaff_Y.asm"
+
+#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP
+    $for(0; <AVC_ILDB_ROOT_UV_ILDB_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Root_Mbaff_UV.asm"
+
+#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NENOP; 1) {
+	nenop
+	}
+#endif
+#ifdef AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP
+    $for(0; <AVC_ILDB_CHILD_UV_ILDB_MBAFF_PAD_NOP; 1) {
+	nop
+	}
+#endif
+    #include "AVC_ILDB_Child_Mbaff_UV.asm"
+#undef ILDB_LABEL
+#undef _MBAFF
+
+#endif		// ENABLE_ILDB
+
+AllAVC_END:
+nop
+// End of AllAVC. AllAVC_END is exported in AllAVC_Export.inc; AllAVC_Build.inc uses AllAVC_END_IP as the total instruction count.
+
+.end_code
+
+.end_kernel
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCField.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCField.asm
@@ -0,0 +1,100 @@
+/*
+ * All field picture HWMC kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	2857702934	// 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
+//    0    // Offset to Intra_16x16 luma prediction mode 0
+//    9    // Offset to Intra_16x16 luma prediction mode 1
+//   19    // Offset to Intra_16x16 luma prediction mode 2
+//   42    // Offset to Intra_16x16 luma prediction mode 3
+//	2857699336	// 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
+//    0    // Offset to Intra_8x8 luma prediction mode 0
+//    5    // Offset to Intra_8x8 luma prediction mode 1
+//   10    // Offset to Intra_8x8 luma prediction mode 2
+//   26    // Offset to Intra_8x8 luma prediction mode 3
+//   36    // Offset to Intra_8x8 luma prediction mode 4
+//   50    // Offset to Intra_8x8 luma prediction mode 5
+//   68    // Offset to Intra_8x8 luma prediction mode 6
+//   85    // Offset to Intra_8x8 luma prediction mode 7
+//   95    // Offset to Intra_8x8 luma prediction mode 8
+//	2857698308	// 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
+//    0    // Offset to Intra_4x4 luma prediction mode 0
+//    2    // Offset to Intra_4x4 luma prediction mode 1
+//    4    // Offset to Intra_4x4 luma prediction mode 2
+//   16    // Offset to Intra_4x4 luma prediction mode 3
+//   23    // Offset to Intra_4x4 luma prediction mode 4
+//   32    // Offset to Intra_4x4 luma prediction mode 5
+//   45    // Offset to Intra_4x4 luma prediction mode 6
+//   59    // Offset to Intra_4x4 luma prediction mode 7
+//   66    // Offset to Intra_4x4 luma prediction mode 8
+//	2857700364	// 0xAA550C0C - GUID for intra chroma prediction mode offsets
+//    0    // Offset to intra chroma prediction mode 0
+//   30    // Offset to intra chroma prediction mode 1
+//   36    // Offset to intra chroma prediction mode 2
+//   41    // Offset to intra chroma prediction mode 3
+
+// Kernel name: AllAVCField.asm
+//
+// All field picture HWMC kernels merged into this file
+// Contents, in order: Intra_PCM, Intra_16x16, Intra_8x8, Intra_4x4, scoreboard, then AVCMCInter.asm with FIELD defined.
+//  $Revision: 1 $
+//  $Date: 4/13/06 4:35p $
+//
+
+// ----------------------------------------------------
+//  Main: AllAVCField
+// ----------------------------------------------------
+
+#define	ALLHWMC
+#define	COMBINED_KERNEL
+
+.kernel AllAVCField
+
+    #include "Intra_PCM.asm"
+    #include "Intra_16x16.asm"
+    #include "Intra_8x8.asm"
+    #include "Intra_4x4.asm"
+    #include "scoreboard.asm"
+
+	#define FIELD
+	#include "AVCMCInter.asm"
+
+// End of AllAVCField (FIELD is left defined; nothing follows it in this kernel)
+
+.end_kernel
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCFrame.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCFrame.asm
@@ -0,0 +1,99 @@
+/*
+ * All frame picture HWMC kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	2857702934	// 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
+//    0    // Offset to Intra_16x16 luma prediction mode 0
+//    9    // Offset to Intra_16x16 luma prediction mode 1
+//   19    // Offset to Intra_16x16 luma prediction mode 2
+//   42    // Offset to Intra_16x16 luma prediction mode 3
+//	2857699336	// 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
+//    0    // Offset to Intra_8x8 luma prediction mode 0
+//    5    // Offset to Intra_8x8 luma prediction mode 1
+//   10    // Offset to Intra_8x8 luma prediction mode 2
+//   26    // Offset to Intra_8x8 luma prediction mode 3
+//   36    // Offset to Intra_8x8 luma prediction mode 4
+//   50    // Offset to Intra_8x8 luma prediction mode 5
+//   68    // Offset to Intra_8x8 luma prediction mode 6
+//   85    // Offset to Intra_8x8 luma prediction mode 7
+//   95    // Offset to Intra_8x8 luma prediction mode 8
+//	2857698308	// 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
+//    0    // Offset to Intra_4x4 luma prediction mode 0
+//    2    // Offset to Intra_4x4 luma prediction mode 1
+//    4    // Offset to Intra_4x4 luma prediction mode 2
+//   16    // Offset to Intra_4x4 luma prediction mode 3
+//   23    // Offset to Intra_4x4 luma prediction mode 4
+//   32    // Offset to Intra_4x4 luma prediction mode 5
+//   45    // Offset to Intra_4x4 luma prediction mode 6
+//   59    // Offset to Intra_4x4 luma prediction mode 7
+//   66    // Offset to Intra_4x4 luma prediction mode 8
+//	2857700364	// 0xAA550C0C - GUID for intra chroma prediction mode offsets
+//    0    // Offset to intra chroma prediction mode 0
+//   30    // Offset to intra chroma prediction mode 1
+//   36    // Offset to intra chroma prediction mode 2
+//   41    // Offset to intra chroma prediction mode 3
+
+// Kernel name: AllAVCFrame.asm
+//
+// All frame picture HWMC kernels merged into this file
+// Contents, in order: Intra_PCM, Intra_16x16, Intra_8x8, Intra_4x4, scoreboard, then AVCMCInter.asm.
+//  $Revision: 1 $
+//  $Date: 4/13/06 4:35p $
+//
+
+// ----------------------------------------------------
+//  Main: AllAVCFrame
+// ----------------------------------------------------
+
+#define	ALLHWMC
+#define	COMBINED_KERNEL
+
+.kernel AllAVCFrame
+
+    #include "Intra_PCM.asm"
+    #include "Intra_16x16.asm"
+    #include "Intra_8x8.asm"
+    #include "Intra_4x4.asm"
+    #include "scoreboard.asm"
+
+	#include "AVCMCInter.asm"
+
+// End of AllAVCFrame. NOTE(review): no FRAME macro is defined before AVCMCInter.asm here, whereas AllAVC.asm defines FRAME -- confirm.
+
+.end_kernel
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCMBAFF.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVCMBAFF.asm
@@ -0,0 +1,100 @@
+/*
+ * All MBAFF frame picture HWMC kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	2857702934	// 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
+//    0    // Offset to Intra_16x16 luma prediction mode 0
+//    9    // Offset to Intra_16x16 luma prediction mode 1
+//   19    // Offset to Intra_16x16 luma prediction mode 2
+//   42    // Offset to Intra_16x16 luma prediction mode 3
+//	2857699336	// 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
+//    0    // Offset to Intra_8x8 luma prediction mode 0
+//    5    // Offset to Intra_8x8 luma prediction mode 1
+//   10    // Offset to Intra_8x8 luma prediction mode 2
+//   26    // Offset to Intra_8x8 luma prediction mode 3
+//   36    // Offset to Intra_8x8 luma prediction mode 4
+//   50    // Offset to Intra_8x8 luma prediction mode 5
+//   68    // Offset to Intra_8x8 luma prediction mode 6
+//   85    // Offset to Intra_8x8 luma prediction mode 7
+//   95    // Offset to Intra_8x8 luma prediction mode 8
+//	2857698308	// 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
+//    0    // Offset to Intra_4x4 luma prediction mode 0
+//    2    // Offset to Intra_4x4 luma prediction mode 1
+//    4    // Offset to Intra_4x4 luma prediction mode 2
+//   16    // Offset to Intra_4x4 luma prediction mode 3
+//   23    // Offset to Intra_4x4 luma prediction mode 4
+//   32    // Offset to Intra_4x4 luma prediction mode 5
+//   45    // Offset to Intra_4x4 luma prediction mode 6
+//   59    // Offset to Intra_4x4 luma prediction mode 7
+//   66    // Offset to Intra_4x4 luma prediction mode 8
+//	2857700364	// 0xAA550C0C - GUID for intra chroma prediction mode offsets
+//    0    // Offset to intra chroma prediction mode 0
+//   30    // Offset to intra chroma prediction mode 1
+//   36    // Offset to intra chroma prediction mode 2
+//   41    // Offset to intra chroma prediction mode 3
+
+// Kernel name: AllAVCMBAFF.asm
+//
+// All MBAFF frame picture HWMC kernels merged into this file
+// Contents, in order: Intra_PCM, Intra_16x16, Intra_8x8, Intra_4x4, scoreboard, then AVCMCInter.asm with MBAFF defined.
+//  $Revision: 1 $
+//  $Date: 4/13/06 4:35p $
+//
+
+// ----------------------------------------------------
+//  Main: AllAVCMBAFF
+// ----------------------------------------------------
+
+#define	ALLHWMC
+#define	COMBINED_KERNEL
+
+.kernel AllAVCMBAFF
+
+    #include "Intra_PCM.asm"
+    #include "Intra_16x16.asm"
+    #include "Intra_8x8.asm"
+    #include "Intra_4x4.asm"
+    #include "scoreboard.asm"
+
+	#define MBAFF
+	#include "AVCMCInter.asm"
+
+// End of AllAVCMBAFF (MBAFF is left defined; nothing follows it in this kernel)
+
+.end_kernel
+
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC_Build.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC_Build.inc
@@ -0,0 +1,112 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+$table {
+AllAVC_END_IP/INSTFACTOR    // Total instruction count
+#if (defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)) && defined(ENABLE_ILDB)
+//    23    // Total kernel count
+#elif defined(SW_SCOREBOARD) || defined(HW_SCOREBOARD)
+//    11    // Total kernel count
+#elif defined(ENABLE_ILDB)
+//    21    // Total kernel count
+#else
+//     9    // Total kernel count (no scoreboard or ILDB entries: 7 intra/motion kernels + BSDReset + DCResetDummy)
+#endif
+INTRA_16x16_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_16x16'
+INTRA_8x8_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_8x8'
+INTRA_4x4_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_4x4'
+INTRA_PCM_ENTRY/INSTFACTOR    // Instruction offset to 'Intra_PCM'
+FRAME_MB_ENTRY/INSTFACTOR    // Instruction offset to 'FrameMB_Motion'
+FIELD_MB_ENTRY/INSTFACTOR    // Instruction offset to 'FieldMB_Motion'
+MBAFF_MB_ENTRY/INSTFACTOR    // Instruction offset to 'MBAff_Motion'
+#ifdef SW_SCOREBOARD    
+SCOREBOARD_ENTRY/INSTFACTOR			// Instruction offset to 'scoreboard'
+SCOREBOARD_MBAFF_ENTRY/INSTFACTOR	// Instruction offset to 'scoreboard_MBAFF'
+#elif defined(HW_SCOREBOARD)
+SETHWSCOREBOARD_ENTRY/INSTFACTOR		// Instruction offset to 'AVC_SetIntraDepend'
+SETHWSCOREBOARD_MBAFF_ENTRY/INSTFACTOR	// Instruction offset to 'AVC_SetIntraDependMBAFF'
+#endif	// SW_SCOREBOARD
+#ifdef ENABLE_ILDB
+AVC_ILDB_ROOT_Y_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Y'
+AVC_ILDB_CHILD_Y_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Y'
+AVC_ILDB_ROOT_UV_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_UV'
+AVC_ILDB_CHILD_UV_ILDB_FRAME_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_UV'
+AVC_ILDB_ROOT_Y_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Field_Y'
+AVC_ILDB_CHILD_Y_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Field_Y'
+AVC_ILDB_ROOT_UV_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Field_UV'
+AVC_ILDB_CHILD_UV_ILDB_FIELD_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Field_UV'
+AVC_ILDB_ROOT_Y_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Mbaff_Y'
+AVC_ILDB_CHILD_Y_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Mbaff_Y'
+AVC_ILDB_ROOT_UV_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Root_Mbaff_UV'
+AVC_ILDB_CHILD_UV_ILDB_MBAFF_ENTRY/INSTFACTOR    // Instruction offset to 'AVC_ILDB_Child_Mbaff_UV'
+#endif	// ENABLE_ILDB
+BSDRESET_ENTRY/INSTFACTOR    // Instruction offset to 'BSDReset'
+DCRESETDUMMY_ENTRY/INSTFACTOR    // Instruction offset to 'DCResetDummy'
+
+//    0    // Instruction offset to Intra_4x4_luma_prediction_mode_0
+INTRA_4X4_HORIZONTAL_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_1
+INTRA_4X4_DC_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_2
+INTRA_4X4_DIAG_DOWN_LEFT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_3
+INTRA_4X4_DIAG_DOWN_RIGHT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_4
+INTRA_4X4_VERT_RIGHT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_5
+INTRA_4X4_HOR_DOWN_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_6
+INTRA_4X4_VERT_LEFT_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_7
+INTRA_4X4_HOR_UP_IP-INTRA_4X4_VERTICAL_IP    // Instruction offset to Intra_4x4_luma_prediction_mode_8
+
+//    0    // Instruction offset to Intra_8x8_luma_prediction_mode_0
+INTRA_8X8_HORIZONTAL_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_1
+INTRA_8X8_DC_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_2
+INTRA_8X8_DIAG_DOWN_LEFT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_3
+INTRA_8X8_DIAG_DOWN_RIGHT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_4
+INTRA_8X8_VERT_RIGHT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_5
+INTRA_8X8_HOR_DOWN_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_6
+INTRA_8X8_VERT_LEFT_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_7
+INTRA_8X8_HOR_UP_IP-INTRA_8X8_VERTICAL_IP    // Instruction offset to Intra_8x8_luma_prediction_mode_8
+
+//    0    // Instruction offset to Intra_16x16_luma_prediction_mode_0
+INTRA_16x16_HORIZONTAL_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_1
+INTRA_16x16_DC_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_2
+INTRA_16x16_PLANE_IP-INTRA_16x16_VERTICAL_IP    // Instruction offset to Intra_16x16_luma_prediction_mode_3
+
+//    0    // Instruction offset to intra_chroma_prediction_mode_0
+INTRA_CHROMA_HORIZONTAL_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_1
+INTRA_CHROMA_VERTICAL_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_2
+INTRA_Chroma_PLANE_IP-INTRA_CHROMA_DC_IP    // Instruction offset to intra_chroma_prediction_mode_3
+
+intra_Pred_4x4_Y_IP-ADD_ERROR_SB3_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB2_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB1_IP*0x100+intra_Pred_4x4_Y_IP-ADD_ERROR_SB0_IP	// Instruction offset to intra_4x4_pred_module. NOTE(review): packs four offsets (SB3..SB0) into one value only if evaluated strictly left to right -- confirm
+}
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC_Export.inc
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllAVC_Export.inc
@@ -0,0 +1,202 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+.export entry_point INTRA_16x16
+.export entry_point INTRA_8x8
+.export entry_point INTRA_4x4
+.export entry_point INTRA_PCM
+.export entry_point FRAME_MB
+.export entry_point FIELD_MB
+.export entry_point MBAFF_MB
+#ifdef SW_SCOREBOARD    
+.export entry_point SCOREBOARD
+.export entry_point SCOREBOARD_MBAFF
+#elif defined(HW_SCOREBOARD)
+.export entry_point SETHWSCOREBOARD
+.export entry_point SETHWSCOREBOARD_MBAFF
+#endif	// SW_SCOREBOARD / HW_SCOREBOARD
+
+#ifdef ENABLE_ILDB
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_FRAME
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_FRAME
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_FRAME
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_FRAME
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_FIELD
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_FIELD
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_FIELD
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_FIELD
+.export entry_point AVC_ILDB_ROOT_Y_ILDB_MBAFF
+.export entry_point AVC_ILDB_CHILD_Y_ILDB_MBAFF
+.export entry_point AVC_ILDB_ROOT_UV_ILDB_MBAFF
+.export entry_point AVC_ILDB_CHILD_UV_ILDB_MBAFF
+#endif	// ENABLE_ILDB
+
+.export entry_point BSDRESET
+.export entry_point DCRESETDUMMY
+// The exports below are plain labels, not entry points; AllAVC_Build.inc subtracts their _IP values to get per-mode offsets.
+.export label INTRA_16x16_VERTICAL
+.export label INTRA_16x16_HORIZONTAL
+.export label INTRA_16x16_DC
+.export label INTRA_16x16_PLANE
+
+.export label INTRA_8X8_VERTICAL
+.export label INTRA_8X8_HORIZONTAL
+.export label INTRA_8X8_DC
+.export label INTRA_8X8_DIAG_DOWN_LEFT
+.export label INTRA_8X8_DIAG_DOWN_RIGHT
+.export label INTRA_8X8_VERT_RIGHT
+.export label INTRA_8X8_HOR_DOWN
+.export label INTRA_8X8_VERT_LEFT
+.export label INTRA_8X8_HOR_UP
+
+.export label INTRA_4X4_VERTICAL
+.export label INTRA_4X4_HORIZONTAL
+.export label INTRA_4X4_DC
+.export label INTRA_4X4_DIAG_DOWN_LEFT
+.export label INTRA_4X4_DIAG_DOWN_RIGHT
+.export label INTRA_4X4_VERT_RIGHT
+.export label INTRA_4X4_HOR_DOWN
+.export label INTRA_4X4_VERT_LEFT
+.export label INTRA_4X4_HOR_UP
+
+.export label INTRA_CHROMA_DC
+.export label INTRA_CHROMA_HORIZONTAL
+.export label INTRA_CHROMA_VERTICAL
+.export label INTRA_Chroma_PLANE
+// Labels referenced by the intra_4x4_pred_module entry of the AllAVC_Build.inc table
+.export label intra_Pred_4x4_Y
+.export label ADD_ERROR_SB0
+.export label ADD_ERROR_SB1
+.export label ADD_ERROR_SB2
+.export label ADD_ERROR_SB3
+// End-of-kernel label; AllAVC_Build.inc uses AllAVC_END_IP as the total instruction count
+.export label AllAVC_END
+
+#ifdef SW_SCOREBOARD    
+.export label MB_Loop
+.export label No_Message
+.export label Dependency_Check
+.export label Notify_MSG
+.export label Update_CurMB
+.export label MBAFF_MB_Loop
+.export label MBAFF_No_Message
+.export label MBAFF_Dependency_Check
+.export label MBAFF_Notify_MSG
+.export label MBAFF_Update_CurMB
+
+// Each <label>_IP below gets a fallback value of 0, but only when it is not already defined.
+
+// Definitions for first pass MC kernel building
+#ifndef No_Message_IP
+#define No_Message_IP	0
+#endif
+
+#ifndef Dependency_Check_IP
+#define Dependency_Check_IP	0
+#endif
+
+#ifndef Notify_MSG_IP
+#define Notify_MSG_IP	0
+#endif
+
+#ifndef Update_CurMB_IP
+#define Update_CurMB_IP	0
+#endif
+
+#ifndef MBAFF_No_Message_IP
+#define MBAFF_No_Message_IP	0
+#endif
+
+#ifndef MBAFF_Dependency_Check_IP
+#define MBAFF_Dependency_Check_IP	0
+#endif
+
+#ifndef MBAFF_Notify_MSG_IP
+#define MBAFF_Notify_MSG_IP	0
+#endif
+// The two *_MB_Loop_IP fallbacks are provided only when AS_ENABLED is NOT defined.
+#ifndef	AS_ENABLED
+ #ifndef MBAFF_MB_Loop_IP
+ #define MBAFF_MB_Loop_IP	0
+ #endif
+
+ #ifndef MB_Loop_IP
+ #define MB_Loop_IP	0
+ #endif
+#endif	// !AS_ENABLED
+
+#ifndef MBAFF_Update_CurMB_IP
+#define MBAFF_Update_CurMB_IP	0
+#endif
+
+#endif	// SW_SCOREBOARD
+
+#ifdef ENABLE_ILDB
+.export label	ALL_SPAWNED_UV_ILDB_FRAME
+.export label	SLEEP_ENTRY_UV_ILDB_FRAME
+.export label	POST_SLEEP_UV_ILDB_FRAME
+.export label	ALL_SPAWNED_Y_ILDB_FRAME
+.export label	SLEEP_ENTRY_Y_ILDB_FRAME
+.export label	POST_SLEEP_Y_ILDB_FRAME
+// Only the _ILDB_FRAME variants of these labels are exported here; each _IP falls back to 0 when not already defined.
+// Definitions for first pass ILDB kernel building
+#ifndef	ALL_SPAWNED_UV_ILDB_FRAME_IP
+#define	ALL_SPAWNED_UV_ILDB_FRAME_IP	0
+#endif
+
+#ifndef	SLEEP_ENTRY_UV_ILDB_FRAME_IP
+#define	SLEEP_ENTRY_UV_ILDB_FRAME_IP	0
+#endif
+
+#ifndef	POST_SLEEP_UV_ILDB_FRAME_IP
+#define	POST_SLEEP_UV_ILDB_FRAME_IP	0
+#endif
+
+#ifndef	ALL_SPAWNED_Y_ILDB_FRAME_IP
+#define	ALL_SPAWNED_Y_ILDB_FRAME_IP	0
+#endif
+
+#ifndef	SLEEP_ENTRY_Y_ILDB_FRAME_IP
+#define	SLEEP_ENTRY_Y_ILDB_FRAME_IP	0
+#endif
+
+#ifndef	POST_SLEEP_Y_ILDB_FRAME_IP
+#define	POST_SLEEP_Y_ILDB_FRAME_IP	0
+#endif
+
+#endif	// ENABLE_ILDB
--- a/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllIntra.asm
+++ b/contrib/sdk/sources/vaapi/intel-driver-1.6.2/src/shaders/h264/mc/AllIntra.asm
@@ -0,0 +1,98 @@
+/*
+ * All intra-prediction macroblock kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file was originally licensed under the following license
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+//	2857702934	// 0xAA551616 - GUID for Intra_16x16 luma prediction mode offsets
+//    0    // Offset to Intra_16x16 luma prediction mode 0
+//    9    // Offset to Intra_16x16 luma prediction mode 1
+//   19    // Offset to Intra_16x16 luma prediction mode 2
+//   42    // Offset to Intra_16x16 luma prediction mode 3
+//	2857699336	// 0xAA550808 - GUID for Intra_8x8 luma prediction mode offsets
+//    0    // Offset to Intra_8x8 luma prediction mode 0
+//    5    // Offset to Intra_8x8 luma prediction mode 1
+//   10    // Offset to Intra_8x8 luma prediction mode 2
+//   26    // Offset to Intra_8x8 luma prediction mode 3
+//   36    // Offset to Intra_8x8 luma prediction mode 4
+//   50    // Offset to Intra_8x8 luma prediction mode 5
+//   68    // Offset to Intra_8x8 luma prediction mode 6
+//   85    // Offset to Intra_8x8 luma prediction mode 7
+//   95    // Offset to Intra_8x8 luma prediction mode 8
+//	2857698308	// 0xAA550404 - GUID for Intra_4x4 luma prediction mode offsets
+//    0    // Offset to Intra_4x4 luma prediction mode 0
+//    2    // Offset to Intra_4x4 luma prediction mode 1
+//    4    // Offset to Intra_4x4 luma prediction mode 2
+//   16    // Offset to Intra_4x4 luma prediction mode 3
+//   23    // Offset to Intra_4x4 luma prediction mode 4
+//   32    // Offset to Intra_4x4 luma prediction mode 5
+//   45    // Offset to Intra_4x4 luma prediction mode 6
+//   59    // Offset to Intra_4x4 luma prediction mode 7
+//   66    // Offset to Intra_4x4 luma prediction mode 8
+//	2857700364	// 0xAA550C0C - GUID for intra chroma prediction mode offsets
+//    0    // Offset to intra chroma prediction mode 0
+//   30    // Offset to intra chroma prediction mode 1
+//   36    // Offset to intra chroma prediction mode 2
+//   41    // Offset to intra chroma prediction mode 3
+
+// Kernel name: AllIntra.asm
+//
+// All intra-prediction HWMC kernels are merged into this file
+//
+//  $Revision: 1 $
+//  $Date: 4/13/06 4:35p $
+//
+
+// ----------------------------------------------------
+//  Main: ALLINTRA
+// ----------------------------------------------------
+// NOTE(review): ALLHWMC and COMBINED_KERNEL are defined ahead of the includes below; presumably the included files test them - confirm
+#define	ALLHWMC
+#define	COMBINED_KERNEL
+
+.kernel ALLINTRA
+
+    // Intra-prediction HWMC kernels pulled into ALLINTRA, in this order: Intra_PCM, Intra_16x16, Intra_8x8, Intra_4x4
+    // (original note: "All frame destination HWMC kernels")
+    #include "Intra_PCM.asm"
+    #include "Intra_16x16.asm"
+    #include "Intra_8x8.asm"
+    #include "Intra_4x4.asm"
+
+// End of ALLINTRA
+
+.end_kernel
+
--- a/Show More
+++ b/Show More