sdk: Intel vaapi driver.

git-svn-id: svn://kolibrios.org@5361 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Sergey Semyonov (Serge)
2015-01-07 20:24:42 +00:00
parent 75873a0173
commit 9e083f3ae8
827 changed files with 266927 additions and 0 deletions

View File

@@ -0,0 +1,156 @@
# Copyright (c) 2007 Intel Corporation. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sub license, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice (including the
# next paragraph) shall be included in all copies or substantial portions
# of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Descend into shaders/ first to build the pre-compiled media kernels.
SUBDIRS = shaders
# Preprocessor flags shared by every object built in this directory.
AM_CPPFLAGS = \
	-DPTHREADS \
	$(DRM_CFLAGS) \
	$(LIBVA_DEPS_CFLAGS) \
	$(NULL)
# Compiler flags for the driver module.  Extended below when Wayland
# support is enabled; this works because automake/make variables
# assigned with '=' are expanded lazily at their point of use.
driver_cflags = \
	-Wall \
	-fvisibility=hidden \
	$(NULL)
# Libtool link flags: build a dlopen-able module (no "lib" prefix, no
# version suffix) and require all symbols to resolve at link time,
# both at the libtool level and at the linker level.
driver_ldflags = \
	-module -avoid-version \
	-no-undefined \
	-Wl,--no-undefined \
	$(NULL)
# Libraries the driver module links against.
driver_libs = \
	-lpthread -lm -ldl \
	$(DRM_LIBS) -ldrm_intel \
	$(LIBVA_DEPS_LIBS) \
	$(NULL)
# C sources common to all configurations; backend-specific sources are
# appended in the conditionals further below.
source_c = \
	dso_utils.c \
	gen6_mfc.c \
	gen6_mfc_common.c \
	gen6_mfd.c \
	gen6_vme.c \
	gen7_vme.c \
	gen7_mfc.c \
	gen7_mfd.c \
	gen75_mfd.c \
	gen75_mfc.c \
	gen8_mfc.c \
	gen8_mfd.c \
	gen8_vme.c \
	gen75_picture_process.c \
	gen75_vme.c \
	gen75_vpp_gpe.c \
	gen75_vpp_vebox.c \
	i965_avc_bsd.c \
	i965_avc_hw_scoreboard.c\
	i965_avc_ildb.c \
	i965_decoder_utils.c \
	i965_device_info.c \
	i965_drv_video.c \
	i965_encoder.c \
	i965_encoder_utils.c \
	i965_media.c \
	i965_media_h264.c \
	i965_media_mpeg2.c \
	i965_gpe_utils.c \
	i965_post_processing.c \
	gen8_post_processing.c \
	i965_render.c \
	gen8_render.c \
	intel_batchbuffer.c \
	intel_batchbuffer_dump.c\
	intel_driver.c \
	intel_memman.c \
	object_heap.c \
	intel_media_common.c \
	$(NULL)
# Private headers; not installed, listed via noinst_HEADERS so they are
# included in the distribution tarball.
source_h = \
	dso_utils.h \
	gen6_mfc.h \
	gen6_mfd.h \
	gen6_vme.h \
	gen7_mfd.h \
	gen75_picture_process.h \
	gen75_vpp_gpe.h \
	gen75_vpp_vebox.h \
	i965_avc_bsd.h \
	i965_avc_hw_scoreboard.h\
	i965_avc_ildb.h \
	i965_decoder.h \
	i965_decoder_utils.h \
	i965_defines.h \
	i965_drv_video.h \
	i965_encoder.h \
	i965_encoder_utils.h \
	i965_media.h \
	i965_media_h264.h \
	i965_media_mpeg2.h \
	i965_mutext.h \
	i965_gpe_utils.h \
	i965_pciids.h \
	i965_post_processing.h \
	i965_render.h \
	i965_structs.h \
	intel_batchbuffer.h \
	intel_batchbuffer_dump.h\
	intel_compiler.h \
	intel_driver.h \
	intel_media.h \
	intel_memman.h \
	object_heap.h \
	sysdeps.h \
	va_backend_compat.h \
	i965_fourcc.h \
	$(NULL)
# Install the module into the libva drivers directory (custom "_ladir"
# primary directory paired with the _la_LTLIBRARIES primary).
i965_drv_video_la_LTLIBRARIES = i965_drv_video.la
i965_drv_video_ladir = $(LIBVA_DRIVERS_PATH)
i965_drv_video_la_CFLAGS = $(driver_cflags)
i965_drv_video_la_LDFLAGS = $(driver_ldflags)
i965_drv_video_la_LIBADD = $(driver_libs)
i965_drv_video_la_SOURCES = $(source_c)
noinst_HEADERS = $(source_h)
# Optional X11/DRI output backend.
if USE_X11
source_c += i965_output_dri.c
source_h += i965_output_dri.h
endif
# Optional Wayland output backend.
if USE_WAYLAND
source_c += i965_output_wayland.c
source_h += i965_output_wayland.h
driver_cflags += $(WAYLAND_CFLAGS)
endif
# Wayland protocol
# NOTE(review): the scanner rules below are unconditional; upstream
# keeps them under "if USE_WAYLAND" -- confirm this is intentional.
protocol_source_h = wayland-drm-client-protocol.h
i965_output_wayland.c: $(protocol_source_h)
@wayland_scanner_rules@
DIST_SUBDIRS = $(SUBDIRS) wayland
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in config.h.in

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,119 @@
/* src/config.h. Generated from config.h.in by configure. */
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* NOTE(review): this is a generated configuration snapshot checked in
   for the KolibriOS port; regenerate via configure rather than editing
   the values by hand. */
/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <drm_fourcc.h> header file. */
#define HAVE_DRM_FOURCC_H 1
/* Define to 1 if you have the <EGL/egl.h> header file. */
#define HAVE_EGL_EGL_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `m' library (-lm). */
#define HAVE_LIBM 1
/* Define to 1 if you have the `log2f' function. */
#define HAVE_LOG2F 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Defined to 1 if VA/DRM API is enabled */
#define HAVE_VA_DRM 1
/* Driver version 1.4.1 (pre 0) -- must agree with PACKAGE_VERSION below. */
/* Major version of the driver */
#define INTEL_DRIVER_MAJOR_VERSION 1
/* Micro version of the driver */
#define INTEL_DRIVER_MICRO_VERSION 1
/* Minor version of the driver */
#define INTEL_DRIVER_MINOR_VERSION 4
/* Preversion of the driver */
#define INTEL_DRIVER_PRE_VERSION 0
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#define LT_OBJDIR ".libs/"
/* Name of package */
#define PACKAGE "libva-intel-driver"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "haihao.xiang@intel.com"
/* Define to the full name of this package. */
#define PACKAGE_NAME "intel_driver"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "intel_driver 1.4.1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "libva-intel-driver"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.4.1"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define driver entry-point */
#define VA_DRIVER_INIT_FUNC __vaDriverInit_0_36
/* Version number of package */
#define VERSION "1.4.1"
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* # undef WORDS_BIGENDIAN */
# endif
#endif
/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
/* #undef _FILE_OFFSET_BITS */
/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */

View File

@@ -0,0 +1,124 @@
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* NOTE(review): autoheader-generated template (all values #undef until
   configure substitutes them); regenerate rather than edit by hand. */
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <drm_fourcc.h> header file. */
#undef HAVE_DRM_FOURCC_H
/* Define to 1 if you have the <EGL/egl.h> header file. */
#undef HAVE_EGL_EGL_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `m' library (-lm). */
#undef HAVE_LIBM
/* Define to 1 if you have the `log2f' function. */
#undef HAVE_LOG2F
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Defined to 1 if VA/DRM API is enabled */
#undef HAVE_VA_DRM
/* Defined to 1 if VA/Wayland API is enabled */
#undef HAVE_VA_WAYLAND
/* Defined to 1 if VA/X11 API is enabled */
#undef HAVE_VA_X11
/* Major version of the driver */
#undef INTEL_DRIVER_MAJOR_VERSION
/* Micro version of the driver */
#undef INTEL_DRIVER_MICRO_VERSION
/* Minor version of the driver */
#undef INTEL_DRIVER_MINOR_VERSION
/* Preversion of the driver */
#undef INTEL_DRIVER_PRE_VERSION
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define driver entry-point */
#undef VA_DRIVER_INIT_FUNC
/* Version number of package */
#undef VERSION
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
# undef WORDS_BIGENDIAN
# endif
#endif
/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
#undef _FILE_OFFSET_BITS
/* Define for large files, on AIX-style hosts. */
#undef _LARGE_FILES

View File

@@ -0,0 +1,110 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include "dso_utils.h"
/* Opaque wrapper around a dynamically-loaded library handle. */
struct dso_handle {
void *handle; /* dlopen() handle, or RTLD_DEFAULT for the running image */
};
/* Opens the named shared library.
 *
 * A NULL path produces a pseudo-handle (RTLD_DEFAULT) that resolves
 * symbols against the images already loaded into the process.
 * Returns NULL on allocation or dlopen() failure. */
struct dso_handle *
dso_open(const char *path)
{
    struct dso_handle *dso = calloc(1, sizeof(*dso));

    if (!dso)
        return NULL;

    if (!path) {
        dso->handle = RTLD_DEFAULT;
        return dso;
    }

    dso->handle = dlopen(path, RTLD_LAZY|RTLD_LOCAL);
    if (dso->handle)
        return dso;

    dso_close(dso);
    return NULL;
}
/* Closes the library and disposes of any allocated data.  NULL-safe. */
void
dso_close(struct dso_handle *h)
{
    if (h == NULL)
        return;

    if (h->handle != NULL) {
        /* RTLD_DEFAULT was never dlopen()ed, so it must not be dlclose()d. */
        if (h->handle != RTLD_DEFAULT)
            dlclose(h->handle);
        h->handle = NULL;
    }
    free(h);
}
/* Resolves one symbol and stores it at the supplied location.
 * Uses the dlerror() protocol so a legitimately-NULL symbol value is
 * not mistaken for a lookup failure. */
static bool
get_symbol(struct dso_handle *h, void *func_vptr, const char *name)
{
    dso_generic_func *const slot = func_vptr;
    dso_generic_func addr;
    const char *err;

    dlerror(); /* clear any stale error state before the lookup */
    addr = (dso_generic_func)dlsym(h->handle, name);
    err = dlerror();
    if (err != NULL) {
        fprintf(stderr, "error: failed to resolve %s(): %s\n", name, err);
        return false;
    }

    *slot = addr;
    return true;
}
/* Loads every entry of the NULL-terminated symbol table into the
 * vtable.  Returns false if an entry would overflow vtable_length or a
 * lookup fails; entries processed before the failure are already
 * written. */
bool
dso_get_symbols(
    struct dso_handle *h,
    void *vtable,
    unsigned int vtable_length,
    const struct dso_symbol *symbols
)
{
    const struct dso_symbol *sym;

    for (sym = symbols; sym->name != NULL; ++sym) {
        /* The function-pointer slot must fit entirely inside the vtable. */
        if (vtable_length < sym->offset + sizeof(dso_generic_func))
            return false;
        if (!get_symbol(h, (char *)vtable + sym->offset, sym->name))
            return false;
    }
    return true;
}

View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef DSO_UTILS_H
#define DSO_UTILS_H
#include <stdbool.h>
/** Generic pointer to function. */
typedef void (*dso_generic_func)(void);
/** Library handle (opaque). */
struct dso_handle;
/** Symbol lookup table entry; terminate arrays with name == NULL. */
struct dso_symbol {
/** Symbol name */
const char *name;
/** Offset into the supplied vtable where symbol is to be loaded. */
unsigned int offset;
};
/**
 * Opens the named shared library.
 *
 * @param[in] path the library name, or NULL to look up symbols in the
 *   already-loaded images
 * @return the newly allocated library handle, or NULL on failure
 */
struct dso_handle *
dso_open(const char *path);
/** Closes the library and disposes of any allocated data. NULL-safe. */
void
dso_close(struct dso_handle *h);
/**
 * Loads symbols into the supplied vtable.
 *
 * @param[in] h the DSO handle
 * @param[in] vtable the function table to fill in
 * @param[in] vtable_length the size (in bytes) of the function table
 * @param[in] symbols the NULL terminated array of symbols to lookup
 * @return true on success, false otherwise
 */
bool
dso_get_symbols(
struct dso_handle *h,
void *vtable,
unsigned int vtable_length,
const struct dso_symbol *symbols
);
#endif /* DSO_UTILS_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,281 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Zhou Chang <chang.zhou@intel.com>
*
*/
#ifndef _GEN6_MFC_H_
#define _GEN6_MFC_H_
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_gpe_utils.h"
struct encode_state;
/* Size of the reference_surfaces[] array in gen6_mfc_context. */
#define MAX_MFC_REFERENCE_SURFACES 16
/* Size of direct_mv_buffers[]; 0-31 used as input, 32/33 as output. */
#define NUM_MFC_DMV_BUFFERS 34
#define INTRA_MB_FLAG_MASK 0x00002000
/* The space required for slice header SLICE_STATE + header.
* Is it enough? */
#define SLICE_HEADER 80
/* the space required for slice tail. */
#define SLICE_TAIL 16
#define __SOFTWARE__ 0
/* Indices of the MFC batchbuffer media kernels (NUM_MFC_KERNEL total). */
#define MFC_BATCHBUFFER_AVC_INTRA 0
#define MFC_BATCHBUFFER_AVC_INTER 1
#define NUM_MFC_KERNEL 2
/* Binding-table slots used by the MFC batchbuffer kernels. */
#define BIND_IDX_VME_OUTPUT 0
#define BIND_IDX_MFC_SLICE_HEADER 1
#define BIND_IDX_MFC_BATCHBUFFER 2
#define CMD_LEN_IN_OWORD 4
/* HRD (hypothetical reference decoder) buffer status reported by the
* bit-rate control logic after a frame has been packed. */
typedef enum _gen6_brc_status
{
BRC_NO_HRD_VIOLATION = 0,
BRC_UNDERFLOW = 1,
BRC_OVERFLOW = 2,
BRC_UNDERFLOW_WITH_MAX_QP = 3,
BRC_OVERFLOW_WITH_MIN_QP = 4,
} gen6_brc_status;
/* Per-surface AVC auxiliary data: top/bottom direct-MV buffers. */
struct gen6_mfc_avc_surface_aux
{
dri_bo *dmv_top;
dri_bo *dmv_bottom;
};
/* MFC (video encode / PAK) pipeline context.  Groups the buffer objects
* programmed into the MFX_* pipeline states, the bit-rate/HRD control
* state, and a table of per-generation hook functions filled in by the
* gen6/75/8 context-init routines. */
struct gen6_mfc_context
{
struct {
unsigned int width;
unsigned int height;
unsigned int w_pitch;
unsigned int h_pitch;
} surface_state;
//MFX_PIPE_BUF_ADDR_STATE
struct {
dri_bo *bo;
} post_deblocking_output; //OUTPUT: reconstructed picture
struct {
dri_bo *bo;
} pre_deblocking_output; //OUTPUT: reconstructed picture (pre-deblocking)
struct {
dri_bo *bo;
} uncompressed_picture_source; //INPUT: original uncompressed picture
struct {
dri_bo *bo;
} intra_row_store_scratch_buffer; //INTERNAL:
struct {
dri_bo *bo;
} macroblock_status_buffer; //INTERNAL:
struct {
dri_bo *bo;
} deblocking_filter_row_store_scratch_buffer; //INTERNAL:
struct {
dri_bo *bo;
} reference_surfaces[MAX_MFC_REFERENCE_SURFACES]; //INTERNAL: reference surfaces
//MFX_IND_OBJ_BASE_ADDR_STATE
struct{
dri_bo *bo;
} mfc_indirect_mv_object; //INPUT: the blocks' mv info
struct {
dri_bo *bo;
int offset;
int end_offset;
} mfc_indirect_pak_bse_object; //OUTPUT: the compressed bitstream
//MFX_BSP_BUF_BASE_ADDR_STATE
struct {
dri_bo *bo;
} bsd_mpc_row_store_scratch_buffer; //INTERNAL:
//MFX_AVC_DIRECTMODE_STATE
struct {
dri_bo *bo;
} direct_mv_buffers[NUM_MFC_DMV_BUFFERS]; //INTERNAL: 0-31 as input,32 and 33 as output
//Bit rate tracking context
struct {
unsigned int QpPrimeY;
unsigned int MaxQpNegModifier;
unsigned int MaxQpPosModifier;
unsigned char MaxSizeInWord;
unsigned char TargetSizeInWord;
unsigned char Correct[6];
unsigned char GrowInit;
unsigned char GrowResistance;
unsigned char ShrinkInit;
unsigned char ShrinkResistance;
unsigned int target_mb_size;
unsigned int target_frame_size;
} bit_rate_control_context[3]; //INTERNAL: for I, P, B frames
struct {
int mode;
int gop_nums[3];
int target_frame_size[3]; // I,P,B
double bits_per_frame;
double qpf_rounding_accumulator;
double saved_bps;
double saved_fps;
int saved_intra_period;
int saved_ip_period;
int saved_idr_period;
} brc;
struct {
double current_buffer_fullness;
double target_buffer_fullness;
double buffer_capacity;
unsigned int buffer_size;
unsigned int violation_noted;
} hrd;
//HRD control context
struct {
int i_bit_rate_value;
int i_cpb_size_value;
int i_initial_cpb_removal_delay;
int i_cpb_removal_delay;
int i_frame_number;
int i_initial_cpb_removal_delay_length;
int i_cpb_removal_delay_length;
int i_dpb_output_delay_length;
}vui_hrd;
struct i965_gpe_context gpe_context;
struct i965_buffer_surface mfc_batchbuffer_surface;
struct intel_batchbuffer *aux_batchbuffer;
struct i965_buffer_surface aux_batchbuffer_surface;
/* Per-generation hooks, set by the gen-specific context_init routines.
* NOTE: the "suface"/"lenght" misspellings below are part of the
* established member/parameter names used throughout the driver. */
void (*pipe_mode_select)(VADriverContextP ctx,
int standard_select,
struct intel_encoder_context *encoder_context);
void (*set_surface_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*ind_obj_base_addr_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*avc_img_state)(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
void (*avc_qm_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*avc_fqm_state)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context);
void (*insert_object)(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
unsigned int *insert_data,
int lenght_in_dws, int data_bits_in_last_dw,
int skip_emul_byte_count,
int is_last_header, int is_end_of_slice,
int emulation_flag,
struct intel_batchbuffer *batch);
void (*buffer_suface_setup)(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct i965_buffer_surface *buffer_surface,
unsigned long binding_table_offset,
unsigned long surface_state_offset);
};
/* Encoding pipeline entry point for one frame. */
VAStatus gen6_mfc_pipeline(VADriverContextP ctx,
VAProfile profile,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
void gen6_mfc_context_destroy(void *context);
extern
Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
/* Shared BRC/HRD and AVC helpers used by the gen6/75/8 MFC code. */
extern int intel_mfc_update_hrd(struct encode_state *encode_state,
struct gen6_mfc_context *mfc_context,
int frame_bits);
extern int intel_mfc_brc_postpack(struct encode_state *encode_state,
struct gen6_mfc_context *mfc_context,
int frame_bits);
extern void intel_mfc_hrd_context_update(struct encode_state *encode_state,
struct gen6_mfc_context *mfc_context);
extern int intel_mfc_interlace_check(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
extern void intel_mfc_brc_prepare(struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
extern void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
struct intel_batchbuffer *slice_batch);
extern VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
extern int intel_avc_enc_slice_type_fixup(int type);
extern void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
extern
Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern void
intel_avc_slice_insert_packed_data(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
int slice_index,
struct intel_batchbuffer *slice_batch);
#endif /* _GEN6_MFC_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,78 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef _GEN6_MFD_H_
#define _GEN6_MFD_H_
#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_decoder.h"
/* VC-1 picture types. */
#define GEN6_VC1_I_PICTURE 0
#define GEN6_VC1_P_PICTURE 1
#define GEN6_VC1_B_PICTURE 2
#define GEN6_VC1_BI_PICTURE 3
#define GEN6_VC1_SKIPPED_PICTURE 4
/* VC-1 profiles. */
#define GEN6_VC1_SIMPLE_PROFILE 0
#define GEN6_VC1_MAIN_PROFILE 1
#define GEN6_VC1_ADVANCED_PROFILE 2
#define GEN6_VC1_RESERVED_PROFILE 3
/* Per-surface VC-1 state: direct-MV buffer plus the picture type it
* was generated for. */
struct gen6_vc1_surface
{
dri_bo *dmv;
int picture_type;
};
struct hw_context;
/* Gen6 MFD (video decode) context: decoder scratch buffers and
* reference-frame bookkeeping layered on the generic hw_context. */
struct gen6_mfd_context
{
struct hw_context base;
union {
VAIQMatrixBufferMPEG2 mpeg2;
} iq_matrix;
GenFrameStoreContext fs_ctx;
GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES];
GenBuffer post_deblocking_output;
GenBuffer pre_deblocking_output;
GenBuffer intra_row_store_scratch_buffer;
GenBuffer deblocking_filter_row_store_scratch_buffer;
GenBuffer bsd_mpc_row_store_scratch_buffer;
GenBuffer mpr_row_store_scratch_buffer;
GenBuffer bitplane_read_buffer;
/* "wa" presumably marks a hardware workaround for MPEG-2 slice
* vertical positions -- confirm against the .c implementation. */
int wa_mpeg2_slice_vertical_position;
};
#endif /* _GEN6_MFD_H_ */

View File

@@ -0,0 +1,668 @@
/*
* Copyright © 2010-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Zhou Chang <chang.zhou@intel.com>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "gen6_vme.h"
#include "gen6_mfc.h"
/* A surface-state slot is padded so both Gen6 and Gen7 layouts fit. */
#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
/* The binding table is laid out directly after all surface-state slots. */
#define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
enum VIDEO_CODING_TYPE{
VIDEO_CODING_AVC = 0,
VIDEO_CODING_SUM
};
/* Indices into gen6_vme_kernels[] below. */
enum AVC_VME_KERNEL_TYPE{
AVC_VME_INTRA_SHADER = 0,
AVC_VME_INTER_SHADER,
AVC_VME_BATCHBUFFER,
AVC_VME_KERNEL_SUM
};
/* Pre-compiled Gen6 media kernel binaries, generated into the
* shaders/vme directory at build time. */
static const uint32_t gen6_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame.g6b"
};
static const uint32_t gen6_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame.g6b"
};
static const uint32_t gen6_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g6b"
};
/* Kernel descriptor table, indexed by enum AVC_VME_KERNEL_TYPE. */
static struct i965_kernel gen6_vme_kernels[] = {
{
"AVC VME Intra Frame",
AVC_VME_INTRA_SHADER, /*index*/
gen6_vme_intra_frame,
sizeof(gen6_vme_intra_frame),
NULL
},
{
"AVC VME inter Frame",
AVC_VME_INTER_SHADER,
gen6_vme_inter_frame,
sizeof(gen6_vme_inter_frame),
NULL
},
{
"AVC VME BATCHBUFFER",
AVC_VME_BATCHBUFFER,
gen6_vme_batchbuffer,
sizeof(gen6_vme_batchbuffer),
NULL
},
};
/* only used for VME source surface state */
static void
gen6_vme_source_surface_state(VADriverContextP ctx,
                              int index,
                              struct object_surface *obj_surface,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *const vme_ctx = encoder_context->vme_context;

    /* Bind the picture as a VME (surface2) surface at the given slot. */
    vme_ctx->vme_surface2_setup(ctx,
                                &vme_ctx->gpe_context,
                                obj_surface,
                                BINDING_TABLE_OFFSET(index),
                                SURFACE_STATE_OFFSET(index));
}
/* Binds the source picture as a media read/write surface. */
static void
gen6_vme_media_source_surface_state(VADriverContextP ctx,
                                    int index,
                                    struct object_surface *obj_surface,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *const vme_ctx = encoder_context->vme_context;

    vme_ctx->vme_media_rw_surface_setup(ctx,
                                        &vme_ctx->gpe_context,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET(index),
                                        SURFACE_STATE_OFFSET(index));
}
/* Allocates and binds the VME output buffer: one fixed-size record per
 * macroblock, with the record size depending on intra vs inter coding. */
static void
gen6_vme_output_buffer_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int index,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_ctx = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *seq_param =
        (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param =
        (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int is_intra = slice_param->slice_type == SLICE_TYPE_I;
    int mb_count = seq_param->picture_width_in_mbs * seq_param->picture_height_in_mbs;

    vme_ctx->vme_output.num_blocks = mb_count;
    vme_ctx->vme_output.pitch = 16; /* in bytes, always 16 */
    vme_ctx->vme_output.size_block = is_intra ? INTRA_VME_OUTPUT_IN_BYTES
                                              : INTER_VME_OUTPUT_IN_BYTES;

    vme_ctx->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "VME output buffer",
                                          vme_ctx->vme_output.num_blocks * vme_ctx->vme_output.size_block,
                                          0x1000);
    assert(vme_ctx->vme_output.bo);

    vme_ctx->vme_buffer_suface_setup(ctx,
                                     &vme_ctx->gpe_context,
                                     &vme_ctx->vme_output,
                                     BINDING_TABLE_OFFSET(index),
                                     SURFACE_STATE_OFFSET(index));
}
/* Allocates and binds the buffer that holds the per-MB VME batchbuffer
 * records (one 32-byte / 2-OWORD block per macroblock, plus one extra). */
static void
gen6_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      int index,
                                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
    vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
    vme_context->vme_batchbuffer.pitch = 16;
    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                   "VME batchbuffer",
                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
                                                   0x1000);
    /* Fix: check the allocation like gen6_vme_output_buffer_setup() does;
     * the bo is dereferenced unconditionally by the surface setup below. */
    assert(vme_context->vme_batchbuffer.bo);

    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_batchbuffer,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}
/* Binds every surface the VME kernels need.  Binding-table slots:
* 0 = source (VME view), 4 = source (media RW view), slots 1/2 are
* filled with reference frames for P/B slices, 3 = VME output buffer,
* 5 = VME batchbuffer. */
static VAStatus
gen6_vme_surface_setup(VADriverContextP ctx,
struct encode_state *encode_state,
int is_intra,
struct intel_encoder_context *encoder_context)
{
struct object_surface *obj_surface;
/*Setup surfaces state*/
/* current picture for encoding */
obj_surface = encode_state->input_yuv_object;
gen6_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
gen6_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
if (!is_intra) {
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
int slice_type;
slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
/* List-0 references always; list-1 references only for B slices. */
intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen6_vme_source_surface_state);
if (slice_type == SLICE_TYPE_B)
intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen6_vme_source_surface_state);
}
/* VME output */
gen6_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
gen6_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
return VA_STATUS_SUCCESS;
}
/*
 * Fill the interface descriptor table (IDRT): one 32-byte descriptor per
 * loaded VME kernel.  Each descriptor points at the kernel binary and at
 * the shared VME state buffer (via the sampler-state pointer field), with
 * relocations emitted so the GPU addresses stay valid after binding.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus gen6_vme_interface_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = vme_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);  /* map writable */
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
        struct i965_kernel *kernel;

        kernel = &vme_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);

        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);  /* 64-byte aligned */
        desc->desc2.sampler_count = 1; /* FIXME: */
        desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);  /* 32-byte aligned */
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;

        /*kernel start*/
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);

        /*Sampler State(VME state pointer)*/
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          (1 << 2), // NOTE(review): delta of 4 — presumably re-encodes sampler_count (=1) into desc2 alongside the relocated pointer; confirm against PRM
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
                          vme_context->vme_state.bo);

        desc++;
    }

    dri_bo_unmap(bo);
    return VA_STATUS_SUCCESS;
}
/*
 * Write the per-frame constant (CURBE) data read by the VME kernels.
 * Only dword 31 — the motion-vector count, derived from the stream's
 * H.264 level — is populated here.  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *curbe_dw;
    int mv_num;

    /* Higher H.264 levels use fewer MVs per macroblock. */
    if (vme_context->h264_level >= 31)
        mv_num = 8;
    else if (vme_context->h264_level >= 30)
        mv_num = 16;
    else
        mv_num = 32;

    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
    assert(vme_context->gpe_context.curbe.bo->virtual);
    curbe_dw = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual;

    /* DW31 carries the MV count; the rest of the CURBE is left untouched. */
    curbe_dw[31] = mv_num;

    /*TODO copy buffer into CURB*/
    dri_bo_unmap(vme_context->gpe_context.curbe.bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Per-QP intra MB mode cost LUT, written into VME state dword 16 by
 * gen6_vme_state_setup_fixup().  Indexed by QP; exactly 52 entries
 * covering the H.264 QP range 0..51.
 */
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};
/*
 * Override VME state dword 16 with a QP-dependent intra MB mode cost.
 * Applies only to intra (I/SI) slices; for CQP rate control the QP comes
 * from the picture/slice parameters, otherwise from the MFC bit-rate
 * controller's current QP for I slices.
 */
static void gen6_vme_state_setup_fixup(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context,
                                       unsigned int *vme_state_message)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    if (slice_param->slice_type != SLICE_TYPE_I &&
        slice_param->slice_type != SLICE_TYPE_SI)
        return;

    if (encoder_context->rate_control_mode == VA_RC_CQP) {
        /* Bug fix: intra_mb_mode_cost_table has exactly 52 entries
         * (QP 0..51), but pic_init_qp + slice_qp_delta comes straight from
         * the application and was used unclamped, allowing an
         * out-of-bounds table read on malformed parameters. */
        int qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

        if (qp < 0)
            qp = 0;
        else if (qp > 51)
            qp = 51;

        vme_state_message[16] = intra_mb_mode_cost_table[qp];
    } else
        vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY];
}
/*
 * Build the 32-dword VME state message consumed by the VME kernels.
 * NOTE(review): the dword layout is hardware/kernel specific; the constants
 * below look like search-path and cost tables — confirm against the
 * matching shader sources before changing any of them.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         int is_intra,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *vme_state_message;
    int i;

    //building VME state message
    dri_bo_map(vme_context->vme_state.bo, 1);
    assert(vme_context->vme_state.bo->virtual);
    vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;

    /* DW0..13: one of two presets selected by the encoder quality level. */
    if (encoder_context->quality_level != ENCODER_LOW_QUALITY) {
        vme_state_message[0] = 0x01010101;
        vme_state_message[1] = 0x10010101;
        vme_state_message[2] = 0x0F0F0F0F;
        vme_state_message[3] = 0x100F0F0F;
        vme_state_message[4] = 0x01010101;
        vme_state_message[5] = 0x10010101;
        vme_state_message[6] = 0x0F0F0F0F;
        vme_state_message[7] = 0x100F0F0F;
        vme_state_message[8] = 0x01010101;
        vme_state_message[9] = 0x10010101;
        vme_state_message[10] = 0x0F0F0F0F;
        vme_state_message[11] = 0x000F0F0F;
        vme_state_message[12] = 0x00;
        vme_state_message[13] = 0x00;
    } else {
        vme_state_message[0] = 0x10010101;
        vme_state_message[1] = 0x100F0F0F;
        vme_state_message[2] = 0x10010101;
        vme_state_message[3] = 0x000F0F0F;
        vme_state_message[4] = 0;
        vme_state_message[5] = 0;
        vme_state_message[6] = 0;
        vme_state_message[7] = 0;
        vme_state_message[8] = 0;
        vme_state_message[9] = 0;
        vme_state_message[10] = 0;
        vme_state_message[11] = 0;
        vme_state_message[12] = 0;
        vme_state_message[13] = 0;
    }

    vme_state_message[14] = 0x4a4a;
    vme_state_message[15] = 0x0;
    /* DW16 is overridden per-QP for intra slices by the fixup below. */
    vme_state_message[16] = 0x4a4a4a4a;
    vme_state_message[17] = 0x4a4a4a4a;
    vme_state_message[18] = 0x21110100;
    vme_state_message[19] = 0x61514131;

    /* DW20..31: cleared. */
    for(i = 20; i < 32; i++) {
        vme_state_message[i] = 0;
    }
    //vme_state_message[16] = 0x42424242; //cost function LUT set 0 for Intra

    gen6_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);

    dri_bo_unmap( vme_context->vme_state.bo);
    return VA_STATUS_SUCCESS;
}
/*
 * Fill the second-level batch buffer with MEDIA_OBJECT commands, walking
 * each slice's macroblocks in raster order.  MBs are issued in groups:
 * the first group of a slice spans one MB row (to mark the slice edge),
 * subsequent groups cover up to 128 MBs each.  Each command carries the
 * MB position, group size, slice-edge flag, 8x8-transform flag and the
 * quality level as inline data.
 */
static void
gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int number_mb_cmds;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;

        for (i = 0; i < slice_mb_number; ) {
            int mb_count = i + slice_mb_begin;

            /* Convert the linear MB address to (x, y) in MB units. */
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;

            if( i == 0 ) {
                number_mb_cmds = mb_width; // we must mark the slice edge.
            } else if ( (i + 128 ) <= slice_mb_number) {
                number_mb_cmds = 128;  /* full group */
            } else {
                number_mb_cmds = slice_mb_number - i;  /* tail of the slice */
            }

            /* MEDIA_OBJECT header: 9 dwords total (length field = dwords - 2). */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
            *command_ptr++ = kernel;  /* interface descriptor index (intra/inter kernel) */
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            /* Bit 1 flags the first group of a slice. */
            *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag | ((i==0) << 1));
            *command_ptr++ = encoder_context->quality_level;

            i += number_mb_cmds;
        }
    }

    /* Pad and terminate the second-level batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
/*
 * Per-frame (re)initialization of the VME media pipeline: reset the GPE
 * context, drop last frame's output and batch buffers (they are
 * re-allocated during surface setup), and allocate a fresh VME state
 * buffer.
 */
static void gen6_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    dri_bo *vme_state_bo;

    i965_gpe_context_init(ctx, &vme_context->gpe_context);

    /* Buffers below are re-created for every frame. */
    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_batchbuffer.bo);
    vme_context->vme_batchbuffer.bo = NULL;

    /* VME state buffer: 16KB, 64-byte aligned. */
    dri_bo_unreference(vme_context->vme_state.bo);
    vme_state_bo = dri_bo_alloc(i965->intel.bufmgr,
                                "Buffer",
                                1024 * 16, 64);
    assert(vme_state_bo);
    vme_context->vme_state.bo = vme_state_bo;
}
/*
 * Emit the first-level batch for the VME pass: build the per-MB
 * second-level batch, program the GPE media pipeline, then chain into the
 * second-level batch with MI_BATCH_BUFFER_START.
 */
static void gen6_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    /* Select the intra or inter VME kernel based on the first slice. */
    gen6_vme_fill_vme_batchbuffer(ctx,
                                  encode_state,
                                  width_in_mbs, height_in_mbs,
                                  is_intra ? AVC_VME_INTRA_SHADER : AVC_VME_INTER_SHADER,
                                  pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                  encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);

    BEGIN_BATCH(batch, 2);
    /* NOTE(review): bit 8 presumably marks a second-level batch — confirm
     * against the PRM's MI_BATCH_BUFFER_START definition. */
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);
}
/*
 * Prepare a frame for the VME pass: cache the stream's H.264 level, set up
 * all memory objects (surfaces, interface descriptors, constants, VME
 * state) and program the media pipeline.  Always returns
 * VA_STATUS_SUCCESS.
 */
static VAStatus gen6_vme_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSliceParameterBufferH264 *slice_param =
        (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *seq_param =
        (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int is_intra = (slice_param->slice_type == SLICE_TYPE_I);

    /* Track the level; it selects the MV count in gen6_vme_constant_setup(). */
    if (vme_context->h264_level != seq_param->level_idc)
        vme_context->h264_level = seq_param->level_idc;

    /* Set up every memory object the kernels will touch... */
    gen6_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
    gen6_vme_interface_setup(ctx, encode_state, encoder_context);
    gen6_vme_constant_setup(ctx, encode_state, encoder_context);
    gen6_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);

    /* ...then emit the media pipeline commands that consume them. */
    gen6_vme_pipeline_programing(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
/*
 * Submit the accumulated command stream to the kernel driver.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus gen6_vme_run(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    intel_batchbuffer_flush(encoder_context->base.batch);

    return VA_STATUS_SUCCESS;
}
/* No post-run work is needed for the VME stage; kept for pipeline symmetry. */
static VAStatus gen6_vme_stop(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
/*
 * Top-level VME pipeline entry (installed as encoder_context->vme_pipeline):
 * re-initialize per-frame state, prepare all buffers and commands, submit
 * the batch, then finish.  The steps must run in this order.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen6_vme_pipeline(VADriverContextP ctx,
                  VAProfile profile,
                  struct encode_state *encode_state,
                  struct intel_encoder_context *encoder_context)
{
    gen6_vme_media_init(ctx, encoder_context);
    gen6_vme_prepare(ctx, encode_state, encoder_context);
    gen6_vme_run(ctx, encode_state, encoder_context);
    gen6_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
/*
 * Destructor installed as encoder_context->vme_context_destroy: release
 * the GPE context, drop every buffer object, then free the context itself.
 */
static void
gen6_vme_context_destroy(void *context)
{
    struct gen6_vme_context *vme_context = context;

    i965_gpe_context_destroy(&vme_context->gpe_context);

    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_batchbuffer.bo);
    vme_context->vme_batchbuffer.bo = NULL;

    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;

    free(vme_context);
}
/*
 * Allocate and initialize the Gen6 VME context for an H.264 encoder
 * instance, load the VME kernels and install the pipeline hooks on
 * encoder_context.  Returns True on success, False on failure.
 */
Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = NULL;

    /* This backend only supports AVC encoding. */
    if (encoder_context->codec != CODEC_H264) {
        /* Never get here */
        assert(0);
        return False;
    }

    vme_context = calloc(1, sizeof(struct gen6_vme_context));

    /* Bug fix: the calloc result was dereferenced unconditionally; report
     * allocation failure to the caller instead of crashing. */
    if (!vme_context)
        return False;

    /* GPE state sizing. */
    vme_context->gpe_context.surface_state_binding_table.length =
        (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

    vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
    vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
    vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;

    /* VFE thread/URB configuration.  NOTE(review): the "60 - 1" / "59 - 1"
     * values look like hardware minus-one encodings — confirm against the
     * PRM before changing. */
    vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
    vme_context->gpe_context.vfe_state.num_urb_entries = 16;
    vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
    vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
    vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    vme_context->video_coding_type = VIDEO_CODING_AVC;
    vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM;
    i965_gpe_load_kernels(ctx,
                          &vme_context->gpe_context,
                          gen6_vme_kernels,
                          vme_context->vme_kernel_sum);

    /* Install the per-generation surface-state helpers. */
    vme_context->vme_surface2_setup = i965_gpe_surface2_setup;
    vme_context->vme_media_rw_surface_setup = i965_gpe_media_rw_surface_setup;
    vme_context->vme_buffer_suface_setup = i965_gpe_buffer_suface_setup;

    encoder_context->vme_pipeline = gen6_vme_pipeline;
    encoder_context->vme_context = vme_context;
    encoder_context->vme_context_destroy = gen6_vme_context_destroy;

    return True;
}

View File

@@ -0,0 +1,178 @@
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Zhou Chang <chang.zhou@intel.com>
*
*/
#ifndef _GEN6_VME_H_
#define _GEN6_VME_H_
#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_gpe_utils.h"
#define INTRA_VME_OUTPUT_IN_BYTES 16 /* in bytes */
#define INTRA_VME_OUTPUT_IN_DWS (INTRA_VME_OUTPUT_IN_BYTES / 4)
#define INTER_VME_OUTPUT_IN_BYTES 160 /* the first 128 bytes for MVs and the last 32 bytes for other info */
#define INTER_VME_OUTPUT_IN_DWS (INTER_VME_OUTPUT_IN_BYTES / 4)
#define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6 34
#define GEN6_VME_KERNEL_NUMBER 3
struct encode_state;
struct intel_encoder_context;
/* State shared by the Gen6+ VME (motion estimation) encoder stages. */
struct gen6_vme_context
{
    struct i965_gpe_context gpe_context;

    /* Buffer holding the 32-dword VME state message (search paths / cost
     * tables) read by the VME kernels. */
    struct {
        dri_bo *bo;
    } vme_state;

    struct i965_buffer_surface vme_output;        /* per-MB VME result records */
    struct i965_buffer_surface vme_batchbuffer;   /* second-level batch buffer */

    /* Per-generation surface-state helpers, installed by *_context_init(). */
    void (*vme_surface2_setup)(VADriverContextP ctx,
                               struct i965_gpe_context *gpe_context,
                               struct object_surface *obj_surface,
                               unsigned long binding_table_offset,
                               unsigned long surface_state_offset);
    void (*vme_media_rw_surface_setup)(VADriverContextP ctx,
                                       struct i965_gpe_context *gpe_context,
                                       struct object_surface *obj_surface,
                                       unsigned long binding_table_offset,
                                       unsigned long surface_state_offset);
    void (*vme_buffer_suface_setup)(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct i965_buffer_surface *buffer_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset);
    void (*vme_media_chroma_surface_setup)(VADriverContextP ctx,
                                           struct i965_gpe_context *gpe_context,
                                           struct object_surface *obj_surface,
                                           unsigned long binding_table_offset,
                                           unsigned long surface_state_offset);
    void *vme_state_message;
    unsigned int h264_level;          /* cached level_idc from the sequence params */
    unsigned int video_coding_type;   /* VIDEO_CODING_AVC, ... */
    unsigned int vme_kernel_sum;      /* number of loaded VME kernels */
    unsigned int mpeg2_level;

    /* Reference-picture bookkeeping for the VME pass. */
    struct object_surface *used_reference_objects[2];
    void *used_references[2];
    unsigned int ref_index_in_mb[2];
};
#define MPEG2_PIC_WIDTH_HEIGHT 30
#define MPEG2_MV_RANGE 29
#define MPEG2_LEVEL_MASK 0x0f
#define MPEG2_LEVEL_LOW 0x0a
#define MPEG2_LEVEL_MAIN 0x08
#define MPEG2_LEVEL_HIGH 0x04
Bool gen75_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern void intel_vme_update_mbmv_cost(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
#define MODE_INTRA_NONPRED 0
#define MODE_INTRA_16X16 1
#define MODE_INTRA_8X8 2
#define MODE_INTRA_4X4 3
#define MODE_INTER_16X8 4
#define MODE_INTER_8X16 4
#define MODE_INTER_8X8 5
#define MODE_INTER_8X4 6
#define MODE_INTER_4X8 6
#define MODE_INTER_4X4 7
#define MODE_INTER_16X16 8
#define MODE_INTER_BWD 9
#define MODE_REFID_COST 10
#define MODE_CHROMA_INTRA 11
#define MODE_INTER_MV0 12
#define MODE_INTER_MV1 13
#define MODE_INTER_MV2 14
#define MODE_INTER_MV3 15
#define MODE_INTER_MV4 16
#define MODE_INTER_MV5 17
#define MODE_INTER_MV6 18
#define MODE_INTER_MV7 19
#define INTRA_PRED_AVAIL_FLAG_AE 0x60
#define INTRA_PRED_AVAIL_FLAG_B 0x10
#define INTRA_PRED_AVAIL_FLAG_C 0x8
#define INTRA_PRED_AVAIL_FLAG_D 0x4
#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
extern void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
struct encode_state *encode_state,
int mb_width, int mb_height,
int kernel,
int transform_8x8_mode_flag,
struct intel_encoder_context *encoder_context);
extern void
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
extern void
intel_vme_mpeg2_state_setup(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
extern void
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
struct encode_state *encode_state,
int mb_width, int mb_height,
int kernel,
struct intel_encoder_context *encoder_context);
void
intel_avc_vme_reference_state(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
int list_index,
int surface_index,
void (* vme_source_surface_state)(
VADriverContextP ctx,
int index,
struct object_surface *obj_surface,
struct intel_encoder_context *encoder_context));
extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
#endif /* _GEN6_VME_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,278 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_post_processing.h"
#include "gen75_picture_process.h"
extern struct hw_context *
i965_proc_context_init(VADriverContextP ctx,
struct object_config *obj_config);
/*
 * Run implicit surface format conversion and scaling through the i965
 * render-based proc context, creating that context lazily on first use.
 * Returns the status reported by i965_proc_picture().
 */
static VAStatus
gen75_vpp_fmt_cvt(VADriverContextP ctx,
                  VAProfile profile,
                  union codec_state *codec_state,
                  struct hw_context *hw_context)
{
    struct intel_video_process_context *proc_ctx =
        (struct intel_video_process_context *)hw_context;

    if (proc_ctx->vpp_fmt_cvt_ctx == NULL)
        proc_ctx->vpp_fmt_cvt_ctx = i965_proc_context_init(ctx, NULL);

    return i965_proc_picture(ctx, profile, codec_state,
                             proc_ctx->vpp_fmt_cvt_ctx);
}
/*
 * Run the pipeline on the VEBox fixed-function unit, creating the VEBox
 * context lazily on first use.  Only Haswell and Gen8 paths exist here;
 * on any other device no processing happens and va_status keeps its
 * initial VA_STATUS_SUCCESS value.
 */
static VAStatus
gen75_vpp_vebox(VADriverContextP ctx,
                struct intel_video_process_context* proc_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    VAProcPipelineParameterBuffer* pipeline_param = proc_ctx->pipeline_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    /* vpp features based on VEBox fixed function */
    if(proc_ctx->vpp_vebox_ctx == NULL) {
        proc_ctx->vpp_vebox_ctx = gen75_vebox_context_init(ctx);
    }

    /* Forward the current frame's parameters and surfaces to the VEBox. */
    proc_ctx->vpp_vebox_ctx->pipeline_param = pipeline_param;
    proc_ctx->vpp_vebox_ctx->surface_input_object = proc_ctx->surface_pipeline_input_object;
    proc_ctx->vpp_vebox_ctx->surface_output_object = proc_ctx->surface_render_output_object;

    if (IS_HASWELL(i965->intel.device_info))
        va_status = gen75_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx);
    else if (IS_GEN8(i965->intel.device_info))
        va_status = gen8_vebox_process_picture(ctx, proc_ctx->vpp_vebox_ctx);

    return va_status;
}
/*
 * Run the pipeline on the GPE (shader-based) VPP path, creating the GPE
 * context lazily on first use.  Returns the status reported by
 * vpp_gpe_process_picture().
 */
static VAStatus
gen75_vpp_gpe(VADriverContextP ctx,
              struct intel_video_process_context* proc_ctx)
{
    struct vpp_gpe_context *gpe_ctx;

    if (proc_ctx->vpp_gpe_ctx == NULL)
        proc_ctx->vpp_gpe_ctx = vpp_gpe_context_init(ctx);

    /* Hand the current frame's parameters and surfaces to the GPE context. */
    gpe_ctx = proc_ctx->vpp_gpe_ctx;
    gpe_ctx->pipeline_param = proc_ctx->pipeline_param;
    gpe_ctx->surface_pipeline_input_object = proc_ctx->surface_pipeline_input_object;
    gpe_ctx->surface_output_object = proc_ctx->surface_render_output_object;

    return vpp_gpe_process_picture(ctx, gpe_ctx);
}
/*
 * Entry point for one video-processing frame (installed as base.run).
 * Validates the input/output surfaces and the filter chain, then routes
 * the work to one of three back ends:
 *   - no filters        -> implicit format conversion/scaling (render path)
 *   - one VEBox filter  -> VEBox fixed function
 *   - sharpening filter -> GPE shader path (NV12 only)
 *   - 2+ filters        -> VEBox, provided every filter is VEBox-capable
 * Returns VA_STATUS_SUCCESS or an error status on invalid parameters.
 */
VAStatus
gen75_proc_picture(VADriverContextP ctx,
                   VAProfile profile,
                   union codec_state *codec_state,
                   struct hw_context *hw_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct proc_state* proc_st = &(codec_state->proc);
    struct intel_video_process_context *proc_ctx =
        (struct intel_video_process_context *)hw_context;
    VAProcPipelineParameterBuffer *pipeline_param =
        (VAProcPipelineParameterBuffer *)proc_st->pipeline_param->buffer;
    struct object_surface *obj_dst_surf = NULL;
    struct object_surface *obj_src_surf = NULL;
    VAStatus status;

    proc_ctx->pipeline_param = pipeline_param;

    if (proc_st->current_render_target == VA_INVALID_SURFACE ||
        pipeline_param->surface == VA_INVALID_SURFACE) {
        status = VA_STATUS_ERROR_INVALID_SURFACE;
        goto error;
    }

    obj_dst_surf = SURFACE(proc_st->current_render_target);

    if (!obj_dst_surf) {
        status = VA_STATUS_ERROR_INVALID_SURFACE;
        goto error;
    }

    obj_src_surf = SURFACE(proc_ctx->pipeline_param->surface);

    if (!obj_src_surf) {
        status = VA_STATUS_ERROR_INVALID_SURFACE;
        goto error;
    }

    if (!obj_src_surf->bo) {
        status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
        goto error;
    }

    if (pipeline_param->num_filters && !pipeline_param->filters) {
        status = VA_STATUS_ERROR_INVALID_PARAMETER;
        goto error;
    }

    /* Allocate destination storage on demand: linear NV12, 4:2:0. */
    if (!obj_dst_surf->bo) {
        unsigned int is_tiled = 0;
        unsigned int fourcc = VA_FOURCC_NV12;
        int sampling = SUBSAMPLE_YUV420;
        i965_check_alloc_surface_bo(ctx, obj_dst_surf, is_tiled, fourcc, sampling);
    }

    proc_ctx->surface_render_output_object = obj_dst_surf;
    proc_ctx->surface_pipeline_input_object = obj_src_surf;
    assert(pipeline_param->num_filters <= 4);

    VABufferID *filter_id = (VABufferID*) pipeline_param->filters;

    if(pipeline_param->num_filters == 0 || pipeline_param->filters == NULL ){
        /* implicity surface format coversion and scaling */
        gen75_vpp_fmt_cvt(ctx, profile, codec_state, hw_context);
    }else if(pipeline_param->num_filters == 1) {
        struct object_buffer * obj_buf = BUFFER((*filter_id) + 0);

        /* Bug fix: an assert() with exactly this condition preceded the
         * check below, aborting debug builds on invalid application input
         * instead of taking the intended graceful error path. */
        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer) {
            status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
            goto error;
        }

        VAProcFilterParameterBuffer* filter =
            (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;

        if (filter->type == VAProcFilterNoiseReduction ||
            filter->type == VAProcFilterDeinterlacing ||
            filter->type == VAProcFilterSkinToneEnhancement ||
            filter->type == VAProcFilterColorBalance){
            /* These filters map onto the VEBox fixed function. */
            gen75_vpp_vebox(ctx, proc_ctx);
        }else if(filter->type == VAProcFilterSharpening){
            /* Sharpening runs on the GPE shader path and supports NV12 only. */
            if (obj_src_surf->fourcc != VA_FOURCC_NV12 ||
                obj_dst_surf->fourcc != VA_FOURCC_NV12) {
                status = VA_STATUS_ERROR_UNIMPLEMENTED;
                goto error;
            }

            gen75_vpp_gpe(ctx, proc_ctx);
        }
    }else if (pipeline_param->num_filters >= 2) {
        unsigned int i = 0;

        /* Multiple filters are only supported when all of them are
         * VEBox-capable. */
        for (i = 0; i < pipeline_param->num_filters; i++){
            struct object_buffer * obj_buf = BUFFER(pipeline_param->filters[i]);

            if (!obj_buf ||
                !obj_buf->buffer_store ||
                !obj_buf->buffer_store->buffer) {
                status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
                goto error;
            }

            VAProcFilterParameterBuffer* filter =
                (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;

            if (filter->type != VAProcFilterNoiseReduction &&
                filter->type != VAProcFilterDeinterlacing &&
                filter->type != VAProcFilterSkinToneEnhancement &&
                filter->type != VAProcFilterColorBalance) {
                printf("Do not support multiply filters outside vebox pipeline \n");
                assert(0);
            }
        }
        gen75_vpp_vebox(ctx, proc_ctx);
    }

    return VA_STATUS_SUCCESS;

error:
    return status;
}
/*
 * Destructor installed as base.destroy: tear down whichever sub-contexts
 * were lazily created during processing, then free the context itself.
 */
static void
gen75_proc_context_destroy(void *hw_context)
{
    struct intel_video_process_context *proc_ctx =
        (struct intel_video_process_context *)hw_context;
    VADriverContextP ctx = (VADriverContextP)(proc_ctx->driver_context);

    if (proc_ctx->vpp_fmt_cvt_ctx != NULL) {
        proc_ctx->vpp_fmt_cvt_ctx->destroy(proc_ctx->vpp_fmt_cvt_ctx);
        proc_ctx->vpp_fmt_cvt_ctx = NULL;
    }

    if (proc_ctx->vpp_vebox_ctx != NULL) {
        gen75_vebox_context_destroy(ctx, proc_ctx->vpp_vebox_ctx);
        proc_ctx->vpp_vebox_ctx = NULL;
    }

    if (proc_ctx->vpp_gpe_ctx != NULL) {
        vpp_gpe_context_destroy(ctx, proc_ctx->vpp_gpe_ctx);
        proc_ctx->vpp_gpe_ctx = NULL;
    }

    free(proc_ctx);
}
/*
 * Allocate and initialize the Gen7.5+ video-processing context.
 * Sub-contexts (VEBox, GPE, format conversion) are created lazily on
 * first use.  Returns the new context, or NULL on allocation failure.
 */
struct hw_context *
gen75_proc_context_init(VADriverContextP ctx,
                        struct object_config *obj_config)
{
    struct intel_video_process_context *proc_context
        = calloc(1, sizeof(struct intel_video_process_context));

    /* Bug fix: the calloc result was dereferenced unconditionally;
     * propagate allocation failure to the caller instead. */
    if (!proc_context)
        return NULL;

    proc_context->base.destroy = gen75_proc_context_destroy;
    proc_context->base.run = gen75_proc_picture;

    /* calloc already zeroed these; kept explicit for readability. */
    proc_context->vpp_vebox_ctx = NULL;
    proc_context->vpp_gpe_ctx = NULL;
    proc_context->vpp_fmt_cvt_ctx = NULL;

    proc_context->driver_context = ctx;

    return (struct hw_context *)proc_context;
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
*
*/
#ifndef _GEN75_PICTURE_PROCESS_H
#define _GEN75_PICTURE_PROCESS_H
#include <va/va_vpp.h>
#include "i965_drv_video.h"
#include "gen75_vpp_vebox.h"
#include "gen75_vpp_gpe.h"
/* Per-instance state for the Gen7.5+ video-processing pipeline. */
struct intel_video_process_context
{
    struct hw_context base;      /* must stay first: cast target for hw_context * */
    void* driver_context;        /* back-pointer to the VADriverContextP */

    /* Back-end sub-contexts, created lazily on first use. */
    struct intel_vebox_context *vpp_vebox_ctx;
    struct hw_context *vpp_fmt_cvt_ctx;
    struct vpp_gpe_context *vpp_gpe_ctx;

    VAProcPipelineParameterBuffer* pipeline_param;  /* current frame's parameters */
    struct object_surface *surface_render_output_object;   /* destination surface */
    struct object_surface *surface_pipeline_input_object;  /* source surface */
};
struct hw_context *
gen75_proc_context_init(VADriverContextP ctx, struct object_config *obj_config);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,916 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"
#define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6 34
#define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
#define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
#define CURBE_ALLOCATION_SIZE 37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH 4
/* Shaders information for sharpening */
/* Compiled Haswell (Gen7.5) EU kernel binaries, included as arrays of
 * 4-dword instructions produced by the shader build under shaders/. */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};

/* Sharpening is implemented as three passes (see vpp_gpe_process_sharpening):
 * horizontal blur, vertical blur, then unsharp-mask against the original.
 * The sub_shader_index selects an entry in this table. */
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL    /* presumably the kernel bo, filled in when loaded — confirm */
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};
/* sharpening kernels for Broadwell */
/* Same three-pass sharpening kernels as the Gen7.5 table above, compiled
 * for Gen8 (.g8b binaries). */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL    /* presumably the kernel bo, filled in when loaded — confirm */
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};
/*
 * Bind every surface the Gen7.5 GPE kernels touch into the binding table:
 * one Luma and one Chroma entry per NV12 input surface, then the output
 * surface pair, then the kernel-return buffer.  Slot order must match what
 * the kernels expect, so the sequence of setup calls below is significant.
 */
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    /* Current frame + forward/backward references, doubled because each
     * NV12 surface occupies two binding-table slots (Y plane + UV plane). */
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma)*/
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         /* Even slot: Y plane (read/write media surface). */
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         /* Odd slot: interleaved UV (chroma) plane of the same surface. */
         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Binding output NV12 surface(Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));

    /* Bind kernel return buffer surface */
    /* (note: "suface" is the project helper's actual spelling) */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte descriptor
 * per loaded sub-kernel.  Each descriptor points at its kernel's start
 * address (via a relocation) and at the shared binding table.  The kernel
 * is selected at dispatch time by the interface index written into each
 * MEDIA_OBJECT command (sub_shader_index).
 */
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /*Setup the descritor table*/
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        /* Kernel start pointer is in 64-byte units. */
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        /* Patch desc0 with the kernel bo's real GPU address at exec time. */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i* sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);
    return VA_STATUS_SUCCESS;
}
/*
 * Copy the per-kernel constant (CURBE) data into the mapped constant
 * buffer so the EU kernels can read it.
 */
static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo *curbe_bo = vpp_gpe_ctx->gpe_ctx.curbe.bo;

    dri_bo_map(curbe_bo, 1);
    assert(curbe_bo->virtual);

    memcpy(curbe_bo->virtual,
           vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);

    dri_bo_unmap(curbe_bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Write one MEDIA_OBJECT command per thread into the second-level batch
 * buffer.  Each command carries the thread's inline parameter block, and
 * the batch is terminated with a padding dword plus MI_BATCH_BUFFER_END.
 * The exact dword layout here must match the sizing in
 * gen75_gpe_process_init(), so do not reorder these writes.
 */
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting*/
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         /* Command length field = (total dwords - 2): 6 fixed dwords plus
          * the inline data, minus the 2 dwords implied by the header. */
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         /* Interface descriptor index: selects the sub-kernel to run. */
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    /* Pad to a qword boundary and terminate the batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Emit the Gen7.5 media pipeline state into the primary batch, fill the
 * second-level batch with MEDIA_OBJECT commands, and chain to it with
 * MI_BATCH_BUFFER_START (bit 8 = second-level batch).  Emission order is
 * dictated by the hardware command sequence and must not change.
 */
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
/*
 * (Re)allocate the per-frame GPU buffers for the Gen7.5 GPE path: the
 * second-level batch buffer holding one MEDIA_OBJECT per thread, and the
 * kernel-return buffer (16 bytes per thread), then initialize GPE state.
 *
 * Returns VA_STATUS_ERROR_ALLOCATION_FAILED if a buffer cannot be
 * allocated (BUG FIX: the dri_bo_alloc() results were previously used
 * without a NULL check).
 */
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* 6 header dwords + inline parameters per MEDIA_OBJECT; 16 extra bytes
     * cover the padding dword and MI_BATCH_BUFFER_END written by
     * gen75_gpe_process_parameters_fill(). */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
    unsigned int kernel_return_size;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    kernel_return_size =  vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    if (!bo)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    if (!bo)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    /* Generation-specific GPE context initialization (set in
     * vpp_gpe_context_init()). */
    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}
/*
 * Program all per-frame state for the Gen7.5 GPE run: surface binding,
 * interface descriptors, and the media pipeline commands.
 *
 * BUG FIX: the VAStatus results of the setup steps were previously
 * discarded; they are now propagated to the caller.
 */
static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status;

    /*Setup all the memory object*/
    va_status = gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /*Programing media pipeline*/
    return gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
}
/* Submit the accumulated batch buffer to the kernel for execution. */
static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct intel_batchbuffer *batch = vpp_gpe_ctx->batch;

    intel_batchbuffer_flush(batch);

    return VA_STATUS_SUCCESS;
}
/*
 * Run one full Gen7.5 GPE pass: allocate buffers, program state, submit.
 * Stops at the first failing stage and returns its status.
 */
static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus status;

    status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);

    if (status == VA_STATUS_SUCCESS)
        status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);

    if (status == VA_STATUS_SUCCESS)
        status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);

    return status;
}
/*
 * Gen8 (Broadwell) variant of the surface binding setup: same slot layout
 * as the Gen7.5 version (Luma/Chroma pair per NV12 input, output pair,
 * kernel-return buffer), using Gen8 surface-state helpers and offsets.
 * Slot order must match what the kernels expect.
 */
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    /* Current frame + references, doubled for Y + UV binding slots. */
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma)*/
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         /* Even slot: Y plane. */
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         /* Odd slot: interleaved UV plane. */
         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Binding output NV12 surface(Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));

    /* Bind kernel return buffer surface */
    /* (note: "suface" is the project helper's actual spelling; the gen7
     * buffer helper is shared with the Gen8 path here) */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
/*
 * Gen8 interface-descriptor setup.  Unlike Gen7.5, the descriptors live
 * inside the dynamic-state buffer at idrt_offset, and the kernel start
 * pointer is a dynamic-state-relative offset (kernel_offset) rather than
 * a relocated bo address — hence no dri_bo_emit_reloc() here.
 */
static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    /* NOTE: arithmetic on void* is a GCC extension (treated as char*). */
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /*Setup the descritor table*/
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        /* Kernel start pointer is in 64-byte units. */
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Copy the per-kernel constant data into the Gen8 dynamic-state buffer.
 * NOTE(review): the data lands at offset 0 of dynamic_state.bo — confirm
 * this matches the CURBE offset the Gen8 GPE setup expects.
 */
static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo *state_bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;

    dri_bo_map(state_bo, 1);
    assert(state_bo->virtual);

    memcpy(state_bo->virtual,
           vpp_gpe_ctx->kernel_param,
           vpp_gpe_ctx->kernel_param_size);

    dri_bo_unmap(state_bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Gen8 variant of the per-thread MEDIA_OBJECT batch fill.  Identical
 * layout to the Gen7.5 version except each MEDIA_OBJECT is followed by a
 * CMD_MEDIA_STATE_FLUSH pair, as required on Gen8.  The dword layout must
 * match the sizing in gen8_gpe_process_init(); do not reorder.
 */
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting*/
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         /* Command length field = (total dwords - 2). */
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         /* Interface descriptor index: selects the sub-kernel to run. */
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         /* Gen8 requires a media state flush after each media object. */
         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    /* Pad and terminate the second-level batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
/*
 * Emit the Gen8 media pipeline state and chain to the second-level batch.
 * MI_BATCH_BUFFER_START is 3 dwords on Gen8 (48-bit addressing: extra
 * upper-address dword) with bit 8 = second-level batch and bit 0 set as
 * emitted below.  Emission order is dictated by hardware; do not reorder.
 */
static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
/*
 * (Re)allocate the per-frame GPU buffers for the Gen8 GPE path (mirrors
 * gen75_gpe_process_init), then initialize GPE state.
 *
 * Returns VA_STATUS_ERROR_ALLOCATION_FAILED if a buffer cannot be
 * allocated (BUG FIX: the dri_bo_alloc() results were previously used
 * without a NULL check).
 */
static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* 6 header dwords + inline parameters per MEDIA_OBJECT; 16 extra bytes
     * for the padding dword and MI_BATCH_BUFFER_END.  NOTE(review): the
     * gen8 fill also appends 2 dwords of CMD_MEDIA_STATE_FLUSH per thread
     * — verify the 16-byte tail still covers the worst case. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
    unsigned int kernel_return_size;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    kernel_return_size =  vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    if (!bo)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    if (!bo)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    /* Generation-specific GPE context initialization (set in
     * vpp_gpe_context_init()). */
    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}
/*
 * Program all per-frame state for the Gen8 GPE run: surface binding,
 * interface descriptors, and the media pipeline commands.
 *
 * BUG FIX: the VAStatus results of the setup steps were previously
 * discarded; they are now propagated to the caller.
 */
static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status;

    /*Setup all the memory object*/
    va_status = gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /*Programing media pipeline*/
    return gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
}
/* Submit the accumulated batch buffer to the kernel for execution. */
static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct intel_batchbuffer *batch = vpp_gpe_ctx->batch;

    intel_batchbuffer_flush(batch);

    return VA_STATUS_SUCCESS;
}
/*
 * Run one full Gen8 GPE pass: allocate buffers, program state, submit.
 * Stops at the first failing stage and returns its status.
 */
static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus status;

    status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);

    if (status == VA_STATUS_SUCCESS)
        status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);

    if (status == VA_STATUS_SUCCESS)
        status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);

    return status;
}
/* Dispatch one GPE pass to the generation-specific implementation. */
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);

    if (IS_GEN8(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    /* No implementation for this hardware generation. */
    return VA_STATUS_ERROR_UNIMPLEMENTED;
}
/*
 * Run the three-pass unsharp-mask sharpening filter on the GPE:
 *   step 1: horizontal blur  (input  -> output surface)
 *   step 2: vertical blur    (output -> temporary surface)
 *   step 3: unmask           (original input + blurred temp -> output)
 * Thread parameters (one 16-pixel strip per thread for blur, 4-line
 * strips for unmask) are rebuilt for each pass.
 *
 * BUG FIXES vs. the original:
 *  - `vpp_kernels` was used uninitialized when the device was neither
 *    Haswell nor Gen8; now returns VA_STATUS_ERROR_UNIMPLEMENTED instead.
 *  - the status of each vpp_gpe_process() call is now checked (step 2's
 *    result was previously discarded entirely).
 */
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     /* Remember the caller's surfaces: steps 2/3 swap the context's
      * input/output pointers and step 3 restores them. */
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
         (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if (vpp_gpe_ctx->is_first_frame) {
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel *vpp_kernels = NULL;

         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         /* BUG FIX: previously fell through with vpp_kernels uninitialized
          * on unsupported hardware. */
         if (!vpp_kernels)
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                                       &vpp_gpe_ctx->gpe_ctx,
                                       vpp_kernels,
                                       vpp_gpe_ctx->sub_shader_sum);
     }

     /* Lazily create the intermediate NV12 surface used between passes. */
     if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
     }

     assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
     /* Scale the [0,1] intensity into the kernel's fixed-point range. */
     thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
     thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

     thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
     thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

     /* Step 1: horizontal blur process — one thread per 16-line strip. */
     vpp_gpe_ctx->forward_surf_sum = 0;
     vpp_gpe_ctx->backward_surf_sum = 0;
     vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                         *vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;

     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }

     for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 16 * i;
         thr_param.base.h_pos = 0;
         memcpy(pos, &thr_param, thr_param_size);
         pos += thr_param_size;
     }

     vpp_gpe_ctx->sub_shader_index = 0;
     va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
     free(vpp_gpe_ctx->thread_param);
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;

     /* Step 2: vertical blur process — blurred output becomes the input,
      * the temporary surface receives the result. */
     vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
     vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
     vpp_gpe_ctx->forward_surf_sum = 0;
     vpp_gpe_ctx->backward_surf_sum = 0;
     vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                         *vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;

     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }

     for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 0;
         thr_param.base.h_pos = 16 * i;
         memcpy(pos, &thr_param, thr_param_size);
         pos += thr_param_size;
     }

     vpp_gpe_ctx->sub_shader_index = 1;
     /* BUG FIX: this status was previously ignored. */
     va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
     free(vpp_gpe_ctx->thread_param);
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;

     /* Step 3: apply the blur to original surface — original input plus the
      * blurred temp (as forward reference) write the final output. */
     vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
     vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
     vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
     vpp_gpe_ctx->forward_surf_sum = 1;
     vpp_gpe_ctx->backward_surf_sum = 0;
     vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                         *vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;

     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }

     for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 4 * i;
         thr_param.base.h_pos = 0;
         memcpy(pos, &thr_param, thr_param_size);
         pos += thr_param_size;
     }

     vpp_gpe_ctx->sub_shader_index = 2;
     va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
     free(vpp_gpe_ctx->thread_param);

     return va_status;

error:
     return VA_STATUS_ERROR_INVALID_PARAMETER;
}
/*
 * Entry point for one GPE post-processing frame.  Scans the pipeline's
 * filter list for a sharpening filter, gathers the input surface and its
 * forward/backward reference surfaces into the context, and dispatches to
 * the filter implementation (currently only sharpening is supported).
 *
 * BUG FIX: SURFACE() lookups were only assert()-checked, so an invalid
 * surface ID caused a NULL dereference in release builds; they now fail
 * with VA_STATUS_ERROR_INVALID_PARAMETER.
 */
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    /* Find the first sharpening filter (if any) in the pipeline. */
    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening)
            break;
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i ++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        if (!obj_surface)       /* BUG FIX: reject invalid surface IDs */
            goto error;

        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i ++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        if (!obj_surface)       /* BUG FIX: reject invalid surface IDs */
            goto error;

        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i ] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    if (!obj_surface)           /* BUG FIX: was dereferenced unchecked */
        goto error;

    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
/*
 * Tear down a VPP GPE context: drop the buffer references, destroy the
 * generation-specific GPE state, destroy the scratch surface if one was
 * created, then free the context itself.
 */
void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    /* Dispatches to i965_gpe_context_destroy or gen8_gpe_context_destroy
     * (set in vpp_gpe_context_init). */
    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* NOTE(review): the batch came from intel_batchbuffer_new(); freeing it
     * with plain free() skips any cleanup an intel_batchbuffer_free()
     * helper would perform — confirm this is intended. */
    free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}
/*
 * Allocate and initialize a VPP GPE context for the current device,
 * wiring in the generation-specific GPE callbacks and state sizes.
 * Only Haswell (Gen7.5) and Gen8 are supported.
 *
 * Returns NULL on allocation failure (BUG FIX: the calloc() result was
 * previously dereferenced without a NULL check).
 */
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx;
    struct i965_gpe_context *gpe_ctx;

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    /* VFE fixed-function state shared by both generations; the hardware
     * fields are "value minus one" encoded. */
    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        /* Gen8 keeps CURBE and IDRT inside the dynamic-state buffer. */
        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}

View File

@@ -0,0 +1,121 @@
/*
 * Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
*
*/
#ifndef GEN75_VPP_GPE
#define GEN75_VPP_GPE
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include <va/va_vpp.h>
#include "i965_gpe_utils.h"
#define MAX_SURF_IN_SUM 5
/* Post-processing operations the GPE path can perform. */
enum VPP_GPE_TYPE{
    VPP_GPE_SHARPENING,             /* unsharp-mask sharpening */
    VPP_GPE_BLENDING,
    VPP_GPE_SCENE_CHANGE_DETECTION,
    VPP_GPE_FILTER_SUM,             /* number of filter types */
};
/* Per-kernel constant (CURBE) data shared by all threads. */
typedef struct _KernelParameterBase{
    unsigned short pic_width;
    unsigned short pic_height;
}KernelParameterBase;

typedef struct _KernelParameterSharpening{
    KernelParameterBase base;
}KernelParameterSharpening;

/* Per-thread inline data passed in each MEDIA_OBJECT command. */
typedef struct _ThreadParameterBase{
    unsigned int pic_width;
    unsigned int pic_height;
    unsigned int v_pos;     /* vertical start of this thread's strip */
    unsigned int h_pos;     /* horizontal start of this thread's strip */
}ThreadParameterBase;

/* Sharpening thread parameters.  NOTE: the struct tag "Sharpenig" is a
 * typo kept as-is for source compatibility; use the typedef name. */
typedef struct _ThreadParameterSharpenig{
    ThreadParameterBase base;
    unsigned int l_amount;  /* intensity scaled to [0,128] by the caller */
    unsigned int d_amount;  /* intensity scaled to [0,128] by the caller */
}ThreadParameterSharpening;
/*
 * State for one GPE-based video post-processing context.  Shared between
 * the Gen7.5 and Gen8 code paths; the function pointers at the bottom
 * select the generation-specific implementations.
 */
struct vpp_gpe_context{
    struct intel_batchbuffer *batch;            /* primary batch buffer */
    struct i965_gpe_context gpe_ctx;            /* generic GPE state */
    struct i965_buffer_surface vpp_batchbuffer; /* 2nd-level MEDIA_OBJECT batch */
    struct i965_buffer_surface vpp_kernel_return; /* per-thread return data */
    VAProcPipelineParameterBuffer *pipeline_param;
    enum VPP_GPE_TYPE filter_type;
    unsigned int sub_shader_index;              /* interface descriptor to run */
    unsigned int sub_shader_sum;                /* number of loaded kernels */
    unsigned char * kernel_param;               /* CURBE constants */
    unsigned int kernel_param_size;
    unsigned char * thread_param;               /* array of per-thread params */
    unsigned int thread_param_size;             /* size of one thread's params */
    unsigned int thread_num;
    struct object_surface *surface_pipeline_input_object;
    struct object_surface *surface_output_object;
    VASurfaceID surface_tmp;                    /* scratch surface, VA_INVALID_ID if none */
    struct object_surface *surface_tmp_object;
    /* [0] = current input; followed by forward then backward references. */
    struct object_surface *surface_input_object[MAX_SURF_IN_SUM];
    unsigned int forward_surf_sum;
    unsigned int backward_surf_sum;
    unsigned int in_frame_w;
    unsigned int in_frame_h;
    unsigned int is_first_frame;                /* kernels loaded lazily on first frame */

    void (*gpe_context_init)(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context);

    void (*gpe_context_destroy)(struct i965_gpe_context *gpe_context);

    void (*gpe_load_kernels)(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_kernel *kernel_list,
                             unsigned int num_kernels);
};
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx);
void
vpp_gpe_context_destroy(VADriverContextP ctx,
struct vpp_gpe_context* vpp_context);
VAStatus
vpp_gpe_process_picture(VADriverContextP ctx,
struct vpp_gpe_context * vpp_context);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,156 @@
/*
 * Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Li Xiaowei <xiaowei.a.li@intel.com>
*
*/
#ifndef _GEN75_VPP_VEBOX_H
#define _GEN75_VPP_VEBOX_H
#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include <va/va_vpp.h>
#include "i965_drv_video.h"
#include "i965_post_processing.h"
#define INPUT_SURFACE 0
#define OUTPUT_SURFACE 1
#define VPP_DNDI_DN 0x00000001
#define VPP_DNDI_DI 0x00000002
#define VPP_IECP_STD_STE 0x00000100
#define VPP_IECP_ACE 0x00000200
#define VPP_IECP_TCC 0x00000400
#define VPP_IECP_PRO_AMP 0x00000800
#define VPP_IECP_CSC 0x00001000
#define VPP_IECP_AOI 0x00002000
#define MAX_FILTER_SUM 8
#define PRE_FORMAT_CONVERT 0x01
#define POST_FORMAT_CONVERT 0x02
#define POST_SCALING_CONVERT 0x04
#define POST_COPY_CONVERT 0x08
/* Indices into intel_vebox_context.frame_store: the surfaces the VEBOX
 * reads (inputs/STMM) and writes (outputs/statistics) for one pass. */
enum {
    FRAME_IN_CURRENT = 0,
    FRAME_IN_PREVIOUS,
    FRAME_IN_STMM,          /* spatial-temporal motion measure, input */
    FRAME_OUT_STMM,         /* spatial-temporal motion measure, output */
    FRAME_OUT_CURRENT_DN,   /* denoised current frame */
    FRAME_OUT_CURRENT,
    FRAME_OUT_PREVIOUS,
    FRAME_OUT_STATISTIC,
    FRAME_STORE_SUM,        /* number of frame-store slots */
};
/* VEBOX surface-format codes (hardware encoding order). */
enum SURFACE_FORMAT{
    YCRCB_NORMAL = 0,
    YCRCB_SWAPUVY,
    YCRCB_SWAPUV,
    YCRCB_SWAPY,
    PLANAR_420_8,           //NV12
    PACKED_444A_8,
    PACKED_422_16,
    R10G10B10A2_UNORM_SRGB,
    R8G8B8A8_UNORM_SRGB,
    PACKED_444_16,
    PLANAR_422_16,
    Y8_UNORM,
    PLANAR_420_16,
    R16G16B16A16,
    SURFACE_FORMAT_SUM      /* number of formats */
};
/* One slot of the VEBOX frame store (indexed by the FRAME_* enum). */
typedef struct veb_frame_store {
    VASurfaceID surface_id;             /* VA handle of the surface */
    unsigned int is_internal_surface;   /* presumably nonzero when driver-allocated — confirm */
    struct object_surface *obj_surface;
} VEBFrameStore;

/* A GPU buffer holding one of the VEBOX state tables. */
typedef struct veb_buffer {
    dri_bo *bo;
    void * ptr;             /* presumably the CPU mapping of bo — confirm */
    unsigned char valid;    /* nonzero once the table has been filled */
} VEBBuffer;
/*
 * State for one VEBOX (fixed-function video enhancement) processing
 * context: the user-visible input/output surfaces, optional intermediate
 * surfaces for format/scaling conversion, the frame store, and the state
 * tables for the enabled filters (see the VPP_* mask bits above).
 */
struct intel_vebox_context
{
    struct intel_batchbuffer *batch;

    struct object_surface *surface_input_object;
    struct object_surface *surface_output_object;

    /* Intermediate surfaces used when the input/output needs a format
     * conversion before/after the VEBOX pass (format_convert_flags). */
    VASurfaceID surface_input_vebox;
    struct object_surface *surface_input_vebox_object;
    VASurfaceID surface_output_vebox;
    struct object_surface *surface_output_vebox_object;
    VASurfaceID surface_output_scaled;
    struct object_surface *surface_output_scaled_object;

    unsigned int fourcc_input;
    unsigned int fourcc_output;

    int width_input;
    int height_input;
    int width_output;
    int height_output;

    VEBFrameStore frame_store[FRAME_STORE_SUM];

    /* Hardware state tables programmed per enabled filter. */
    VEBBuffer dndi_state_table;     /* denoise / deinterlace */
    VEBBuffer iecp_state_table;     /* image enhancement color processing */
    VEBBuffer gamut_state_table;
    VEBBuffer vertex_state_table;

    unsigned int  filters_mask;     /* VPP_DNDI_* / VPP_IECP_* bits */
    int frame_order;
    int current_output;

    VAProcPipelineParameterBuffer * pipeline_param;

    /* Parameter blobs for the individual filters (NULL when unused). */
    void * filter_dn;
    void * filter_di;
    void * filter_iecp_std;
    void * filter_iecp_ace;
    void * filter_iecp_tcc;
    void * filter_iecp_amp;

    unsigned int  filter_iecp_amp_num_elements;

    unsigned char format_convert_flags;  /* PRE_/POST_*_CONVERT bits */
};
/* Run the VEBOX processing pipeline on Gen7.5 (Haswell). */
VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
struct intel_vebox_context *proc_ctx);
/* Release all resources held by a vebox context. */
void gen75_vebox_context_destroy(VADriverContextP ctx,
struct intel_vebox_context *proc_ctx);
/* Allocate and zero-initialize a vebox context. */
struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx);
/* Gen8 (Broadwell) variant of the processing entry point. */
VAStatus gen8_vebox_process_picture(VADriverContextP ctx,
struct intel_vebox_context *proc_ctx);
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,98 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef _GEN7_MFD_H_
#define _GEN7_MFD_H_
#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_decoder.h"
/* VC-1 picture types as encoded in the MFD picture state. */
#define GEN7_VC1_I_PICTURE 0
#define GEN7_VC1_P_PICTURE 1
#define GEN7_VC1_B_PICTURE 2
#define GEN7_VC1_BI_PICTURE 3
#define GEN7_VC1_SKIPPED_PICTURE 4
/* VC-1 profile codes. */
#define GEN7_VC1_SIMPLE_PROFILE 0
#define GEN7_VC1_MAIN_PROFILE 1
#define GEN7_VC1_ADVANCED_PROFILE 2
#define GEN7_VC1_RESERVED_PROFILE 3
/* JPEG output rotation codes. NOTE(review): 270 < 180 in this encoding;
 * the values presumably follow the hardware bitfield, not numeric order
 * — verify against the PRM before "fixing". */
#define GEN7_JPEG_ROTATION_0 0
#define GEN7_JPEG_ROTATION_90 1
#define GEN7_JPEG_ROTATION_270 2
#define GEN7_JPEG_ROTATION_180 3
/* JPEG chroma subsampling codes (H/V sampling variants of 4:2:2 are
 * distinguished by the number of Y blocks per MCU). */
#define GEN7_YUV400 0
#define GEN7_YUV420 1
#define GEN7_YUV422H_2Y 2
#define GEN7_YUV444 3
#define GEN7_YUV411 4
#define GEN7_YUV422V_2Y 5
#define GEN7_YUV422H_4Y 6
#define GEN7_YUV422V_4Y 7
/* Per-surface VC-1 decode state: the direct-MV buffer and the picture
 * type of the frame stored in the surface. */
struct gen7_vc1_surface
{
dri_bo *dmv;
int picture_type;
};
struct hw_context;
/* Decode context for the Gen7 MFD (multi-format decoder) fixed-function
 * unit. Embeds the generic hw_context as its base so it can be passed
 * through the driver's common hw_context interface. */
struct gen7_mfd_context
{
struct hw_context base;
/* Inverse-quantization matrices for the codec being decoded. */
union {
VAIQMatrixBufferMPEG2 mpeg2;
VAIQMatrixBufferH264 h264; /* flat scaling lists (default) */
} iq_matrix;
GenFrameStoreContext fs_ctx;
GenFrameStore reference_surface[MAX_GEN_REFERENCE_FRAMES];
/* Scratch buffers required by the MFD pipeline stages. */
GenBuffer post_deblocking_output;
GenBuffer pre_deblocking_output;
GenBuffer intra_row_store_scratch_buffer;
GenBuffer deblocking_filter_row_store_scratch_buffer;
GenBuffer bsd_mpc_row_store_scratch_buffer;
GenBuffer mpr_row_store_scratch_buffer;
GenBuffer bitplane_read_buffer;
GenBuffer segmentation_buffer;
/* JPEG decode workaround surface and its slice data. */
VASurfaceID jpeg_wa_surface_id;
struct object_surface *jpeg_wa_surface_object;
dri_bo *jpeg_wa_slice_data_bo;
/* MPEG-2 workaround: last slice vertical position seen. */
int wa_mpeg2_slice_vertical_position;
};
#endif /* _GEN7_MFD_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,898 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#include "sysdeps.h"
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_avc_bsd.h"
#include "i965_media_h264.h"
#include "i965_media.h"
#include "i965_decoder_utils.h"
#include "intel_media.h"
/*
 * Ensure @obj_surface carries a GenAvcSurface private-data block with
 * direct-MV (DMV) buffers allocated for the current picture.
 *
 * The top-field DMV buffer is always allocated; a separate bottom-field
 * buffer is needed only for field pictures without 8x8 direct inference
 * (dmv_bottom_flag). The private data is freed later through
 * gen_free_avc_surface, installed here as the destructor.
 */
static void
i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
                                  struct object_surface *obj_surface,
                                  VAPictureParameterBufferH264 *pic_param,
                                  struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    GenAvcSurface *avc_bsd_surface = obj_surface->private_data;

    obj_surface->free_private_data = gen_free_avc_surface;

    if (!avc_bsd_surface) {
        /* Fix: calloc takes (nmemb, size) — the old call had the two
         * arguments swapped (same allocation, but misleading) — and the
         * result was dereferenced without any failure check. */
        avc_bsd_surface = calloc(1, sizeof(GenAvcSurface));
        assert(avc_bsd_surface);
        avc_bsd_surface->frame_store_id = -1;
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = avc_bsd_surface;
    }

    /* Bottom-field DMV buffer needed only for field pictures that do not
     * use direct 8x8 inference (H.264 spec direct-mode requirement). */
    avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
                                        !pic_param->seq_fields.bits.direct_8x8_inference_flag);

    if (avc_bsd_surface->dmv_top == NULL) {
        avc_bsd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                "direct mv w/r buffer",
                                                DMV_SIZE,
                                                0x1000);
    }

    if (avc_bsd_surface->dmv_bottom_flag &&
        avc_bsd_surface->dmv_bottom == NULL) {
        avc_bsd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
                                                   "direct mv w/r buffer",
                                                   DMV_SIZE,
                                                   0x1000);
    }
}
/*
 * Emit CMD_BSD_IND_OBJ_BASE_ADDR so the BSD unit fetches its indirect
 * bitstream data from the buffer object holding slice data group
 * @slice of the current decode state.
 */
static void
i965_bsd_ind_obj_base_address(VADriverContextP ctx,
struct decode_state *decode_state,
int slice,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
dri_bo *ind_bo = decode_state->slice_datas[slice]->bo;
BEGIN_BCS_BATCH(batch, 3);
OUT_BCS_BATCH(batch, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2));
/* DW1: GPU address of the slice data buffer (read by the BSD unit). */
OUT_BCS_RELOC(batch, ind_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
/* DW2: upper bound unused (0 = no limit). */
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
/*
 * Emit CMD_AVC_BSD_IMG_STATE: the per-picture configuration for the BSD
 * unit — picture dimensions in macroblocks, field/frame structure,
 * whether user IQ matrices are present, MBAFF mode, and the AVC IT
 * command header used for the downstream media pipeline.
 *
 * The exact dword layout below is dictated by the hardware; do not
 * reorder the OUT_BCS_BATCH calls.
 */
static void
i965_avc_bsd_img_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int qm_present_flag;
int img_struct;
int mbaff_frame_flag;
unsigned int avc_it_command_header;
unsigned int width_in_mbs, height_in_mbs;
VAPictureParameterBufferH264 *pic_param;
if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
qm_present_flag = 1;
else
qm_present_flag = 0; /* built-in QM matrices */
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* img_struct: 0 = frame, 1 = top field, 3 = bottom field. */
if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
img_struct = 1;
else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
img_struct = 3;
else
img_struct = 0;
/* Field pictures must be flagged consistently in the pic params. */
if ((img_struct & 0x1) == 0x1) {
assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
} else {
assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
}
if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
assert(pic_param->pic_fields.bits.field_pic_flag == 0);
} else {
assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
}
/* MBAFF: adaptive frame/field coding within a frame picture. */
mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
!pic_param->pic_fields.bits.field_pic_flag);
width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
/* BSD unit doesn't support 4:2:2 and 4:4:4 picture */
assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2));
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_IMG_STATE | (6 - 2));
/* DW1: total macroblock count (15 bits). */
OUT_BCS_BATCH(batch,
((width_in_mbs * height_in_mbs) & 0x7fff));
/* DW2: picture dimensions in macroblocks. */
OUT_BCS_BATCH(batch,
(height_in_mbs << 16) |
(width_in_mbs << 0));
/* DW3: QP offsets, scan orders, QM presence, picture structure. */
OUT_BCS_BATCH(batch,
((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
(SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */
(SCAN_SPECIAL_ORDER << 14) | /* AVC IT Command */
(SCAN_RASTER_ORDER << 13) | /* AVC IT Data */
(1 << 12) | /* always 1, hardware requirement */
(qm_present_flag << 10) |
(img_struct << 8) |
(16 << 0)); /* FIXME: always support 16 reference frames ??? */
/* DW4: decode-mode flags mirrored from the VA picture parameters. */
OUT_BCS_BATCH(batch,
(RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */
(0 << 17) | /* don't overwrite SRT */
(0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */
(0 << 12) | /* FIXME: no 16MV ??? */
(pic_param->seq_fields.bits.chroma_format_idc << 10) |
(i965_h264_context->enable_avc_ildb << 8) | /* Enable ILDB writing output */
(pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
(pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
(pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
(pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
(pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
(mbaff_frame_flag << 1) |
(pic_param->pic_fields.bits.field_pic_flag << 0));
/* DW5: MEDIA_OBJECT_EX header the BSD unit prepends to IT commands. */
OUT_BCS_BATCH(batch, avc_it_command_header);
ADVANCE_BCS_BATCH(batch);
}
/*
 * Emit CMD_AVC_BSD_QM_STATE with the user-supplied scaling matrices:
 * always the six 4x4 lists, plus the two 8x8 lists when the picture
 * uses the 8x8 transform. No-op when no IQ matrix buffer was given
 * (the hardware then uses its built-in default matrices, per the
 * qm_present_flag programmed in IMG_STATE).
 */
static void
i965_avc_bsd_qm_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int cmd_len;
VAIQMatrixBufferH264 *iq_matrix;
VAPictureParameterBufferH264 *pic_param;
if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
return;
iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
BEGIN_BCS_BATCH(batch, cmd_len);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_QM_STATE | (cmd_len - 2));
/* DW1: bitmask of which matrices follow in the payload. */
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
OUT_BCS_BATCH(batch,
(0x0 << 8) | /* don't use default built-in matrices */
(0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
else
OUT_BCS_BATCH(batch,
(0x0 << 8) | /* don't use default built-in matrices */
(0x3f << 0)); /* six 4x4 scaling matrices */
intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
ADVANCE_BCS_BATCH(batch);
}
/*
 * Emit CMD_AVC_BSD_SLICE_STATE for an inter (P/SP/B) slice: the
 * reference-picture index lists and, when explicit weighted prediction
 * applies, the per-reference luma/chroma weight+offset tables.
 *
 * Also records which references use a weight of exactly 128 (the
 * hardware cannot encode that value directly): on parts with
 * use_hw_w128 set, the per-list bitmasks are later programmed into the
 * BSD object; otherwise weight128_offset0_flag requests the software
 * workaround. Intra (I/SI) slices need no SLICE_STATE and return early.
 */
static void
i965_avc_bsd_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int present_flag, cmd_len, list, j;
    uint8_t ref_idx_state[32];
    char weightoffsets[32 * 6];

    /* don't issue SLICE_STATE for intra-prediction decoding */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        return;

    /* Command length: 2-dword header, 8 dwords per ref-idx list present,
     * 48 dwords per weight/offset table present. */
    cmd_len = 2;

    if (slice_param->slice_type == SLICE_TYPE_P ||
        slice_param->slice_type == SLICE_TYPE_SP) {
        present_flag = PRESENT_REF_LIST0;
        cmd_len += 8;
    } else {
        present_flag = PRESENT_REF_LIST0 | PRESENT_REF_LIST1;
        cmd_len += 16;
    }

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0;
        cmd_len += 48;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0 | PRESENT_WEIGHT_OFFSET_L1;
        cmd_len += 96;
    }

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2));
    OUT_BCS_BATCH(batch, present_flag);

    /* Reference index state for each list that is present. */
    for (list = 0; list < 2; list++) {
        int flag, num_va_pics;
        VAPictureH264 *va_pic;

        if (list == 0) {
            flag = PRESENT_REF_LIST0;
            va_pic = slice_param->RefPicList0;
            num_va_pics = slice_param->num_ref_idx_l0_active_minus1 + 1;
        } else {
            flag = PRESENT_REF_LIST1;
            va_pic = slice_param->RefPicList1;
            num_va_pics = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (!(present_flag & flag))
            continue;

        gen5_fill_avc_ref_idx_state(
            ref_idx_state,
            va_pic, num_va_pics,
            i965_h264_context->fsid_list
        );
        intel_batchbuffer_data(batch, ref_idx_state, sizeof(ref_idx_state));
    }

    i965_h264_context->weight128_luma_l0 = 0;
    i965_h264_context->weight128_luma_l1 = 0;
    i965_h264_context->weight128_chroma_l0 = 0;
    i965_h264_context->weight128_chroma_l1 = 0;
    i965_h264_context->weight128_offset0_flag = 0;
    i965_h264_context->weight128_offset0 = 0;

    /* List-0 weight/offset table: 6 bytes per reference in the order
     * luma offset/weight, Cb offset/weight, Cr offset/weight. */
    if (present_flag & PRESENT_WEIGHT_OFFSET_L0) {
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l0[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l0[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l0[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l0[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l0[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l0[j][1];

            if (pic_param->pic_fields.bits.weighted_pred_flag == 1 ||
                pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    if (slice_param->luma_weight_l0[j] == 128)
                        i965_h264_context->weight128_luma_l0 |= (1 << j);

                    if (slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_chroma_l0 |= (1 << j);
                } else {
                    /* FIXME: workaround for weight 128 */
                    if (slice_param->luma_weight_l0[j] == 128 ||
                        slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    /* List-1 weight/offset table, same layout as list 0. */
    if (present_flag & PRESENT_WEIGHT_OFFSET_L1) {
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l1[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l1[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l1[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l1[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l1[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l1[j][1];

            if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    if (slice_param->luma_weight_l1[j] == 128)
                        i965_h264_context->weight128_luma_l1 |= (1 << j);

                    if (slice_param->chroma_weight_l1[j][0] == 128 ||
                        slice_param->chroma_weight_l1[j][1] == 128)
                        i965_h264_context->weight128_chroma_l1 |= (1 << j);
                } else {
                    /* Bug fix: this branch previously tested the L0 weight
                     * arrays (copy/paste from the list-0 block above); the
                     * software weight-128 workaround must inspect the L1
                     * lists here. */
                    if (slice_param->luma_weight_l1[j] == 128 ||
                        slice_param->chroma_weight_l1[j][0] == 128 ||
                        slice_param->chroma_weight_l1[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    ADVANCE_BCS_BATCH(batch);
}
/*
 * Emit the 74-dword CMD_AVC_BSD_BUF_BASE_STATE: scratch-buffer
 * addresses, the direct-MV buffers for every frame-store slot and for
 * the current render target, and the picture-order-count (POC) list.
 * Also (re)allocates the current surface's backing bo and its
 * GenAvcSurface private data as a side effect.
 */
static void
i965_avc_bsd_buf_base_state(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
struct i965_avc_bsd_context *i965_avc_bsd_context;
int i;
VAPictureH264 *va_pic;
struct object_surface *obj_surface;
GenAvcSurface *avc_bsd_surface;
i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
BEGIN_BCS_BATCH(batch, 74);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2));
/* DW1-4: raw store, MPR row store, IT command and IT data buffers. */
OUT_BCS_RELOC(batch, i965_avc_bsd_context->bsd_raw_store.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
OUT_BCS_RELOC(batch, i965_avc_bsd_context->mpr_row_store.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
OUT_BCS_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
OUT_BCS_RELOC(batch, i965_h264_context->avc_it_data.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
(i965_h264_context->avc_it_data.write_offset << 6));
/* DW5: in-loop deblocking output, only when ILDB is enabled. */
if (i965_h264_context->enable_avc_ildb)
OUT_BCS_RELOC(batch, i965_h264_context->avc_ildb_data.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
else
OUT_BCS_BATCH(batch, 0);
/* Two DMV dwords per frame-store slot; the top buffer is duplicated
 * when no separate bottom-field buffer exists. */
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
obj_surface = i965_h264_context->fsid_list[i].obj_surface;
if (obj_surface && obj_surface->private_data) {
avc_bsd_surface = obj_surface->private_data;
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
if (avc_bsd_surface->dmv_bottom_flag == 1)
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
else
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
}
}
/* NOTE(review): this va_pic assignment is a dead store — the variable
 * is shadowed in the POC loop below and reassigned before its next use. */
va_pic = &pic_param->CurrPic;
obj_surface = decode_state->render_object;
if (pic_param->pic_fields.bits.reference_pic_flag)
obj_surface->flags |= SURFACE_REFERENCED;
else
obj_surface->flags &= ~SURFACE_REFERENCED;
i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* initial uv component for YUV400 case */
if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
unsigned int uv_offset = obj_surface->width * obj_surface->height;
unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
/* NOTE(review): dri_bo_map return value is unchecked; a map failure
 * would crash on the memset below. */
dri_bo_map(obj_surface->bo, 1);
memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
dri_bo_unmap(obj_surface->bo);
}
i965_avc_bsd_init_avc_bsd_surface(ctx, obj_surface, pic_param, i965_h264_context);
avc_bsd_surface = obj_surface->private_data;
/* DMV buffers for the current (render target) picture. */
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
if (avc_bsd_surface->dmv_bottom_flag == 1)
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
else
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
/* POC List */
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
obj_surface = i965_h264_context->fsid_list[i].obj_surface;
if (obj_surface) {
/* Shadows the outer va_pic deliberately: look up the reference
 * entry matching this frame-store surface. */
const VAPictureH264 * const va_pic = avc_find_picture(
obj_surface->base.id, pic_param->ReferenceFrames,
ARRAY_ELEMS(pic_param->ReferenceFrames));
assert(va_pic != NULL);
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
}
}
/* Final POC pair: the current picture itself. */
va_pic = &pic_param->CurrPic;
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
ADVANCE_BCS_BATCH(batch);
}
/*
 * Emit CMD_AVC_BSD_OBJECT (8/9-dword G4x layout) for one slice, or the
 * terminating "phantom" slice when @slice_param is NULL. The phantom
 * form carries only the total macroblock count and signals the end of
 * the picture to the hardware.
 */
static void
g4x_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
int slice_index,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
if (slice_param) {
int encrypted, counter_value, cmd_len;
int slice_hor_pos, slice_ver_pos;
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice = 0;
int slice_type;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
if (encrypted) {
cmd_len = 9;
counter_value = 0; /* FIXME: ??? */
} else
cmd_len = 8;
/* Bit offset of the first macroblock, past the slice header and any
 * emulation-prevention bytes. */
slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
decode_state->slice_datas[slice_index]->bo,
slice_param,
pic_param->pic_fields.bits.entropy_coding_mode_flag
);
/* Collapse SI/SP onto I/P for the hardware slice-type field. */
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
slice_type = SLICE_TYPE_I;
else if (slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP)
slice_type = SLICE_TYPE_P;
else {
assert(slice_param->slice_type == SLICE_TYPE_B);
slice_type = SLICE_TYPE_B;
}
if (slice_type == SLICE_TYPE_I) {
assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = 0;
num_ref_idx_l1 = 0;
} else if (slice_type == SLICE_TYPE_P) {
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = 0;
} else {
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
}
if (slice_type == SLICE_TYPE_P)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
else if (slice_type == SLICE_TYPE_B)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
/* In MBAFF pictures first_mb_in_slice counts MB pairs; double it. */
first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
slice_hor_pos = first_mb_in_slice % width_in_mbs;
slice_ver_pos = first_mb_in_slice / width_in_mbs;
BEGIN_BCS_BATCH(batch, cmd_len);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (cmd_len - 2));
/* DW1: remaining bitstream length from the first MB onward. */
OUT_BCS_BATCH(batch,
(encrypted << 31) |
((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
/* DW2: byte offset of the first MB within the indirect object. */
OUT_BCS_BATCH(batch,
(slice_param->slice_data_offset +
(slice_data_bit_offset >> 3)));
OUT_BCS_BATCH(batch,
(0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
(0 << 14) | /* ignore BSDPrematureComplete Error handling */
(0 << 13) | /* FIXME: ??? */
(0 << 12) | /* ignore MPR Error handling */
(0 << 10) | /* ignore Entropy Error handling */
(0 << 8) | /* ignore MB Header Error handling */
(slice_type << 0));
OUT_BCS_BATCH(batch,
(num_ref_idx_l1 << 24) |
(num_ref_idx_l0 << 16) |
(slice_param->chroma_log2_weight_denom << 8) |
(slice_param->luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
(slice_param->direct_spatial_mv_pred_flag << 29) |
(slice_param->disable_deblocking_filter_idc << 27) |
(slice_param->cabac_init_idc << 24) |
((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
OUT_BCS_BATCH(batch,
(slice_ver_pos << 24) |
(slice_hor_pos << 16) |
(first_mb_in_slice << 0));
/* DW7: number of unused bits in the first bitstream byte. */
OUT_BCS_BATCH(batch,
(1 << 7) |
((0x7 - (slice_data_bit_offset & 0x7)) << 0));
if (encrypted) {
OUT_BCS_BATCH(batch, counter_value);
}
ADVANCE_BCS_BATCH(batch);
} else {
/* Phantom slice: zero-length terminator carrying only the total MB
 * count (halved for field pictures). */
BEGIN_BCS_BATCH(batch, 8);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2));
OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
}
/*
 * Ironlake variant of the AVC_BSD_OBJECT command (fixed 16-dword
 * layout). Same role as g4x_avc_bsd_object, but additionally programs
 * the weight-128 bitmasks collected by i965_avc_bsd_slice_state into
 * DW12-DW15. A NULL @slice_param emits the terminating phantom slice.
 */
static void
ironlake_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
int slice_index,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
if (slice_param) {
int encrypted, counter_value;
int slice_hor_pos, slice_ver_pos;
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice;
int slice_type;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
/* NOTE(review): both branches assign 0 — the encrypted counter value
 * was never implemented; kept for when the FIXME above is resolved. */
if (encrypted) {
counter_value = 0; /* FIXME: ??? */
} else
counter_value = 0;
/* Bit offset of the first macroblock, past the slice header and any
 * emulation-prevention bytes. */
slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
decode_state->slice_datas[slice_index]->bo,
slice_param,
pic_param->pic_fields.bits.entropy_coding_mode_flag
);
/* Collapse SI/SP onto I/P for the hardware slice-type field. */
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
slice_type = SLICE_TYPE_I;
else if (slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP)
slice_type = SLICE_TYPE_P;
else {
assert(slice_param->slice_type == SLICE_TYPE_B);
slice_type = SLICE_TYPE_B;
}
if (slice_type == SLICE_TYPE_I) {
assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = 0;
num_ref_idx_l1 = 0;
} else if (slice_type == SLICE_TYPE_P) {
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = 0;
} else {
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
}
if (slice_type == SLICE_TYPE_P)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
else if (slice_type == SLICE_TYPE_B)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
/* In MBAFF pictures first_mb_in_slice counts MB pairs; double it. */
first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
slice_hor_pos = first_mb_in_slice % width_in_mbs;
slice_ver_pos = first_mb_in_slice / width_in_mbs;
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
/* DW1: remaining bitstream length from the first MB onward. */
OUT_BCS_BATCH(batch,
(encrypted << 31) |
(0 << 30) | /* FIXME: packet based bit stream */
(0 << 29) | /* FIXME: packet format */
((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
/* DW2: byte offset of the first MB within the indirect object. */
OUT_BCS_BATCH(batch,
(slice_param->slice_data_offset +
(slice_data_bit_offset >> 3)));
OUT_BCS_BATCH(batch,
(0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
(0 << 14) | /* ignore BSDPrematureComplete Error handling */
(0 << 13) | /* FIXME: ??? */
(0 << 12) | /* ignore MPR Error handling */
(0 << 10) | /* ignore Entropy Error handling */
(0 << 8) | /* ignore MB Header Error handling */
(slice_type << 0));
OUT_BCS_BATCH(batch,
(num_ref_idx_l1 << 24) |
(num_ref_idx_l0 << 16) |
(slice_param->chroma_log2_weight_denom << 8) |
(slice_param->luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
(slice_param->direct_spatial_mv_pred_flag << 29) |
(slice_param->disable_deblocking_filter_idc << 27) |
(slice_param->cabac_init_idc << 24) |
((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
OUT_BCS_BATCH(batch,
(slice_ver_pos << 24) |
(slice_hor_pos << 16) |
(first_mb_in_slice << 0));
/* DW7: number of unused bits in the first bitstream byte. */
OUT_BCS_BATCH(batch,
(1 << 7) |
((0x7 - (slice_data_bit_offset & 0x7)) << 0));
OUT_BCS_BATCH(batch, counter_value);
/* FIXME: dw9-dw11 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
/* DW12-15: weight-128 workaround bitmasks from SLICE_STATE setup. */
OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l0);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l1);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l0);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l1);
ADVANCE_BCS_BATCH(batch);
} else {
/* Phantom slice: zero-length terminator carrying only the total MB
 * count (halved for field pictures). */
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
}
/*
 * Dispatch AVC_BSD_OBJECT emission to the generation-specific
 * implementation: Ironlake uses the 16-dword command layout, earlier
 * G4x parts the shorter 8/9-dword layout.
 */
static void
i965_avc_bsd_object(VADriverContextP ctx,
                    struct decode_state *decode_state,
                    VAPictureParameterBufferH264 *pic_param,
                    VASliceParameterBufferH264 *slice_param,
                    int slice_index,
                    struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);

    if (!IS_IRONLAKE(i965->intel.device_info)) {
        g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param,
                           slice_index, i965_h264_context);
        return;
    }

    ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param,
                            slice_index, i965_h264_context);
}
/*
 * Emit the terminating phantom slice (a BSD_OBJECT with NULL slice
 * parameters) after all real slices of the picture have been issued.
 */
static void
i965_avc_bsd_phantom_slice(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
struct i965_h264_context *i965_h264_context)
{
i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, 0, i965_h264_context);
}
/*
 * Top-level AVC BSD decode entry point: update the frame-store index
 * for the new picture, decide whether the in-loop deblocking (ILDB)
 * output path is needed, then emit IMG_STATE/QM_STATE followed by
 * per-slice SLICE_STATE, BUF_BASE_STATE and BSD_OBJECT commands, and
 * finally the terminating phantom slice, all inside one atomic BCS
 * batch.
 */
void
i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
{
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
        i965_h264_context->fsid_list, &i965_h264_context->fs_ctx);

    /* Enable the ILDB output path if any slice keeps deblocking on
     * (disable_deblocking_filter_idc != 1); scan stops at the first hit. */
    i965_h264_context->enable_avc_ildb = 0;
    i965_h264_context->picture.i_flag = 1;

    for (j = 0; j < decode_state->num_slice_params && i965_h264_context->enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                i965_h264_context->enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    i965_avc_bsd_img_state(ctx, decode_state, i965_h264_context);
    i965_avc_bsd_qm_state(ctx, decode_state, i965_h264_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        i965_bsd_ind_obj_base_address(ctx, decode_state, j, i965_h264_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            /* Bug fix: the original test used '||' between the two
             * inequalities, which is a tautology (a value can never equal
             * both I and SI), so i_flag was cleared for every slice —
             * including intra ones. Clear it only for non-intra slices. */
            if (i965_h264_context->picture.i_flag &&
                slice_param->slice_type != SLICE_TYPE_I &&
                slice_param->slice_type != SLICE_TYPE_SI)
                i965_h264_context->picture.i_flag = 0;

            i965_avc_bsd_slice_state(ctx, pic_param, slice_param, i965_h264_context);
            i965_avc_bsd_buf_base_state(ctx, decode_state, pic_param, slice_param, i965_h264_context);
            i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param, j, i965_h264_context);
            slice_param++;
        }
    }

    i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context);
    intel_batchbuffer_emit_mi_flush(batch);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
/*
 * (Re)allocate the scratch row-store buffers required by the AVC BSD
 * pipeline. Any buffers left over from a previous initialization are
 * released before the new allocations.
 */
void
i965_avc_bsd_decode_init(VADriverContextP ctx, void *h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
    struct i965_avc_bsd_context *avc_bsd_ctx;
    dri_bo *raw_store_bo;
    dri_bo *mpr_store_bo;

    assert(i965_h264_context);
    avc_bsd_ctx = &i965_h264_context->i965_avc_bsd_context;

    /* BSD raw store: at least 11520 bytes to support 120 MBs per row. */
    dri_bo_unreference(avc_bsd_ctx->bsd_raw_store.bo);
    raw_store_bo = dri_bo_alloc(i965->intel.bufmgr,
                                "bsd raw store",
                                0x3000,
                                64);
    assert(raw_store_bo);
    avc_bsd_ctx->bsd_raw_store.bo = raw_store_bo;

    /* MPR row store: at least 7680 bytes to support 120 MBs per row. */
    dri_bo_unreference(avc_bsd_ctx->mpr_row_store.bo);
    mpr_store_bo = dri_bo_alloc(i965->intel.bufmgr,
                                "mpr row store",
                                0x2000,
                                64);
    assert(mpr_store_bo);
    avc_bsd_ctx->mpr_row_store.bo = mpr_store_bo;
}
Bool
i965_avc_bsd_ternimate(struct i965_avc_bsd_context *i965_avc_bsd_context)
{
dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
return True;
}

View File

@@ -0,0 +1,50 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_BSD_H__
#define __I965_AVC_BSD_H__

/* Direct-MV buffer size per frame. */
#define DMV_SIZE 0x88000 /* 557056 bytes for a frame */

/* Per-decoder state for the AVC bitstream decoder (BSD) fixed-function unit. */
struct i965_avc_bsd_context
{
    struct {
        dri_bo *bo;     /* BSD raw row-store buffer */
    } bsd_raw_store;

    struct {
        dri_bo *bo;     /* MPR row-store buffer */
    } mpr_row_store;
};

/* Run the BSD decode pipeline for the current picture. */
void i965_avc_bsd_pipeline(VADriverContextP, struct decode_state *, void *h264_context);
/* (Re)allocate the BSD scratch buffers before decoding a picture. */
void i965_avc_bsd_decode_init(VADriverContextP, void *h264_context);
/* Release the BSD scratch buffers; always returns True.  (Name typo is historical.) */
Bool i965_avc_bsd_ternimate(struct i965_avc_bsd_context *);

#endif /* __I965_AVC_BSD_H__ */

View File

@@ -0,0 +1,461 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_avc_hw_scoreboard.h"
#include "i965_media_h264.h"
#include "i965_media.h"
/* On Ironlake */
#include "shaders/h264/mc/export.inc.gen5"
/* Indices into avc_hw_scoreboard_kernel_offset[] below. */
enum {
    AVC_HW_SCOREBOARD = 0,          /* progressive-frame kernel */
    AVC_HW_SCOREBOARD_MBAFF         /* MBAFF-frame kernel */
};

/* Byte offsets of the two scoreboard kernels inside the combined AVC
 * kernel BO; the *_IP_GEN5 values come from the generated export.inc.gen5. */
static unsigned long avc_hw_scoreboard_kernel_offset[] = {
    SETHWSCOREBOARD_IP_GEN5 * INST_UNIT_GEN5,
    SETHWSCOREBOARD_MBAFF_IP_GEN5 * INST_UNIT_GEN5
};

/* CURBE constants consumed by the scoreboard kernel (8 dwords; copied
 * verbatim into the constant buffer by ..._upload_constants()). */
static unsigned int avc_hw_scoreboard_constants[] = {
    0x08040201,
    0x00000010,
    0x08000210,
    0x00000000,
    0x08040201,
    0x08040210,
    0x01000010,
    0x08040200
};
/*
 * Fill in the SURFACE_STATE for the scoreboard's data buffer.
 * The buffer is described as a linear BUFFER surface whose element count is
 * total_mbs * MB_CMD_IN_OWS; the (count - 1) value is split across the
 * width/height/depth bitfields as the hardware requires (7/13/7 bits).
 */
static void
i965_avc_hw_scoreboard_surface_state(struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = avc_hw_scoreboard_context->surface.ss_bo;
    dri_bo_map(bo, 1);                    /* map writable */
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    ss->ss1.base_addr = avc_hw_scoreboard_context->surface.s_bo->offset;  /* patched by reloc below */
    ss->ss2.width = ((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) & 0x7f);
    ss->ss2.height = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 7) & 0x1fff);
    ss->ss3.depth = (((avc_hw_scoreboard_context->surface.total_mbs * MB_CMD_IN_OWS - 1) >> 20) & 0x7f);
    /* Relocate ss1.base_addr against the data BO (read/write render domain). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      avc_hw_scoreboard_context->surface.s_bo);
    dri_bo_unmap(bo);
}
/*
 * Write the single interface descriptor used by the scoreboard kernel:
 * kernel start pointer (selected earlier from the frame/MBAFF table),
 * one CURBE read length, and the binding table pointer.
 */
static void
i965_avc_hw_scoreboard_interface_descriptor_table(struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct i965_interface_descriptor *desc;
    dri_bo *bo;

    bo = avc_hw_scoreboard_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.grf_reg_blocks = 7;
    desc->desc0.kernel_start_pointer = (avc_hw_scoreboard_context->hw_kernel.bo->offset +
                                        avc_hw_scoreboard_context->hw_kernel.offset) >> 6; /* reloc */
    desc->desc1.const_urb_entry_read_offset = 0;
    desc->desc1.const_urb_entry_read_len = 1;    /* one CURBE entry (the 8 constants) */
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer =
        avc_hw_scoreboard_context->binding_table.bo->offset >> 5; /*reloc */
    /* NOTE(review): the reloc deltas below fold low bitfields (grf_reg_blocks,
     * binding_table_entry_count) into the patched dword so the bits survive
     * relocation — idiomatic for this driver, confirm against i965_media.c. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc0.grf_reg_blocks + avc_hw_scoreboard_context->hw_kernel.offset,
                      offsetof(struct i965_interface_descriptor, desc0),
                      avc_hw_scoreboard_context->hw_kernel.bo);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct i965_interface_descriptor, desc3),
                      avc_hw_scoreboard_context->binding_table.bo);
    dri_bo_unmap(bo);
}
/*
 * Build the binding table: a single entry pointing at the scoreboard
 * surface-state BO, patched in place via a relocation.
 */
static void
i965_avc_hw_scoreboard_binding_table(struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *sb_context = &i965_h264_context->avc_hw_scoreboard_context;
    dri_bo *bt_bo = sb_context->binding_table.bo;
    unsigned int *entries;

    dri_bo_map(bt_bo, 1);
    assert(bt_bo->virtual);

    entries = bt_bo->virtual;
    memset(entries, 0, bt_bo->size);
    entries[0] = sb_context->surface.ss_bo->offset;   /* patched by the reloc */

    dri_bo_emit_reloc(bt_bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      0,
                      sb_context->surface.ss_bo);
    dri_bo_unmap(bt_bo);
}
/*
 * Program the VFE (video front end) state: thread count, URB entry layout,
 * generic media mode, and the interface descriptor base (relocated).
 */
static void
i965_avc_hw_scoreboard_vfe_state(struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = avc_hw_scoreboard_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    /* Hardware fields are "value - 1" encoded for max_threads / alloc size. */
    vfe_state->vfe1.max_threads = avc_hw_scoreboard_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = avc_hw_scoreboard_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = avc_hw_scoreboard_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    vfe_state->vfe1.children_present = 0;
    vfe_state->vfe2.interface_descriptor_base =
        avc_hw_scoreboard_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      avc_hw_scoreboard_context->idrt.bo);
    dri_bo_unmap(bo);
}
static void
i965_avc_hw_scoreboard_upload_constants(struct i965_h264_context *i965_h264_context)
{
struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
unsigned char *constant_buffer;
if (avc_hw_scoreboard_context->curbe.upload)
return;
dri_bo_map(avc_hw_scoreboard_context->curbe.bo, 1);
assert(avc_hw_scoreboard_context->curbe.bo->virtual);
constant_buffer = avc_hw_scoreboard_context->curbe.bo->virtual;
memcpy(constant_buffer, avc_hw_scoreboard_constants, sizeof(avc_hw_scoreboard_constants));
dri_bo_unmap(avc_hw_scoreboard_context->curbe.bo);
avc_hw_scoreboard_context->curbe.upload = 1;
}
/*
 * Prepare all indirect state for the scoreboard pass.  Order matters:
 * the binding table references the surface state, and the interface
 * descriptor references the binding table.
 */
static void
i965_avc_hw_scoreboard_states_setup(struct i965_h264_context *i965_h264_context)
{
    i965_avc_hw_scoreboard_surface_state(i965_h264_context);
    i965_avc_hw_scoreboard_binding_table(i965_h264_context);
    i965_avc_hw_scoreboard_interface_descriptor_table(i965_h264_context);
    i965_avc_hw_scoreboard_vfe_state(i965_h264_context);
    i965_avc_hw_scoreboard_upload_constants(i965_h264_context);
}
/* Select the media pipeline for the following commands. */
static void
i965_avc_hw_scoreboard_pipeline_select(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
/*
 * Partition the URB: the VFE section runs up to urb.cs_start and the
 * constant section (CS) takes the rest, up to the device's URB size.
 */
static void
i965_avc_hw_scoreboard_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = avc_hw_scoreboard_context->urb.cs_start;      /* end of VFE section */
    cs_fence = i965->intel.device_info->urb_size;             /* end of CS section */

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
/*
 * Emit CMD_STATE_BASE_ADDRESS with all seven base addresses set to zero
 * (modify bit set), so every state pointer in the batch is absolute.
 */
static void
i965_avc_hw_scoreboard_state_base_address(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int dword;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
    /* Seven identical payload dwords: base address 0 + modify enable. */
    for (dword = 0; dword < 7; dword++)
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    ADVANCE_BATCH(batch);
}
/* Point the media pipeline at the VFE state BO (relocated in the batch). */
static void
i965_avc_hw_scoreboard_state_pointers(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, avc_hw_scoreboard_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
/* Describe the constant (CS) URB section: entry size ("-1" encoded) and count. */
static void
i965_avc_hw_scoreboard_cs_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((avc_hw_scoreboard_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (avc_hw_scoreboard_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
/*
 * Bind the CURBE BO as the constant buffer.
 * NOTE(review): the reloc delta (size_cs_entry - 1) ends up in the low bits
 * of the relocated dword, which is where CONSTANT_BUFFER encodes the buffer
 * length — confirm against the PRM / other CONSTANT_BUFFER users.
 */
static void
i965_avc_hw_scoreboard_constant_buffer(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, avc_hw_scoreboard_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              avc_hw_scoreboard_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);
}
static void
i965_avc_hw_scoreboard_objects(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
struct intel_batchbuffer *batch = i965_h264_context->batch;
int number_mb_cmds = 512;
int starting_mb_number = avc_hw_scoreboard_context->inline_data.starting_mb_number;
int i;
for (i = 0; i < avc_hw_scoreboard_context->inline_data.num_mb_cmds / 512; i++) {
BEGIN_BATCH(batch, 6);
OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4);
OUT_BATCH(batch, 0); /* interface descriptor offset: 0 */
OUT_BATCH(batch, 0); /* no indirect data */
OUT_BATCH(batch, 0);
OUT_BATCH(batch, ((number_mb_cmds << 16) |
(starting_mb_number << 0)));
OUT_BATCH(batch, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
ADVANCE_BATCH(batch);
starting_mb_number += 512;
}
number_mb_cmds = avc_hw_scoreboard_context->inline_data.num_mb_cmds % 512;
if (number_mb_cmds) {
BEGIN_BATCH(batch, 6);
OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4);
OUT_BATCH(batch, 0); /* interface descriptor offset: 0 */
OUT_BATCH(batch, 0); /* no indirect data */
OUT_BATCH(batch, 0);
OUT_BATCH(batch, ((number_mb_cmds << 16) |
(starting_mb_number << 0)));
OUT_BATCH(batch, avc_hw_scoreboard_context->inline_data.pic_width_in_mbs);
ADVANCE_BATCH(batch);
}
}
/*
 * Emit the full scoreboard batch: pipeline select, state setup, URB
 * layout, constants, then the MEDIA_OBJECT dispatches — wrapped in an
 * atomic batchbuffer section so it is flushed as one unit.
 */
static void
i965_avc_hw_scoreboard_pipeline_setup(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_avc_hw_scoreboard_pipeline_select(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_state_base_address(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_state_pointers(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_urb_layout(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_cs_urb_layout(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_constant_buffer(ctx, i965_h264_context);
    i965_avc_hw_scoreboard_objects(ctx, i965_h264_context);
    intel_batchbuffer_end_atomic(batch);
}
/*
 * Entry point: run the HW scoreboard pass for the current picture, if
 * enabled.  Picks the frame or MBAFF kernel, fills the inline data from
 * the IT-command MB info, then sets up state and emits the pipeline.
 */
void
i965_avc_hw_scoreboard(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
{
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;

    if (i965_h264_context->use_avc_hw_scoreboard) {
        struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;

        avc_hw_scoreboard_context->inline_data.num_mb_cmds = i965_h264_context->avc_it_command_mb_info.mbs;
        /* NOTE(review): starting_mb_number is initialised to .mbs as well,
         * i.e. one past the MBs already emitted — confirm against the
         * IT-command producer that this is the intended start index. */
        avc_hw_scoreboard_context->inline_data.starting_mb_number = i965_h264_context->avc_it_command_mb_info.mbs;
        avc_hw_scoreboard_context->inline_data.pic_width_in_mbs = i965_h264_context->picture.width_in_mbs;
        avc_hw_scoreboard_context->surface.total_mbs = i965_h264_context->avc_it_command_mb_info.mbs * 2;

        /* The scoreboard kernel lives inside the combined AVC kernel BO. */
        dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
        avc_hw_scoreboard_context->hw_kernel.bo = i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo;
        assert(avc_hw_scoreboard_context->hw_kernel.bo != NULL);
        dri_bo_reference(avc_hw_scoreboard_context->hw_kernel.bo);

        /* Select the per-picture-structure kernel variant. */
        if (i965_h264_context->picture.mbaff_frame_flag)
            avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD_MBAFF];
        else
            avc_hw_scoreboard_context->hw_kernel.offset = avc_hw_scoreboard_kernel_offset[AVC_HW_SCOREBOARD];

        i965_avc_hw_scoreboard_states_setup(i965_h264_context);
        i965_avc_hw_scoreboard_pipeline_setup(ctx, i965_h264_context);
    }
}
/*
 * Allocate/refresh all BOs used by the scoreboard pass for a new picture:
 * CURBE (once), the data surface (borrowed from the IT-command MB info),
 * surface state, binding table, interface descriptors and VFE state.
 * Also computes the URB partitioning used by the batch commands later.
 */
void
i965_avc_hw_scoreboard_decode_init(VADriverContextP ctx, void *h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;

    if (i965_h264_context->use_avc_hw_scoreboard) {
        struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context = &i965_h264_context->avc_hw_scoreboard_context;
        dri_bo *bo;

        /* CURBE is allocated once and reused (constants never change). */
        if (avc_hw_scoreboard_context->curbe.bo == NULL) {
            bo = dri_bo_alloc(i965->intel.bufmgr,
                              "constant buffer",
                              4096, 64);
            assert(bo);
            avc_hw_scoreboard_context->curbe.bo = bo;
            avc_hw_scoreboard_context->curbe.upload = 0;
        }

        /* Data surface: share the IT-command MB-info BO. */
        dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
        avc_hw_scoreboard_context->surface.s_bo = i965_h264_context->avc_it_command_mb_info.bo;
        assert(avc_hw_scoreboard_context->surface.s_bo != NULL);
        dri_bo_reference(avc_hw_scoreboard_context->surface.s_bo);

        dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "surface state",
                          sizeof(struct i965_surface_state), 32);
        assert(bo);
        avc_hw_scoreboard_context->surface.ss_bo = bo;

        dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "binding table",
                          MAX_MEDIA_SURFACES * sizeof(unsigned int), 32);
        assert(bo);
        avc_hw_scoreboard_context->binding_table.bo = bo;

        dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "interface discriptor",
                          MAX_INTERFACE_DESC * sizeof(struct i965_interface_descriptor), 16);
        assert(bo);
        avc_hw_scoreboard_context->idrt.bo = bo;

        dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "vfe state",
                          sizeof(struct i965_vfe_state), 32);
        assert(bo);
        avc_hw_scoreboard_context->vfe_state.bo = bo;

        /* URB layout: 32 VFE entries of 2 units, then 1 CS entry of 1 unit;
         * the total must fit inside the device URB. */
        avc_hw_scoreboard_context->urb.num_vfe_entries = 32;
        avc_hw_scoreboard_context->urb.size_vfe_entry = 2;
        avc_hw_scoreboard_context->urb.num_cs_entries = 1;
        avc_hw_scoreboard_context->urb.size_cs_entry = 1;
        avc_hw_scoreboard_context->urb.vfe_start = 0;
        avc_hw_scoreboard_context->urb.cs_start = avc_hw_scoreboard_context->urb.vfe_start +
            avc_hw_scoreboard_context->urb.num_vfe_entries * avc_hw_scoreboard_context->urb.size_vfe_entry;
        assert(avc_hw_scoreboard_context->urb.cs_start +
               avc_hw_scoreboard_context->urb.num_cs_entries * avc_hw_scoreboard_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
    }
}
Bool
i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *avc_hw_scoreboard_context)
{
dri_bo_unreference(avc_hw_scoreboard_context->curbe.bo);
avc_hw_scoreboard_context->curbe.bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->surface.ss_bo);
avc_hw_scoreboard_context->surface.ss_bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->surface.s_bo);
avc_hw_scoreboard_context->surface.s_bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->binding_table.bo);
avc_hw_scoreboard_context->binding_table.bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->idrt.bo);
avc_hw_scoreboard_context->idrt.bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->vfe_state.bo);
avc_hw_scoreboard_context->vfe_state.bo = NULL;
dri_bo_unreference(avc_hw_scoreboard_context->hw_kernel.bo);
avc_hw_scoreboard_context->hw_kernel.bo = NULL;
return True;
}

View File

@@ -0,0 +1,85 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_HW_SCOREBOARD_H__
#define __I965_AVC_HW_SCOREBOARD_H__

/* Per-decoder state for the AVC hardware-scoreboard media kernel pass. */
struct i965_avc_hw_scoreboard_context
{
    /* Inline data passed to the kernel via CMD_MEDIA_OBJECT. */
    struct {
        unsigned int num_mb_cmds;           /* total MB commands to process */
        unsigned int starting_mb_number;    /* first MB index for the dispatch */
        unsigned int pic_width_in_mbs;      /* picture width in macroblocks */
    } inline_data;

    /* Data surface the kernel reads/writes. */
    struct {
        dri_bo *ss_bo;                      /* SURFACE_STATE BO */
        dri_bo *s_bo;                       /* data BO (IT-command MB info) */
        unsigned int total_mbs;             /* element count for the buffer surface */
    } surface;

    struct {
        dri_bo *bo;                         /* binding table (one entry) */
    } binding_table;

    struct {
        dri_bo *bo;                         /* interface descriptor table */
    } idrt;

    struct {
        dri_bo *bo;                         /* VFE state */
    } vfe_state;

    struct {
        dri_bo *bo;                         /* CURBE constant buffer */
        int upload;                         /* non-zero once constants are uploaded */
    } curbe;

    struct {
        dri_bo *bo;                         /* combined AVC kernel BO */
        unsigned long offset;               /* selected kernel's byte offset in bo */
    } hw_kernel;

    /* URB partitioning (in URB allocation units). */
    struct {
        unsigned int vfe_start;
        unsigned int cs_start;
        unsigned int num_vfe_entries;
        unsigned int num_cs_entries;
        unsigned int size_vfe_entry;
        unsigned int size_cs_entry;
    } urb;
};

/* Run the scoreboard pass for the current picture (no-op unless enabled). */
void i965_avc_hw_scoreboard(VADriverContextP, struct decode_state *, void *h264_context);
/* Allocate/refresh the scoreboard BOs before decoding a picture. */
void i965_avc_hw_scoreboard_decode_init(VADriverContextP, void *h264_context);
/* Release all scoreboard BOs; always returns True.  (Name typo is historical.) */
Bool i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *);

#endif /* __I965_AVC_HW_SCOREBOARD_H__ */

View File

@@ -0,0 +1,650 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_avc_ildb.h"
#include "i965_media_h264.h"
#include "i965_media.h"
/* On Cantiga */
#include "shaders/h264/mc/export.inc"
/* On Ironlake */
#include "shaders/h264/mc/export.inc.gen5"
/* Picture structure codes for the ILDB root kernel's inline data. */
#define PICTURE_FRAME 0
#define PICTURE_FIELD 1
#define PICTURE_MBAFF 2

/* Indices into the avc_ildb_kernel_offset_gen{4,5}[] tables below:
 * root/child x luma(Y)/chroma(UV) x frame/field/MBAFF. */
enum {
    AVC_ILDB_ROOT_Y_ILDB_FRAME,
    AVC_ILDB_CHILD_Y_ILDB_FRAME,
    AVC_ILDB_ROOT_UV_ILDB_FRAME,
    AVC_ILDB_CHILD_UV_ILDB_FRAME,
    AVC_ILDB_ROOT_Y_ILDB_FIELD,
    AVC_ILDB_CHILD_Y_ILDB_FIELD,
    AVC_ILDB_ROOT_UV_ILDB_FIELD,
    AVC_ILDB_CHILD_UV_ILDB_FIELD,
    AVC_ILDB_ROOT_Y_ILDB_MBAFF,
    AVC_ILDB_CHILD_Y_ILDB_MBAFF,
    AVC_ILDB_ROOT_UV_ILDB_MBAFF,
    AVC_ILDB_CHILD_UV_ILDB_MBAFF
};

/* Kernel entry-point byte offsets on Cantiga (gen4); the *_IP values come
 * from the generated shaders/h264/mc/export.inc. */
static unsigned long avc_ildb_kernel_offset_gen4[] = {
    AVC_ILDB_ROOT_Y_ILDB_FRAME_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_Y_ILDB_FRAME_IP * INST_UNIT_GEN4,
    AVC_ILDB_ROOT_UV_ILDB_FRAME_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_UV_ILDB_FRAME_IP * INST_UNIT_GEN4,
    AVC_ILDB_ROOT_Y_ILDB_FIELD_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_Y_ILDB_FIELD_IP * INST_UNIT_GEN4,
    AVC_ILDB_ROOT_UV_ILDB_FIELD_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_UV_ILDB_FIELD_IP * INST_UNIT_GEN4,
    AVC_ILDB_ROOT_Y_ILDB_MBAFF_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_Y_ILDB_MBAFF_IP * INST_UNIT_GEN4,
    AVC_ILDB_ROOT_UV_ILDB_MBAFF_IP * INST_UNIT_GEN4,
    AVC_ILDB_CHILD_UV_ILDB_MBAFF_IP * INST_UNIT_GEN4
};

/* Kernel entry-point byte offsets on Ironlake (gen5), from export.inc.gen5. */
static unsigned long avc_ildb_kernel_offset_gen5[] = {
    AVC_ILDB_ROOT_Y_ILDB_FRAME_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_Y_ILDB_FRAME_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_ROOT_UV_ILDB_FRAME_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_UV_ILDB_FRAME_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_ROOT_Y_ILDB_FIELD_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_Y_ILDB_FIELD_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_ROOT_UV_ILDB_FIELD_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_UV_ILDB_FIELD_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_ROOT_Y_ILDB_MBAFF_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_Y_ILDB_MBAFF_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_ROOT_UV_ILDB_MBAFF_IP_GEN5 * INST_UNIT_GEN5,
    AVC_ILDB_CHILD_UV_ILDB_MBAFF_IP_GEN5 * INST_UNIT_GEN5
};

/* Inline/CURBE data layout consumed by the ILDB root kernel.
 * Field widths must match the kernel's expectations exactly. */
struct avc_ildb_root_input
{
    unsigned int blocks_per_row : 16;              /* picture width in blocks */
    unsigned int blocks_per_column : 16;           /* picture height in blocks */

    unsigned int picture_type : 16;                /* PICTURE_FRAME/FIELD/MBAFF */
    unsigned int max_concurrent_threads : 16;

    unsigned int debug_field : 16;
    unsigned int mbaff_frame_flag : 1;
    unsigned int bottom_field_flag : 1;
    unsigned int control_data_expansion_flag : 1;
    unsigned int chroma_format : 1;
    unsigned int pad0 : 12;

    unsigned int ramp_constant_0;

    unsigned int ramp_constant_1;

    int constant_0 : 8;
    int constant_1 : 8;
    int pad1 : 16;

    unsigned int pad2;
    unsigned int pad3;
};

/* Both gen4 and gen5 tables have the same number of entries. */
#define NUM_AVC_ILDB_INTERFACES ARRAY_ELEMS(avc_ildb_kernel_offset_gen4)
/* Selected at runtime (gen4 vs gen5 table); set before the IDRT is built. */
static unsigned long *avc_ildb_kernel_offset = NULL;
/*
 * Describe the five surfaces used by the in-loop deblocking (ILDB) kernels:
 * the edge-control data buffer plus source and destination views of the
 * current picture's Y and UV (NV12) planes, then write a SURFACE_STATE for
 * each.  Width fields are "bytes/4 - 1" because the surfaces are declared
 * as R8(_SINT)/R8G8(_SINT); all sized fields are "-1" encoded per HW spec.
 */
static void
i965_avc_ildb_surface_state(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct i965_surface_state *ss;
    struct object_surface *obj_surface;
    VAPictureParameterBufferH264 *pic_param;
    VAPictureH264 *va_pic;
    dri_bo *bo;
    int i;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;

    /* Edge-control data produced by the BSD stage, viewed as a linear buffer. */
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].s_bo = i965_h264_context->avc_ildb_data.bo;
    dri_bo_reference(avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].s_bo);
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].offset = 0;
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].surface_type = I965_SURFACE_BUFFER;
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].width = ((avc_ildb_context->mbs_per_picture * EDGE_CONTROL_DATA_IN_DWS - 1) & 0x7f);
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].height = (((avc_ildb_context->mbs_per_picture * EDGE_CONTROL_DATA_IN_DWS - 1) >> 7) & 0x1fff);
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].depth = (((avc_ildb_context->mbs_per_picture * EDGE_CONTROL_DATA_IN_DWS - 1) >> 20) & 0x7f);
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].pitch = EDGE_CONTROL_DATA_IN_BTYES - 1;
    avc_ildb_context->surface[SURFACE_EDGE_CONTROL_DATA].is_target = 0;

    /* Source luma plane.  vert_line_stride selects field access for
     * field pictures; the _ofs bit picks the bottom field. */
    avc_ildb_context->surface[SURFACE_SRC_Y].s_bo = obj_surface->bo;
    dri_bo_reference(avc_ildb_context->surface[SURFACE_SRC_Y].s_bo);
    avc_ildb_context->surface[SURFACE_SRC_Y].offset = 0;
    avc_ildb_context->surface[SURFACE_SRC_Y].surface_type = I965_SURFACE_2D;
    avc_ildb_context->surface[SURFACE_SRC_Y].format = I965_SURFACEFORMAT_R8_SINT;
    avc_ildb_context->surface[SURFACE_SRC_Y].width = obj_surface->width / 4 - 1;
    avc_ildb_context->surface[SURFACE_SRC_Y].height = obj_surface->height - 1;
    avc_ildb_context->surface[SURFACE_SRC_Y].depth = 0;
    avc_ildb_context->surface[SURFACE_SRC_Y].pitch = obj_surface->width - 1;
    avc_ildb_context->surface[SURFACE_SRC_Y].vert_line_stride = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
    avc_ildb_context->surface[SURFACE_SRC_Y].vert_line_stride_ofs = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
    avc_ildb_context->surface[SURFACE_SRC_Y].is_target = 0;

    /* Source chroma plane: interleaved UV directly after Y in the NV12 BO. */
    avc_ildb_context->surface[SURFACE_SRC_UV].s_bo = obj_surface->bo;
    dri_bo_reference(avc_ildb_context->surface[SURFACE_SRC_UV].s_bo);
    avc_ildb_context->surface[SURFACE_SRC_UV].offset = obj_surface->width * obj_surface->height;
    avc_ildb_context->surface[SURFACE_SRC_UV].surface_type = I965_SURFACE_2D;
    avc_ildb_context->surface[SURFACE_SRC_UV].format = I965_SURFACEFORMAT_R8G8_SINT;
    avc_ildb_context->surface[SURFACE_SRC_UV].width = obj_surface->width / 4 - 1;
    avc_ildb_context->surface[SURFACE_SRC_UV].height = obj_surface->height / 2 - 1;
    avc_ildb_context->surface[SURFACE_SRC_UV].depth = 0;
    avc_ildb_context->surface[SURFACE_SRC_UV].pitch = obj_surface->width - 1;
    avc_ildb_context->surface[SURFACE_SRC_UV].vert_line_stride = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
    avc_ildb_context->surface[SURFACE_SRC_UV].vert_line_stride_ofs = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
    avc_ildb_context->surface[SURFACE_SRC_UV].is_target = 0;

    /* Destination luma plane — same BO/layout as the source (in-place
     * deblocking), but marked as a render target. */
    avc_ildb_context->surface[SURFACE_DEST_Y].s_bo = obj_surface->bo;
    dri_bo_reference(avc_ildb_context->surface[SURFACE_DEST_Y].s_bo);
    avc_ildb_context->surface[SURFACE_DEST_Y].offset = 0;
    avc_ildb_context->surface[SURFACE_DEST_Y].surface_type = I965_SURFACE_2D;
    avc_ildb_context->surface[SURFACE_DEST_Y].format = I965_SURFACEFORMAT_R8_SINT;
    avc_ildb_context->surface[SURFACE_DEST_Y].width = obj_surface->width / 4 - 1;
    avc_ildb_context->surface[SURFACE_DEST_Y].height = obj_surface->height - 1;
    avc_ildb_context->surface[SURFACE_DEST_Y].depth = 0;
    avc_ildb_context->surface[SURFACE_DEST_Y].pitch = obj_surface->width - 1;
    avc_ildb_context->surface[SURFACE_DEST_Y].vert_line_stride = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
    avc_ildb_context->surface[SURFACE_DEST_Y].vert_line_stride_ofs = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
    avc_ildb_context->surface[SURFACE_DEST_Y].is_target = 1;

    /* Destination chroma plane — render target view of the UV plane. */
    avc_ildb_context->surface[SURFACE_DEST_UV].s_bo = obj_surface->bo;
    dri_bo_reference(avc_ildb_context->surface[SURFACE_DEST_UV].s_bo);
    avc_ildb_context->surface[SURFACE_DEST_UV].offset = obj_surface->width * obj_surface->height;
    avc_ildb_context->surface[SURFACE_DEST_UV].surface_type = I965_SURFACE_2D;
    avc_ildb_context->surface[SURFACE_DEST_UV].format = I965_SURFACEFORMAT_R8G8_SINT;
    avc_ildb_context->surface[SURFACE_DEST_UV].width = obj_surface->width / 4 - 1;
    avc_ildb_context->surface[SURFACE_DEST_UV].height = obj_surface->height / 2 - 1;
    avc_ildb_context->surface[SURFACE_DEST_UV].depth = 0;
    avc_ildb_context->surface[SURFACE_DEST_UV].pitch = obj_surface->width - 1;
    avc_ildb_context->surface[SURFACE_DEST_UV].vert_line_stride = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
    avc_ildb_context->surface[SURFACE_DEST_UV].vert_line_stride_ofs = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
    avc_ildb_context->surface[SURFACE_DEST_UV].is_target = 1;

    /* Emit a SURFACE_STATE for each of the descriptors filled above;
     * the base address dword is patched via relocation, write domain set
     * only for render targets. */
    for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) {
        bo = avc_ildb_context->surface[i].ss_bo;
        dri_bo_map(bo, 1);
        assert(bo->virtual);
        ss = bo->virtual;
        memset(ss, 0, sizeof(*ss));
        ss->ss0.surface_type = avc_ildb_context->surface[i].surface_type;
        ss->ss0.surface_format = avc_ildb_context->surface[i].format;
        ss->ss0.vert_line_stride = avc_ildb_context->surface[i].vert_line_stride;
        ss->ss0.vert_line_stride_ofs = avc_ildb_context->surface[i].vert_line_stride_ofs;
        ss->ss1.base_addr = avc_ildb_context->surface[i].s_bo->offset + avc_ildb_context->surface[i].offset;
        ss->ss2.width = avc_ildb_context->surface[i].width;
        ss->ss2.height = avc_ildb_context->surface[i].height;
        ss->ss3.depth = avc_ildb_context->surface[i].depth;
        ss->ss3.pitch = avc_ildb_context->surface[i].pitch;
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_RENDER,
                          avc_ildb_context->surface[i].is_target ? I915_GEM_DOMAIN_RENDER : 0,
                          avc_ildb_context->surface[i].offset,
                          offsetof(struct i965_surface_state, ss1),
                          avc_ildb_context->surface[i].s_bo);
        dri_bo_unmap(bo);
    }
}
/*
 * Fill the ILDB binding table with one entry per surface: each entry holds
 * the (pre-relocation) GPU offset of that surface's SURFACE_STATE buffer
 * object, and a relocation is emitted so the kernel patches in the final
 * address at execution time.
 */
static void
i965_avc_ildb_binding_table(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    unsigned int *binding_table;
    dri_bo *bo = avc_ildb_context->binding_table.bo;
    int i;

    dri_bo_map(bo, 1);  /* 1 = map for write */
    assert(bo->virtual);
    binding_table = bo->virtual;
    memset(binding_table, 0, bo->size);

    for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) {
        /* Placeholder address; fixed up by the relocation below. */
        binding_table[i] = avc_ildb_context->surface[i].ss_bo->offset;
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*binding_table),
                          avc_ildb_context->surface[i].ss_bo);
    }

    dri_bo_unmap(bo);
}
/*
 * Build the interface descriptor remap table (IDRT): one descriptor per
 * ILDB kernel interface, each pointing at its entry point inside the
 * combined AVC kernel BO and at the shared binding table.
 */
static void
i965_avc_ildb_interface_descriptor_table(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct i965_interface_descriptor *desc;
    dri_bo *bo;
    int i;

    bo = avc_ildb_context->idrt.bo;
    dri_bo_map(bo, 1);  /* 1 = map for write */
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < NUM_AVC_ILDB_INTERFACES; i++) {
        int kernel_offset = avc_ildb_kernel_offset[i];
        memset(desc, 0, sizeof(*desc));
        desc->desc0.grf_reg_blocks = 7;
        /* Kernel start pointer is 64-byte aligned, hence the >> 6. */
        desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
        desc->desc1.const_urb_entry_read_offset = 0;
        /* Only the three root (Y) kernels consume CURBE constants. */
        desc->desc1.const_urb_entry_read_len = ((i == AVC_ILDB_ROOT_Y_ILDB_FRAME ||
                                                 i == AVC_ILDB_ROOT_Y_ILDB_FIELD ||
                                                 i == AVC_ILDB_ROOT_Y_ILDB_MBAFF) ? 1 : 0);
        desc->desc3.binding_table_entry_count = 0;
        /* Binding table pointer is 32-byte aligned, hence the >> 5. */
        desc->desc3.binding_table_pointer =
            avc_ildb_context->binding_table.bo->offset >> 5; /*reloc */
        /* NOTE(review): the reloc delta includes grf_reg_blocks so the
         * low bits of desc0 survive the address patch — confirm against
         * the interface descriptor layout before changing. */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          desc->desc0.grf_reg_blocks + kernel_offset,
                          i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
                          i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
        /* NOTE(review): same low-bits trick for desc3 — the delta carries
         * binding_table_entry_count (currently 0). */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          desc->desc3.binding_table_entry_count,
                          i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
                          avc_ildb_context->binding_table.bo);
        desc++;
    }

    dri_bo_unmap(bo);
}
/*
 * Program the VFE (Video Front End) fixed-function state: URB entry layout,
 * generic media mode, and the pointer to the interface descriptor table.
 */
static void
i965_avc_ildb_vfe_state(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = avc_ildb_context->vfe_state.bo;
    dri_bo_map(bo, 1);  /* 1 = map for write */
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    vfe_state->vfe1.max_threads = 0;
    /* Hardware encodes the allocation size as (size - 1). */
    vfe_state->vfe1.urb_entry_alloc_size = avc_ildb_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = avc_ildb_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    /* Root kernels spawn child threads. */
    vfe_state->vfe1.children_present = 1;
    /* IDRT base is 16-byte aligned, hence the >> 4. */
    vfe_state->vfe2.interface_descriptor_base =
        avc_ildb_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      avc_ildb_context->idrt.bo);
    dri_bo_unmap(bo);
}
/*
 * Upload the CURBE constants read by the root ILDB kernel: thread budget,
 * picture structure (frame/field/MBAFF), macroblock geometry and a few
 * fixed lookup constants. Also caches picture_type and mbs_per_picture
 * in the context for later batch emission.
 */
static void
i965_avc_ildb_upload_constants(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    VAPictureParameterBufferH264 *pic_param;
    struct avc_ildb_root_input *root_input;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    dri_bo_map(avc_ildb_context->curbe.bo, 1);  /* 1 = map for write */
    assert(avc_ildb_context->curbe.bo->virtual);
    root_input = avc_ildb_context->curbe.bo->virtual;

    /* Per-generation thread budgets (see inline arithmetic comments). */
    if (IS_IRONLAKE(i965->intel.device_info)) {
        root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */
    } else {
        root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */
    }

    /* Derive the picture structure from the picture parameters. */
    if (pic_param->pic_fields.bits.field_pic_flag)
        root_input->picture_type = PICTURE_FIELD;
    else {
        if (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag)
            root_input->picture_type = PICTURE_MBAFF;
        else
            root_input->picture_type = PICTURE_FRAME;
    }

    avc_ildb_context->picture_type = root_input->picture_type;
    root_input->blocks_per_row = pic_param->picture_width_in_mbs_minus1 + 1;
    /* Field/MBAFF pictures process two MB rows per block row. */
    root_input->blocks_per_column = (pic_param->picture_height_in_mbs_minus1 + 1) /
        (1 + (root_input->picture_type != PICTURE_FRAME));
    avc_ildb_context->mbs_per_picture = (pic_param->picture_width_in_mbs_minus1 + 1) *
        (pic_param->picture_height_in_mbs_minus1 + 1);

    root_input->mbaff_frame_flag = (root_input->picture_type == PICTURE_MBAFF);
    root_input->bottom_field_flag = !!(pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD);
    root_input->control_data_expansion_flag = 1; /* Always 1 on G4x+ */
    root_input->chroma_format = (pic_param->seq_fields.bits.chroma_format_idc != 1); /* 0=4:0:0, 1=4:2:0 */

    /* Fixed ramp/offset constants consumed by the kernel. */
    root_input->ramp_constant_0 = 0x03020100;
    root_input->ramp_constant_1 = 0x07060504;
    root_input->constant_0 = -2;
    root_input->constant_1 = 1;

    dri_bo_unmap(avc_ildb_context->curbe.bo);
}
/*
 * Prepare all indirect state for the ILDB pass. Order matters: later
 * stages embed the GPU offsets of objects built by earlier stages
 * (surface states -> binding table -> IDRT -> VFE state), so keep the
 * sequence as-is.
 */
static void
i965_avc_ildb_states_setup(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct i965_h264_context *i965_h264_context)
{
    i965_avc_ildb_surface_state(ctx, decode_state, i965_h264_context);
    i965_avc_ildb_binding_table(ctx, i965_h264_context);
    i965_avc_ildb_interface_descriptor_table(ctx, i965_h264_context);
    i965_avc_ildb_vfe_state(ctx, i965_h264_context);
    i965_avc_ildb_upload_constants(ctx, decode_state, i965_h264_context);
}
/* Switch the hardware pipeline to MEDIA mode for the ILDB kernels. */
static void
i965_avc_ildb_pipeline_select(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
/*
 * Partition the URB: VFE entries occupy [0, cs_start), constant (CS) data
 * the rest, with the CS fence placed at the device's total URB size.
 */
static void
i965_avc_ildb_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = avc_ildb_context->urb.cs_start;
    cs_fence = i965->intel.device_info->urb_size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
/*
 * Zero every state base address (with the MODIFY bit set) so indirect
 * state offsets are interpreted as absolute GTT addresses. Ironlake's
 * STATE_BASE_ADDRESS takes 8 dwords, earlier generations 6; the command
 * header's length field is (total dwords - 2).
 */
static void
i965_avc_ildb_state_base_address(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int num_dwords = IS_IRONLAKE(i965->intel.device_info) ? 8 : 6;
    int dw;

    BEGIN_BATCH(batch, num_dwords);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (num_dwords - 2));

    /* One zeroed base-address dword per remaining slot. */
    for (dw = 1; dw < num_dwords; dw++)
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    ADVANCE_BATCH(batch);
}
/* Point the media pipeline at the VFE state object built earlier. */
static void
i965_avc_ildb_state_pointers(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, avc_ildb_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
/* Describe the constant (CS) URB layout: entry size (encoded as size-1)
 * and entry count for the CURBE data. */
static void
i965_avc_ildb_cs_urb_layout(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((avc_ildb_context->urb.size_cs_entry - 1) << 4) |        /* URB Entry Allocation Size */
              (avc_ildb_context->urb.num_cs_entries << 0));             /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
/* Bind the CURBE buffer object as the constant buffer; the reloc delta
 * encodes the buffer length (in 256-bit units, as length - 1). */
static void
i965_avc_ildb_constant_buffer(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, avc_ildb_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              avc_ildb_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);
}
/*
 * Kick the root ILDB kernel with a MEDIA_OBJECT command. The interface
 * descriptor index is selected from the picture structure recorded by
 * i965_avc_ildb_upload_constants(); no indirect data is supplied.
 */
static void
i965_avc_ildb_objects(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int root_interface;

    /* Map picture structure to the root kernel interface index. */
    if (avc_ildb_context->picture_type == PICTURE_FRAME) {
        root_interface = AVC_ILDB_ROOT_Y_ILDB_FRAME;
    } else if (avc_ildb_context->picture_type == PICTURE_FIELD) {
        root_interface = AVC_ILDB_ROOT_Y_ILDB_FIELD;
    } else if (avc_ildb_context->picture_type == PICTURE_MBAFF) {
        root_interface = AVC_ILDB_ROOT_Y_ILDB_MBAFF;
    } else {
        assert(0);
        root_interface = 0;
    }

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_MEDIA_OBJECT | 4);
    OUT_BATCH(batch, root_interface);
    OUT_BATCH(batch, 0); /* no indirect data */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
/*
 * Emit the full ILDB batch sequence. The ordering follows the hardware
 * programming model (flush, pipeline select, base addresses, state
 * pointers, URB layout, constants, then the kernel dispatch) and must
 * not be changed.
 */
static void
i965_avc_ildb_pipeline_setup(VADriverContextP ctx, struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    intel_batchbuffer_emit_mi_flush(batch);
    i965_avc_ildb_pipeline_select(ctx, i965_h264_context);
    i965_avc_ildb_state_base_address(ctx, i965_h264_context);
    i965_avc_ildb_state_pointers(ctx, i965_h264_context);
    i965_avc_ildb_urb_layout(ctx, i965_h264_context);
    i965_avc_ildb_cs_urb_layout(ctx, i965_h264_context);
    i965_avc_ildb_constant_buffer(ctx, i965_h264_context);
    i965_avc_ildb_objects(ctx, i965_h264_context);
}
/*
 * Entry point for the AVC in-loop deblocking (ILDB) pass: build all
 * indirect state, then emit the media batch. A no-op unless the
 * context has enable_avc_ildb set.
 */
void
i965_avc_ildb(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
{
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;

    if (!i965_h264_context->enable_avc_ildb)
        return;

    i965_avc_ildb_states_setup(ctx, decode_state, i965_h264_context);
    i965_avc_ildb_pipeline_setup(ctx, i965_h264_context);
}
/*
 * Per-decode (re)initialization of the ILDB context: allocates the CURBE,
 * binding table, interface descriptor, VFE state and per-surface
 * SURFACE_STATE buffer objects, sets up the URB partitioning, and picks
 * the kernel offset table for the current GPU generation.
 *
 * Fixes vs. previous revision: removed a stray double semicolon on the
 * context-pointer declaration and corrected the "interface descriptor"
 * debug buffer name (was "discriptor").
 */
void
i965_avc_ildb_decode_init(VADriverContextP ctx, void *h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
    struct i965_avc_ildb_context *avc_ildb_context = &i965_h264_context->avc_ildb_context;
    dri_bo *bo;
    int i;

    /* Drop any buffer from a previous decode before allocating anew. */
    dri_bo_unreference(avc_ildb_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 64);
    assert(bo);
    avc_ildb_context->curbe.bo = bo;

    dri_bo_unreference(avc_ildb_context->binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "binding table",
                      NUM_AVC_ILDB_SURFACES * sizeof(unsigned int), 32);
    assert(bo);
    avc_ildb_context->binding_table.bo = bo;

    dri_bo_unreference(avc_ildb_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor",
                      NUM_AVC_ILDB_INTERFACES * sizeof(struct i965_interface_descriptor), 16);
    assert(bo);
    avc_ildb_context->idrt.bo = bo;

    dri_bo_unreference(avc_ildb_context->vfe_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vfe state",
                      sizeof(struct i965_vfe_state), 32);
    assert(bo);
    avc_ildb_context->vfe_state.bo = bo;

    /* URB partitioning: VFE entries first, CURBE data right after.
     * The whole layout must fit within the device's URB. */
    avc_ildb_context->urb.num_vfe_entries = 1;
    avc_ildb_context->urb.size_vfe_entry = 640;
    avc_ildb_context->urb.num_cs_entries = 1;
    avc_ildb_context->urb.size_cs_entry = 1;
    avc_ildb_context->urb.vfe_start = 0;
    avc_ildb_context->urb.cs_start = avc_ildb_context->urb.vfe_start +
        avc_ildb_context->urb.num_vfe_entries * avc_ildb_context->urb.size_vfe_entry;
    assert(avc_ildb_context->urb.cs_start +
           avc_ildb_context->urb.num_cs_entries * avc_ildb_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);

    /* Per-surface state: source BOs are rebound each frame, SURFACE_STATE
     * objects are reallocated here. */
    for (i = 0; i < NUM_AVC_ILDB_SURFACES; i++) {
        dri_bo_unreference(avc_ildb_context->surface[i].s_bo);
        avc_ildb_context->surface[i].s_bo = NULL;

        dri_bo_unreference(avc_ildb_context->surface[i].ss_bo);
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "surface state",
                          sizeof(struct i965_surface_state), 32);
        assert(bo);
        avc_ildb_context->surface[i].ss_bo = bo;
    }

    /* Select the kernel offset table for this GPU generation. */
    assert(NUM_AVC_ILDB_INTERFACES == ARRAY_ELEMS(avc_ildb_kernel_offset_gen5));

    if (IS_IRONLAKE(i965->intel.device_info)) {
        avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen5;
    } else {
        avc_ildb_kernel_offset = avc_ildb_kernel_offset_gen4;
    }
}
/*
 * Release every buffer object held by the ILDB context and clear the
 * pointers so a later init starts from a clean slate. Always returns True.
 * (The misspelled name "ternimate" is part of the public interface and is
 * kept for compatibility.)
 */
Bool
i965_avc_ildb_ternimate(struct i965_avc_ildb_context *avc_ildb_context)
{
    int n;

    /* Per-surface buffer objects first. */
    for (n = 0; n < NUM_AVC_ILDB_SURFACES; n++) {
        dri_bo_unreference(avc_ildb_context->surface[n].ss_bo);
        avc_ildb_context->surface[n].ss_bo = NULL;

        dri_bo_unreference(avc_ildb_context->surface[n].s_bo);
        avc_ildb_context->surface[n].s_bo = NULL;
    }

    /* Then the shared state buffers. */
    dri_bo_unreference(avc_ildb_context->curbe.bo);
    avc_ildb_context->curbe.bo = NULL;

    dri_bo_unreference(avc_ildb_context->binding_table.bo);
    avc_ildb_context->binding_table.bo = NULL;

    dri_bo_unreference(avc_ildb_context->idrt.bo);
    avc_ildb_context->idrt.bo = NULL;

    dri_bo_unreference(avc_ildb_context->vfe_state.bo);
    avc_ildb_context->vfe_state.bo = NULL;

    return True;
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_AVC_ILDB_H__
#define __I965_AVC_ILDB_H__

/* Binding-table slot indices for the ILDB kernels. */
#define SURFACE_EDGE_CONTROL_DATA       0
#define SURFACE_SRC_Y                   1
#define SURFACE_SRC_UV                  2
#define SURFACE_DEST_Y                  3
#define SURFACE_DEST_UV                 4
#define NUM_AVC_ILDB_SURFACES           5

/* Edge control data record size per macroblock.
 * NOTE(review): "BTYES" is a typo for "BYTES"; the macro name is used
 * elsewhere, so it cannot be renamed here without touching callers. */
#define EDGE_CONTROL_DATA_IN_DWS        16
#define EDGE_CONTROL_DATA_IN_BTYES      64

/* All GPU state owned by the AVC in-loop deblocking (ILDB) pass. */
struct i965_avc_ildb_context
{
    /* CURBE constants consumed by the root kernel. */
    struct {
        dri_bo *bo;
    } curbe;

    /* One entry per binding-table slot. */
    struct {
        dri_bo *ss_bo;              /* SURFACE_STATE object */
        dri_bo *s_bo;               /* underlying data buffer */
        unsigned long offset;       /* byte offset into s_bo (e.g. UV plane) */
        int surface_type;
        int width;
        int height;
        int depth;
        int pitch;
        int format;
        int vert_line_stride;       /* field access: skip every other line */
        int vert_line_stride_ofs;   /* 1 = start at the bottom field */
        int is_target;              /* 1 if the kernel writes this surface */
    } surface[NUM_AVC_ILDB_SURFACES];

    struct {
        dri_bo *bo;
    } binding_table;

    /* Interface descriptor remap table. */
    struct {
        dri_bo *bo;
    } idrt;

    struct {
        dri_bo *bo;
    } vfe_state;

    /* URB partitioning (in URB rows) between VFE entries and CS data. */
    struct {
        unsigned int vfe_start;
        unsigned int cs_start;
        unsigned int num_vfe_entries;
        unsigned int num_cs_entries;
        unsigned int size_vfe_entry;
        unsigned int size_cs_entry;
    } urb;

    int picture_type;       /* PICTURE_FRAME / PICTURE_FIELD / PICTURE_MBAFF */
    int mbs_per_picture;
};

/* Emit the ILDB pass for one picture (no-op if disabled in the context). */
void i965_avc_ildb(VADriverContextP, struct decode_state *, void *h264_context);
/* (Re)allocate all ILDB buffer objects for a new decode. */
void i965_avc_ildb_decode_init(VADriverContextP, void *h264_context);
/* Release all ILDB buffer objects. ("ternimate" typo is part of the ABI.) */
Bool i965_avc_ildb_ternimate(struct i965_avc_ildb_context *);

#endif /* __I965_AVC_ILDB_H__ */

View File

@@ -0,0 +1,70 @@
/*
* Copyright (C) 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef I965_DECODER_H
#define I965_DECODER_H

#include <stdint.h>
#include <stdlib.h>
#include <va/va.h>
#include <va/va_dec_vp8.h>
#include <intel_bufmgr.h>

/* Maximum number of DPB reference frame slots tracked by the driver. */
#define MAX_GEN_REFERENCE_FRAMES 16

typedef struct gen_frame_store GenFrameStore;

/* One DPB frame-store slot: maps a VA surface to a hardware frame index. */
struct gen_frame_store {
    VASurfaceID surface_id;
    int frame_store_id;
    struct object_surface *obj_surface;

    /* This represents the time when this frame store was last used to
       hold a reference frame. This is not connected to a presentation
       timestamp (PTS), and this is not a common decoding time stamp
       (DTS) either. It serves the purpose of tracking retired
       reference frame candidates.

       This is only used for H.264 decoding on platforms before Haswell */
    uint64_t ref_age;
};

typedef struct gen_frame_store_context GenFrameStoreContext;

/* Bookkeeping shared across frame stores: monotonic age counter and the
 * previous picture order count. */
struct gen_frame_store_context {
    uint64_t age;
    int prev_poc;
};

typedef struct gen_buffer GenBuffer;

/* A buffer object with a validity flag (lazily (re)allocated). */
struct gen_buffer {
    dri_bo *bo;
    int valid;
};

/* Per-generation decoder hw_context factories. */
struct hw_context *
gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);

extern struct hw_context *
gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);

#endif /* I965_DECODER_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,157 @@
/*
* Copyright (C) 2006-2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef I965_DECODER_UTILS_H
#define I965_DECODER_UTILS_H

#include "i965_decoder.h"
#include "intel_batchbuffer.h"

struct decode_state;

/* --- MPEG-2 helpers --- */

int
mpeg2_wa_slice_vertical_position(
    struct decode_state *decode_state,
    VAPictureParameterBufferMPEG2 *pic_param
);

void
mpeg2_set_reference_surfaces(
    VADriverContextP ctx,
    GenFrameStore ref_frames[MAX_GEN_REFERENCE_FRAMES],
    struct decode_state *decode_state,
    VAPictureParameterBufferMPEG2 *pic_param
);

/* --- H.264 (AVC) helpers --- */

VAStatus
avc_ensure_surface_bo(
    VADriverContextP ctx,
    struct decode_state *decode_state,
    struct object_surface *obj_surface,
    const VAPictureParameterBufferH264 *pic_param
);

void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);

int
avc_get_picture_id(struct object_surface *obj_surface);

VAPictureH264 *
avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);

/* Locate the bit offset of the first macroblock in a slice, without and
 * with emulation-prevention-byte (EPB) accounting. */
unsigned int
avc_get_first_mb_bit_offset(
    dri_bo        *slice_data_bo,
    VASliceParameterBufferH264 *slice_param,
    unsigned int   mode_flag
);

unsigned int
avc_get_first_mb_bit_offset_with_epb(
    dri_bo        *slice_data_bo,
    VASliceParameterBufferH264 *slice_param,
    unsigned int   mode_flag
);

void
gen5_fill_avc_ref_idx_state(
    uint8_t state[32],
    const VAPictureH264 ref_list[32],
    unsigned int ref_list_count,
    const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
);

void
gen6_send_avc_ref_idx_state(
    struct intel_batchbuffer *batch,
    const VASliceParameterBufferH264 *slice_param,
    const GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
);

void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *next_slice_param,
                           struct intel_batchbuffer *batch
);

VAStatus
intel_decoder_sanity_check_input(VADriverContextP ctx,
                                 VAProfile profile,
                                 struct decode_state *decode_state);

/* --- DPB frame-store index maintenance (per codec / generation) --- */

void
intel_update_avc_frame_store_index(
    VADriverContextP              ctx,
    struct decode_state          *decode_state,
    VAPictureParameterBufferH264 *pic_param,
    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES],
    GenFrameStoreContext         *fs_ctx
);

void
gen75_update_avc_frame_store_index(
    VADriverContextP              ctx,
    struct decode_state          *decode_state,
    VAPictureParameterBufferH264 *pic_param,
    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES]
);

bool
gen75_fill_avc_picid_list(
    uint16_t                      pic_ids[16],
    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES]
);

bool
gen75_send_avc_picid_state(
    struct intel_batchbuffer     *batch,
    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES]
);

void
intel_update_vc1_frame_store_index(VADriverContextP ctx,
                                   struct decode_state *decode_state,
                                   VAPictureParameterBufferVC1 *pic_param,
                                   GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);

VASliceParameterBufferMPEG2 *
intel_mpeg2_find_next_slice(struct decode_state *decode_state,
                            VAPictureParameterBufferMPEG2 *pic_param,
                            VASliceParameterBufferMPEG2 *slice_param,
                            int *group_idx,
                            int *element_idx);

void
intel_update_vp8_frame_store_index(VADriverContextP ctx,
                                   struct decode_state *decode_state,
                                   VAPictureParameterBufferVP8 *pic_param,
                                   GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);

bool
intel_ensure_vp8_segmentation_buffer(VADriverContextP ctx, GenBuffer *buf,
                                     unsigned int mb_width, unsigned int mb_height);

#endif /* I965_DECODER_UTILS_H */

View File

@@ -0,0 +1,824 @@
#ifndef _I965_DEFINES_H_
#define _I965_DEFINES_H_
#define CMD(pipeline,op,sub_op) ((3 << 29) | \
((pipeline) << 27) | \
((op) << 24) | \
((sub_op) << 16))
#define CMD_URB_FENCE CMD(0, 0, 0)
#define CMD_CS_URB_STATE CMD(0, 0, 1)
#define CMD_CONSTANT_BUFFER CMD(0, 0, 2)
#define CMD_STATE_PREFETCH CMD(0, 0, 3)
#define CMD_STATE_BASE_ADDRESS CMD(0, 1, 1)
#define CMD_STATE_SIP CMD(0, 1, 2)
#define CMD_PIPELINE_SELECT CMD(1, 1, 4)
#define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2)
#define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0)
#define CMD_MEDIA_VFE_STATE CMD(2, 0, 0)
#define CMD_MEDIA_CURBE_LOAD CMD(2, 0, 1)
#define CMD_MEDIA_INTERFACE_LOAD CMD(2, 0, 2)
#define CMD_MEDIA_OBJECT CMD(2, 1, 0)
#define CMD_MEDIA_OBJECT_EX CMD(2, 1, 1)
#define CMD_AVC_BSD_IMG_STATE CMD(2, 4, 0)
#define CMD_AVC_BSD_QM_STATE CMD(2, 4, 1)
#define CMD_AVC_BSD_SLICE_STATE CMD(2, 4, 2)
#define CMD_AVC_BSD_BUF_BASE_STATE CMD(2, 4, 3)
#define CMD_BSD_IND_OBJ_BASE_ADDR CMD(2, 4, 4)
#define CMD_AVC_BSD_OBJECT CMD(2, 4, 8)
#define CMD_MEDIA_VFE_STATE CMD(2, 0, 0)
#define CMD_MEDIA_CURBE_LOAD CMD(2, 0, 1)
#define CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD CMD(2, 0, 2)
#define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3)
#define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4)
#define CMD_MEDIA_OBJECT_WALKER CMD(2, 1, 3)
#define CMD_PIPELINED_POINTERS CMD(3, 0, 0)
#define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1)
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
#define CMD_VERTEX_BUFFERS CMD(3, 0, 8)
#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
#define CMD_DRAWING_RECTANGLE CMD(3, 1, 0)
#define CMD_CONSTANT_COLOR CMD(3, 1, 1)
#define CMD_3DPRIMITIVE CMD(3, 3, 0)
#define CMD_DEPTH_BUFFER CMD(3, 1, 5)
# define CMD_DEPTH_BUFFER_TYPE_SHIFT 29
# define CMD_DEPTH_BUFFER_FORMAT_SHIFT 18
#define CMD_CLEAR_PARAMS CMD(3, 1, 0x10)
/* DW1 */
# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID (1 << 15)
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS CMD(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
#define GEN6_3DSTATE_URB CMD(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS CMD(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
#define GEN6_3DSTATE_CC_STATE_POINTERS CMD(3, 0, 0x0e)
#define GEN6_3DSTATE_VS CMD(3, 0, 0x10)
#define GEN6_3DSTATE_GS CMD(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
#define GEN6_3DSTATE_CLIP CMD(3, 0, 0x12)
#define GEN6_3DSTATE_SF CMD(3, 0, 0x13)
/* DW1 on GEN6 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW1 on GEN7 */
# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
#define GEN8_3DSTATE_RASTER CMD(3, 0, 0x50)
# define GEN8_3DSTATE_RASTER_CULL_BOTH (0 << 16)
# define GEN8_3DSTATE_RASTER_CULL_NONE (1 << 16)
# define GEN8_3DSTATE_RASTER_CULL_FRONT (2 << 16)
# define GEN8_3DSTATE_RASTER_CULL_BACK (3 << 16)
#define GEN6_3DSTATE_WM CMD(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
/* 3DSTATE_WM on GEN7 */
/* DW1 */
# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16)
# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15)
# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14)
# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13)
# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12)
# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
#define GEN6_3DSTATE_CONSTANT_VS CMD(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS CMD(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS CMD(3, 0, 0x17)
/* Gen8 WM_HZ_OP */
#define GEN8_3DSTATE_WM_HZ_OP CMD(3, 0, 0x52)
# define GEN6_3DSTATE_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_3DSTATE_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_3DSTATE_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE (1 << 12)
#define GEN6_3DSTATE_SAMPLE_MASK CMD(3, 0, 0x18)
#define GEN6_3DSTATE_MULTISAMPLE CMD(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
#define GEN8_3DSTATE_MULTISAMPLE CMD(3, 0, 0x0d)
#define GEN8_3DSTATE_SAMPLE_PATTERN CMD(3, 1, 0x1C)
/* GEN7 */
#define GEN7_3DSTATE_CLEAR_PARAMS CMD(3, 0, 0x04)
#define GEN7_3DSTATE_DEPTH_BUFFER CMD(3, 0, 0x05)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER CMD(3, 0, 0x07)
#define GEN7_3DSTATE_URB_VS CMD(3, 0, 0x30)
#define GEN7_3DSTATE_URB_HS CMD(3, 0, 0x31)
#define GEN7_3DSTATE_URB_DS CMD(3, 0, 0x32)
#define GEN7_3DSTATE_URB_GS CMD(3, 0, 0x33)
/* DW1 */
# define GEN7_URB_ENTRY_NUMBER_SHIFT 0
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS CMD(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS CMD(3, 1, 0x16)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS CMD(3, 1, 0x14)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS CMD(3, 1, 0x13)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS CMD(3, 1, 0x15)
/* DW1 */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
# define GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
# define GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0
#define GEN7_3DSTATE_CONSTANT_HS CMD(3, 0, 0x19)
#define GEN7_3DSTATE_CONSTANT_DS CMD(3, 0, 0x1a)
#define GEN7_3DSTATE_HS CMD(3, 0, 0x1b)
#define GEN7_3DSTATE_TE CMD(3, 0, 0x1c)
#define GEN7_3DSTATE_DS CMD(3, 0, 0x1d)
#define GEN7_3DSTATE_STREAMOUT CMD(3, 0, 0x1e)
#define GEN7_3DSTATE_SBE CMD(3, 0, 0x1f)
/* DW1 */
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29)
# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28)
# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5
#define GEN8_3DSTATE_SBE_SWIZ CMD(3, 0, 0x51)
#define GEN7_3DSTATE_PS CMD(3, 0, 0x20)
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN7_PS_MAX_THREADS_SHIFT_IVB 24
# define GEN7_PS_MAX_THREADS_SHIFT_HSW 23
# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW 12
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
# define GEN8_PS_MAX_THREADS_SHIFT 23
#define GEN8_3DSTATE_PSEXTRA CMD(3, 0, 0x4f)
/* DW1 */
# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
# define GEN8_PSX_PSCDEPTH_OFF (0 << 26)
# define GEN8_PSX_PSCDEPTH_ON (1 << 26)
# define GEN8_PSX_PSCDEPTH_ON_GE (2 << 26)
# define GEN8_PSX_PSCDEPTH_ON_LE (3 << 26)
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
#define GEN8_3DSTATE_PSBLEND CMD(3, 0, 0x4d)
/* DW1 */
# define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31)
# define GEN8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30)
# define GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29)
# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24)
# define GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24
# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19)
# define GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19
# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14)
# define GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14
# define GEN8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9)
# define GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9
# define GEN8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8)
# define GEN8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7)
#define GEN7_3DSTATE_STENCIL_BUFFER CMD(3, 0, 0x06)
#define GEN8_3DSTATE_WM_DEPTH_STENCIL CMD(3, 0, 0x4e)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL CMD(3, 0, 0x21)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC CMD(3, 0, 0x23)
#define GEN7_3DSTATE_BLEND_STATE_POINTERS CMD(3, 0, 0x24)
#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS CMD(3, 0, 0x25)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS CMD(3, 0, 0x26)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS CMD(3, 0, 0x27)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS CMD(3, 0, 0x28)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS CMD(3, 0, 0x29)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS CMD(3, 0, 0x2a)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS CMD(3, 0, 0x2b)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS CMD(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS CMD(3, 0, 0x2f)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS CMD(3, 0, 0x2c)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS CMD(3, 0, 0x2d)
#define MFX(pipeline, op, sub_opa, sub_opb) \
(3 << 29 | \
(pipeline) << 27 | \
(op) << 24 | \
(sub_opa) << 21 | \
(sub_opb) << 16)
#define MFX_PIPE_MODE_SELECT MFX(2, 0, 0, 0)
#define MFX_SURFACE_STATE MFX(2, 0, 0, 1)
#define MFX_PIPE_BUF_ADDR_STATE MFX(2, 0, 0, 2)
#define MFX_IND_OBJ_BASE_ADDR_STATE MFX(2, 0, 0, 3)
#define MFX_BSP_BUF_BASE_ADDR_STATE MFX(2, 0, 0, 4)
#define MFX_AES_STATE MFX(2, 0, 0, 5)
#define MFX_STATE_POINTER MFX(2, 0, 0, 6)
#define MFX_QM_STATE MFX(2, 0, 0, 7)
#define MFX_FQM_STATE MFX(2, 0, 0, 8)
#define MFX_INSERT_OBJECT MFX(2, 0, 2, 8)
#define MFX_WAIT MFX(1, 0, 0, 0)
#define MFX_AVC_IMG_STATE MFX(2, 1, 0, 0)
#define MFX_AVC_QM_STATE MFX(2, 1, 0, 1)
#define MFX_AVC_DIRECTMODE_STATE MFX(2, 1, 0, 2)
#define MFX_AVC_SLICE_STATE MFX(2, 1, 0, 3)
#define MFX_AVC_REF_IDX_STATE MFX(2, 1, 0, 4)
#define MFX_AVC_WEIGHTOFFSET_STATE MFX(2, 1, 0, 5)
#define MFD_AVC_PICID_STATE MFX(2, 1, 1, 5)
#define MFD_AVC_BSD_OBJECT MFX(2, 1, 1, 8)
#define MFC_AVC_FQM_STATE MFX(2, 1, 2, 2)
#define MFC_AVC_INSERT_OBJECT MFX(2, 1, 2, 8)
#define MFC_AVC_PAK_OBJECT MFX(2, 1, 2, 9)
#define MFX_MPEG2_PIC_STATE MFX(2, 3, 0, 0)
#define MFX_MPEG2_QM_STATE MFX(2, 3, 0, 1)
#define MFD_MPEG2_BSD_OBJECT MFX(2, 3, 1, 8)
#define MFC_MPEG2_SLICEGROUP_STATE MFX(2, 3, 2, 3)
#define MFC_MPEG2_PAK_OBJECT MFX(2, 3, 2, 9)
#define MFX_VC1_PIC_STATE MFX(2, 2, 0, 0)
#define MFX_VC1_PRED_PIPE_STATE MFX(2, 2, 0, 1)
#define MFX_VC1_DIRECTMODE_STATE MFX(2, 2, 0, 2)
#define MFD_VC1_SHORT_PIC_STATE MFX(2, 2, 1, 0)
#define MFD_VC1_LONG_PIC_STATE MFX(2, 2, 1, 1)
#define MFD_VC1_BSD_OBJECT MFX(2, 2, 1, 8)
#define MFX_JPEG_PIC_STATE MFX(2, 7, 0, 0)
#define MFX_JPEG_HUFF_TABLE_STATE MFX(2, 7, 0, 2)
#define MFD_JPEG_BSD_OBJECT MFX(2, 7, 1, 8)
#define MFX_VP8_PIC_STATE MFX(2, 4, 0, 0)
#define MFD_VP8_BSD_OBJECT MFX(2, 4, 1, 8)
#define VEB(pipeline, op, sub_opa, sub_opb) \
(3 << 29 | \
(pipeline) << 27 | \
(op) << 24 | \
(sub_opa) << 21 | \
(sub_opb) << 16)
#define VEB_SURFACE_STATE VEB(2, 4, 0, 0)
#define VEB_STATE VEB(2, 4, 0, 2)
#define VEB_DNDI_IECP_STATE VEB(2, 4, 0, 3)
#define I965_DEPTHFORMAT_D32_FLOAT 1
#define BASE_ADDRESS_MODIFY (1 << 0)
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
#define VFE_GENERIC_MODE 0x0
#define VFE_VLD_MODE 0x1
#define VFE_IS_MODE 0x2
#define VFE_AVC_MC_MODE 0x4
#define VFE_AVC_IT_MODE 0x7
#define FLOATING_POINT_IEEE_754 0
#define FLOATING_POINT_NON_IEEE_754 1
#define I965_SURFACE_1D 0
#define I965_SURFACE_2D 1
#define I965_SURFACE_3D 2
#define I965_SURFACE_CUBE 3
#define I965_SURFACE_BUFFER 4
#define I965_SURFACE_NULL 7
#define I965_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define I965_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define I965_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define I965_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define I965_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define I965_SURFACEFORMAT_R64G64_FLOAT 0x005
#define I965_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define I965_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define I965_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define I965_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define I965_SURFACEFORMAT_R32G32B32_SINT 0x041
#define I965_SURFACEFORMAT_R32G32B32_UINT 0x042
#define I965_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define I965_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define I965_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define I965_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define I965_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define I965_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define I965_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define I965_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define I965_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define I965_SURFACEFORMAT_R32G32_FLOAT 0x085
#define I965_SURFACEFORMAT_R32G32_SINT 0x086
#define I965_SURFACEFORMAT_R32G32_UINT 0x087
#define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define I965_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define I965_SURFACEFORMAT_R32G32_UNORM 0x08B
#define I965_SURFACEFORMAT_R32G32_SNORM 0x08C
#define I965_SURFACEFORMAT_R64_FLOAT 0x08D
#define I965_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define I965_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define I965_SURFACEFORMAT_A32X32_FLOAT 0x090
#define I965_SURFACEFORMAT_L32X32_FLOAT 0x091
#define I965_SURFACEFORMAT_I32X32_FLOAT 0x092
#define I965_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define I965_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define I965_SURFACEFORMAT_R32G32_SSCALED 0x095
#define I965_SURFACEFORMAT_R32G32_USCALED 0x096
#define I965_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define I965_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define I965_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define I965_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define I965_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define I965_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define I965_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define I965_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define I965_SURFACEFORMAT_R16G16_SINT 0x0CE
#define I965_SURFACEFORMAT_R16G16_UINT 0x0CF
#define I965_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define I965_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define I965_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define I965_SURFACEFORMAT_R32_SINT 0x0D6
#define I965_SURFACEFORMAT_R32_UINT 0x0D7
#define I965_SURFACEFORMAT_R32_FLOAT 0x0D8
#define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define I965_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define I965_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define I965_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define I965_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define I965_SURFACEFORMAT_I32_FLOAT 0x0E3
#define I965_SURFACEFORMAT_L32_FLOAT 0x0E4
#define I965_SURFACEFORMAT_A32_FLOAT 0x0E5
#define I965_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define I965_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define I965_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define I965_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define I965_SURFACEFORMAT_R32_UNORM 0x0F1
#define I965_SURFACEFORMAT_R32_SNORM 0x0F2
#define I965_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define I965_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define I965_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define I965_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define I965_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define I965_SURFACEFORMAT_R32_SSCALED 0x0F8
#define I965_SURFACEFORMAT_R32_USCALED 0x0F9
#define I965_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define I965_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define I965_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define I965_SURFACEFORMAT_R8G8_UNORM 0x106
#define I965_SURFACEFORMAT_R8G8_SNORM 0x107
#define I965_SURFACEFORMAT_R8G8_SINT 0x108
#define I965_SURFACEFORMAT_R8G8_UINT 0x109
#define I965_SURFACEFORMAT_R16_UNORM 0x10A
#define I965_SURFACEFORMAT_R16_SNORM 0x10B
#define I965_SURFACEFORMAT_R16_SINT 0x10C
#define I965_SURFACEFORMAT_R16_UINT 0x10D
#define I965_SURFACEFORMAT_R16_FLOAT 0x10E
#define I965_SURFACEFORMAT_I16_UNORM 0x111
#define I965_SURFACEFORMAT_L16_UNORM 0x112
#define I965_SURFACEFORMAT_A16_UNORM 0x113
#define I965_SURFACEFORMAT_L8A8_UNORM 0x114
#define I965_SURFACEFORMAT_I16_FLOAT 0x115
#define I965_SURFACEFORMAT_L16_FLOAT 0x116
#define I965_SURFACEFORMAT_A16_FLOAT 0x117
#define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define I965_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define I965_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define I965_SURFACEFORMAT_R8G8_USCALED 0x11D
#define I965_SURFACEFORMAT_R16_SSCALED 0x11E
#define I965_SURFACEFORMAT_R16_USCALED 0x11F
#define I965_SURFACEFORMAT_P8A8_UNORM 0x122
#define I965_SURFACEFORMAT_A8P8_UNORM 0x123
#define I965_SURFACEFORMAT_R8_UNORM 0x140
#define I965_SURFACEFORMAT_R8_SNORM 0x141
#define I965_SURFACEFORMAT_R8_SINT 0x142
#define I965_SURFACEFORMAT_R8_UINT 0x143
#define I965_SURFACEFORMAT_A8_UNORM 0x144
#define I965_SURFACEFORMAT_I8_UNORM 0x145
#define I965_SURFACEFORMAT_L8_UNORM 0x146
#define I965_SURFACEFORMAT_P4A4_UNORM 0x147
#define I965_SURFACEFORMAT_A4P4_UNORM 0x148
#define I965_SURFACEFORMAT_R8_SSCALED 0x149
#define I965_SURFACEFORMAT_R8_USCALED 0x14A
#define I965_SURFACEFORMAT_R1_UINT 0x181
#define I965_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define I965_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define I965_SURFACEFORMAT_BC1_UNORM 0x186
#define I965_SURFACEFORMAT_BC2_UNORM 0x187
#define I965_SURFACEFORMAT_BC3_UNORM 0x188
#define I965_SURFACEFORMAT_BC4_UNORM 0x189
#define I965_SURFACEFORMAT_BC5_UNORM 0x18A
#define I965_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define I965_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define I965_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define I965_SURFACEFORMAT_MONO8 0x18E
#define I965_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define I965_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define I965_SURFACEFORMAT_DXT1_RGB 0x191
#define I965_SURFACEFORMAT_FXT1 0x192
#define I965_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define I965_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define I965_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define I965_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define I965_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define I965_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define I965_SURFACEFORMAT_BC4_SNORM 0x199
#define I965_SURFACEFORMAT_BC5_SNORM 0x19A
#define I965_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
#define I965_CULLMODE_BOTH 0
#define I965_CULLMODE_NONE 1
#define I965_CULLMODE_FRONT 2
#define I965_CULLMODE_BACK 3
#define I965_MAPFILTER_NEAREST 0x0
#define I965_MAPFILTER_LINEAR 0x1
#define I965_MAPFILTER_ANISOTROPIC 0x2
#define I965_MIPFILTER_NONE 0
#define I965_MIPFILTER_NEAREST 1
#define I965_MIPFILTER_LINEAR 3
#define HSW_SCS_ZERO 0
#define HSW_SCS_ONE 1
#define HSW_SCS_RED 4
#define HSW_SCS_GREEN 5
#define HSW_SCS_BLUE 6
#define HSW_SCS_ALPHA 7
#define I965_TEXCOORDMODE_WRAP 0
#define I965_TEXCOORDMODE_MIRROR 1
#define I965_TEXCOORDMODE_CLAMP 2
#define I965_TEXCOORDMODE_CUBE 3
#define I965_TEXCOORDMODE_CLAMP_BORDER 4
#define I965_TEXCOORDMODE_MIRROR_ONCE 5
#define I965_BLENDFACTOR_ONE 0x1
#define I965_BLENDFACTOR_SRC_COLOR 0x2
#define I965_BLENDFACTOR_SRC_ALPHA 0x3
#define I965_BLENDFACTOR_DST_ALPHA 0x4
#define I965_BLENDFACTOR_DST_COLOR 0x5
#define I965_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define I965_BLENDFACTOR_CONST_COLOR 0x7
#define I965_BLENDFACTOR_CONST_ALPHA 0x8
#define I965_BLENDFACTOR_SRC1_COLOR 0x9
#define I965_BLENDFACTOR_SRC1_ALPHA 0x0A
#define I965_BLENDFACTOR_ZERO 0x11
#define I965_BLENDFACTOR_INV_SRC_COLOR 0x12
#define I965_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define I965_BLENDFACTOR_INV_DST_ALPHA 0x14
#define I965_BLENDFACTOR_INV_DST_COLOR 0x15
#define I965_BLENDFACTOR_INV_CONST_COLOR 0x17
#define I965_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define I965_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define I965_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
#define I965_BLENDFUNCTION_ADD 0
#define I965_BLENDFUNCTION_SUBTRACT 1
#define I965_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define I965_BLENDFUNCTION_MIN 3
#define I965_BLENDFUNCTION_MAX 4
#define I965_SURFACERETURNFORMAT_FLOAT32 0
#define I965_SURFACERETURNFORMAT_S1 1
#define I965_VFCOMPONENT_NOSTORE 0
#define I965_VFCOMPONENT_STORE_SRC 1
#define I965_VFCOMPONENT_STORE_0 2
#define I965_VFCOMPONENT_STORE_1_FLT 3
#define I965_VFCOMPONENT_STORE_1_INT 4
#define I965_VFCOMPONENT_STORE_VID 5
#define I965_VFCOMPONENT_STORE_IID 6
#define I965_VFCOMPONENT_STORE_PID 7
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
#define GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN8 */
#define GEN8_VE0_VALID (1 << 25) /* for GEN8 */
#define VB0_BUFFER_INDEX_SHIFT 27
#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define GEN6_VB0_VERTEXDATA (0 << 20)
#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
#define VB0_BUFFER_PITCH_SHIFT 0
#define GEN8_VB0_BUFFER_INDEX_SHIFT 26
#define GEN8_VB0_MOCS_SHIFT 16
#define _3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define _3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
#define _3DPRIMITIVE_TOPOLOGY_SHIFT 10
/* DW1 on GEN7*/
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
#define GEN8_3DSTATE_VF_TOPOLOGY CMD(3, 0, 0x4b)
#define I965_TILEWALK_XMAJOR 0
#define I965_TILEWALK_YMAJOR 1
#define SCAN_RASTER_ORDER 0
#define SCAN_SPECIAL_ORDER 1
#define ENTROPY_CAVLD 0
#define ENTROPY_CABAC 1
#define SLICE_TYPE_P 0
#define SLICE_TYPE_B 1
#define SLICE_TYPE_I 2
#define SLICE_TYPE_SP 3
#define SLICE_TYPE_SI 4
#define PRESENT_REF_LIST0 (1 << 0)
#define PRESENT_REF_LIST1 (1 << 1)
#define PRESENT_WEIGHT_OFFSET_L0 (1 << 2)
#define PRESENT_WEIGHT_OFFSET_L1 (1 << 3)
#define RESIDUAL_DATA_OFFSET 48
#define PRESENT_NOMV 0
#define PRESENT_NOWO 1
#define PRESENT_MV_WO 3
#define SCOREBOARD_STALLING 0
#define SCOREBOARD_NON_STALLING 1
#define SURFACE_FORMAT_YCRCB_NORMAL 0
#define SURFACE_FORMAT_YCRCB_SWAPUVY 1
#define SURFACE_FORMAT_YCRCB_SWAPUV 2
#define SURFACE_FORMAT_YCRCB_SWAPY 3
#define SURFACE_FORMAT_PLANAR_420_8 4
#define SURFACE_FORMAT_PLANAR_411_8 5
#define SURFACE_FORMAT_PLANAR_422_8 6
#define SURFACE_FORMAT_STMM_DN_STATISTICS 7
#define SURFACE_FORMAT_R10G10B10A2_UNORM 8
#define SURFACE_FORMAT_R8G8B8A8_UNORM 9
#define SURFACE_FORMAT_R8B8_UNORM 10
#define SURFACE_FORMAT_R8_UNORM 11
#define SURFACE_FORMAT_Y8_UNORM 12
#define AVS_FILTER_ADAPTIVE_8_TAP 0
#define AVS_FILTER_NEAREST 1
#define IEF_FILTER_COMBO 0
#define IEF_FILTER_DETAIL 1
#define IEF_FILTER_SIZE_3X3 0
#define IEF_FILTER_SIZE_5X5 1
#define MFX_FORMAT_MPEG2 0
#define MFX_FORMAT_VC1 1
#define MFX_FORMAT_AVC 2
#define MFX_FORMAT_JPEG 3
#define MFX_FORMAT_SVC 4
#define MFX_FORMAT_VP8 5
#define MFX_SHORT_MODE 0
#define MFX_LONG_MODE 1
#define MFX_CODEC_DECODE 0
#define MFX_CODEC_ENCODE 1
#define MFX_QM_AVC_4X4_INTRA_MATRIX 0
#define MFX_QM_AVC_4X4_INTER_MATRIX 1
#define MFX_QM_AVC_8x8_INTRA_MATRIX 2
#define MFX_QM_AVC_8x8_INTER_MATRIX 3
#define MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX 0
#define MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX 1
#define MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX 0
#define MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX 1
#define MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX 2
#define MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX 3 /* for new device */
#define MFX_HUFFTABLE_ID_Y 0
#define MFX_HUFFTABLE_ID_UV 1 /* UV on Ivybridge */
#define MFD_MODE_VLD 0
#define MFD_MODE_IT 1
#define MFX_SURFACE_PLANAR_420_8 4
#define MFX_SURFACE_PLANAR_411_8 5
#define MFX_SURFACE_PLANAR_422_8 6
#define MFX_SURFACE_MONOCHROME 12
#define MPEG_I_PICTURE 1
#define MPEG_P_PICTURE 2
#define MPEG_B_PICTURE 3
#define MPEG_TOP_FIELD 1
#define MPEG_BOTTOM_FIELD 2
#define MPEG_FRAME 3
#define SUBSAMPLE_YUV400 0
#define SUBSAMPLE_YUV420 1
#define SUBSAMPLE_YUV422H 2
#define SUBSAMPLE_YUV422V 3
#define SUBSAMPLE_YUV444 4
#define SUBSAMPLE_YUV411 5
#define SUBSAMPLE_RGBX 6
#endif /* _I965_DEFINES_H_ */

View File

@@ -0,0 +1,531 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "i965_drv_video.h"
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <cpuid.h>
/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
#define EXTRA_H264_DEC_CHROMA_FORMATS \
    (VA_RT_FORMAT_YUV400)

/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
#define EXTRA_JPEG_DEC_CHROMA_FORMATS \
    (VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \
     VA_RT_FORMAT_YUV444)

/* Defines VA profile as a 32-bit unsigned integer mask (bit index is the
 * VAProfile enum value, so at most 32 profiles can be encoded). */
#define VA_PROFILE_MASK(PROFILE) \
    (1U << VAProfile##PROFILE)
extern struct hw_context *i965_proc_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *g4x_dec_hw_context_init(VADriverContextP, struct object_config *);
extern bool genx_render_init(VADriverContextP);
/*
 * Codec capability table for G4x chipsets: hardware MPEG-2 decoding
 * only; no encode, video-processing or post-processing contexts.
 */
static struct hw_codec_info g4x_hw_codec_info = {
    .dec_hw_context_init = g4x_dec_hw_context_init,
    .enc_hw_context_init = NULL,          /* no hardware encode */
    .proc_hw_context_init = NULL,         /* no VPP context */
    .render_init = genx_render_init,
    .post_processing_context_init = NULL,
    .max_width = 2048,                    /* surface size limits */
    .max_height = 2048,
    .min_linear_wpitch = 16,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .has_mpeg2_decoding = 1,
    .num_filters = 0,                     /* no VPP filters */
};
extern struct hw_context *ironlake_dec_hw_context_init(VADriverContextP, struct object_config *);
extern void i965_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *);
/*
 * Codec capability table for Ironlake: adds H.264 decoding, a VPP
 * context and accelerated PutImage on top of the G4x feature set.
 * Still no hardware encode.
 */
static struct hw_codec_info ilk_hw_codec_info = {
    .dec_hw_context_init = ironlake_dec_hw_context_init,
    .enc_hw_context_init = NULL,          /* no hardware encode */
    .proc_hw_context_init = i965_proc_context_init,
    .render_init = genx_render_init,
    .post_processing_context_init = i965_post_processing_context_init,
    .max_width = 2048,                    /* surface size limits */
    .max_height = 2048,
    .min_linear_wpitch = 16,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .has_mpeg2_decoding = 1,
    .has_h264_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_putimage = 1,
    .num_filters = 0,                     /* no VPP filters */
};
extern struct hw_context *gen6_dec_hw_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *gen6_enc_hw_context_init(VADriverContextP, struct object_config *);
/*
 * Codec capability table for Sandybridge (Gen6): first generation with
 * H.264 encoding, VC-1 decoding, H.264 MVC (stereo high) decode, tiled
 * surfaces and two render-ring VPP filters.
 */
static struct hw_codec_info snb_hw_codec_info = {
    .dec_hw_context_init = gen6_dec_hw_context_init,
    .enc_hw_context_init = gen6_enc_hw_context_init,
    .proc_hw_context_init = i965_proc_context_init,
    .render_init = genx_render_init,
    .post_processing_context_init = i965_post_processing_context_init,
    .max_width = 2048,                    /* surface size limits */
    .max_height = 2048,
    .min_linear_wpitch = 16,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh),
    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
    .has_mpeg2_decoding = 1,
    .has_h264_decoding = 1,
    .has_h264_encoding = 1,
    .has_vc1_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_getimage = 1,
    .has_accelerated_putimage = 1,
    .has_tiled_surface = 1,
    .has_di_motion_adptive = 1,           /* motion-adaptive deinterlacing */
    .num_filters = 2,
    .filters = {
        /* I965_RING_NULL: filter implemented without a dedicated ring */
        { VAProcFilterNoiseReduction, I965_RING_NULL },
        { VAProcFilterDeinterlacing, I965_RING_NULL },
    },
};
extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *gen7_enc_hw_context_init(VADriverContextP, struct object_config *);
/*
 * Codec capability table for Ivybridge (Gen7): raises surface limits to
 * 4K, adds MPEG-2 encoding and JPEG decoding (with extended chroma
 * formats) relative to Sandybridge.
 */
static struct hw_codec_info ivb_hw_codec_info = {
    .dec_hw_context_init = gen7_dec_hw_context_init,
    .enc_hw_context_init = gen7_enc_hw_context_init,
    .proc_hw_context_init = i965_proc_context_init,
    .render_init = genx_render_init,
    .post_processing_context_init = i965_post_processing_context_init,
    .max_width = 4096,                    /* surface size limits */
    .max_height = 4096,
    .min_linear_wpitch = 64,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .h264_mvc_dec_profiles = VA_PROFILE_MASK(H264StereoHigh),
    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
    .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
    .has_mpeg2_decoding = 1,
    .has_mpeg2_encoding = 1,
    .has_h264_decoding = 1,
    .has_h264_encoding = 1,
    .has_vc1_decoding = 1,
    .has_jpeg_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_getimage = 1,
    .has_accelerated_putimage = 1,
    .has_tiled_surface = 1,
    .has_di_motion_adptive = 1,           /* motion-adaptive deinterlacing */
    .num_filters = 2,
    .filters = {
        /* I965_RING_NULL: filter implemented without a dedicated ring */
        { VAProcFilterNoiseReduction, I965_RING_NULL },
        { VAProcFilterDeinterlacing, I965_RING_NULL },
    },
};
static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info);
extern struct hw_context *gen75_dec_hw_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *gen75_enc_hw_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *gen75_proc_context_init(VADriverContextP, struct object_config *);
/*
 * Codec capability table for Haswell: adds the VEBOX-backed filters
 * (noise reduction, deinterlacing, color balance, skin-tone
 * enhancement), motion-compensated deinterlacing, MVC encoding and
 * multiview-high MVC decoding.  hsw_hw_codec_preinit() runs first and
 * may adjust this table per CPU model (see hsw_cpu_hook_list below).
 */
static struct hw_codec_info hsw_hw_codec_info = {
    .dec_hw_context_init = gen75_dec_hw_context_init,
    .enc_hw_context_init = gen75_enc_hw_context_init,
    .proc_hw_context_init = gen75_proc_context_init,
    .render_init = genx_render_init,
    .post_processing_context_init = i965_post_processing_context_init,
    .preinit_hw_codec = hsw_hw_codec_preinit,
    .max_width = 4096,                    /* surface size limits */
    .max_height = 4096,
    .min_linear_wpitch = 64,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
                              VA_PROFILE_MASK(H264MultiviewHigh)),
    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
    .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
    .has_mpeg2_decoding = 1,
    .has_mpeg2_encoding = 1,
    .has_h264_decoding = 1,
    .has_h264_encoding = 1,
    .has_vc1_decoding = 1,
    .has_jpeg_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_getimage = 1,
    .has_accelerated_putimage = 1,
    .has_tiled_surface = 1,
    .has_di_motion_adptive = 1,           /* motion-adaptive deinterlacing */
    .has_di_motion_compensated = 1,       /* motion-compensated deinterlacing */
    .has_h264_mvc_encoding = 1,
    .num_filters = 5,
    .filters = {
        { VAProcFilterNoiseReduction, I965_RING_VEBOX },
        { VAProcFilterDeinterlacing, I965_RING_VEBOX },
        { VAProcFilterSharpening, I965_RING_NULL },
        { VAProcFilterColorBalance, I965_RING_VEBOX},
        { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
    },
};
extern struct hw_context *gen8_dec_hw_context_init(VADriverContextP, struct object_config *);
extern struct hw_context *gen8_enc_hw_context_init(VADriverContextP, struct object_config *);
extern void gen8_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *);
/*
 * Codec capability table for Broadwell (Gen8): Haswell feature set plus
 * VP8 decoding, with the Gen8 render/post-processing entry points.
 */
static struct hw_codec_info bdw_hw_codec_info = {
    .dec_hw_context_init = gen8_dec_hw_context_init,
    .enc_hw_context_init = gen8_enc_hw_context_init,
    .proc_hw_context_init = gen75_proc_context_init,
    .render_init = gen8_render_init,
    .post_processing_context_init = gen8_post_processing_context_init,
    .max_width = 4096,                    /* surface size limits */
    .max_height = 4096,
    .min_linear_wpitch = 64,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
                              VA_PROFILE_MASK(H264MultiviewHigh)),
    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
    .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
    .has_mpeg2_decoding = 1,
    .has_mpeg2_encoding = 1,
    .has_h264_decoding = 1,
    .has_h264_encoding = 1,
    .has_vc1_decoding = 1,
    .has_jpeg_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_getimage = 1,
    .has_accelerated_putimage = 1,
    .has_tiled_surface = 1,
    .has_di_motion_adptive = 1,           /* motion-adaptive deinterlacing */
    .has_di_motion_compensated = 1,       /* motion-compensated deinterlacing */
    .has_vp8_decoding = 1,
    .has_h264_mvc_encoding = 1,
    .num_filters = 5,
    .filters = {
        { VAProcFilterNoiseReduction, I965_RING_VEBOX },
        { VAProcFilterDeinterlacing, I965_RING_VEBOX },
        { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */
        { VAProcFilterColorBalance, I965_RING_VEBOX},
        { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
    },
};
/*
 * Codec capability table for Cherryview: identical field-for-field to
 * the Broadwell table above (same Gen8 entry points and feature set).
 */
static struct hw_codec_info chv_hw_codec_info = {
    .dec_hw_context_init = gen8_dec_hw_context_init,
    .enc_hw_context_init = gen8_enc_hw_context_init,
    .proc_hw_context_init = gen75_proc_context_init,
    .render_init = gen8_render_init,
    .post_processing_context_init = gen8_post_processing_context_init,
    .max_width = 4096,                    /* surface size limits */
    .max_height = 4096,
    .min_linear_wpitch = 64,              /* linear-surface pitch alignment */
    .min_linear_hpitch = 16,
    .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) |
                              VA_PROFILE_MASK(H264MultiviewHigh)),
    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
    .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
    .has_mpeg2_decoding = 1,
    .has_mpeg2_encoding = 1,
    .has_h264_decoding = 1,
    .has_h264_encoding = 1,
    .has_vc1_decoding = 1,
    .has_jpeg_decoding = 1,
    .has_vpp = 1,
    .has_accelerated_getimage = 1,
    .has_accelerated_putimage = 1,
    .has_tiled_surface = 1,
    .has_di_motion_adptive = 1,           /* motion-adaptive deinterlacing */
    .has_di_motion_compensated = 1,       /* motion-compensated deinterlacing */
    .has_vp8_decoding = 1,
    .has_h264_mvc_encoding = 1,
    .num_filters = 5,
    .filters = {
        { VAProcFilterNoiseReduction, I965_RING_VEBOX },
        { VAProcFilterDeinterlacing, I965_RING_VEBOX },
        { VAProcFilterSharpening, I965_RING_NULL }, /* need to rebuild the shader for BDW */
        { VAProcFilterColorBalance, I965_RING_VEBOX},
        { VAProcFilterSkinToneEnhancement, I965_RING_VEBOX},
    },
};
/*
 * Map a PCI device id to its per-generation codec capability table.
 *
 * The switch cases are generated by re-including i965_pciids.h with
 * CHIPSET() redefined: each CHIPSET(id, family, dev, str) entry expands
 * to "case id: return &family##_hw_codec_info;", selecting one of the
 * *_hw_codec_info tables defined above.
 *
 * Returns NULL for device ids not listed in i965_pciids.h.
 */
struct hw_codec_info *
i965_get_codec_info(int devid)
{
    switch (devid) {
#undef CHIPSET
#define CHIPSET(id, family, dev, str) case id: return &family##_hw_codec_info;
#include "i965_pciids.h"
    default:
        return NULL;
    }
}
/*
 * Per-generation GPU parameter tables (generation number, GT level, URB
 * size, WM thread count, plus platform flags), selected by device id in
 * i965_get_device_info() below.
 */
static const struct intel_device_info g4x_device_info = {
    .gen = 4,
    .urb_size = 384,
    .max_wm_threads = 50, /* 10 * 5 */
    .is_g4x = 1,
};

static const struct intel_device_info ilk_device_info = {
    .gen = 5,
    .urb_size = 1024,
    .max_wm_threads = 72, /* 12 * 6 */
};

static const struct intel_device_info snb_gt1_device_info = {
    .gen = 6,
    .gt = 1,
    .urb_size = 1024,
    .max_wm_threads = 40,
};

static const struct intel_device_info snb_gt2_device_info = {
    .gen = 6,
    .gt = 2,
    .urb_size = 1024,
    .max_wm_threads = 80,
};

static const struct intel_device_info ivb_gt1_device_info = {
    .gen = 7,
    .gt = 1,
    .urb_size = 4096,
    .max_wm_threads = 48,
    .is_ivybridge = 1,
};

static const struct intel_device_info ivb_gt2_device_info = {
    .gen = 7,
    .gt = 2,
    .urb_size = 4096,
    .max_wm_threads = 172,
    .is_ivybridge = 1,
};

/* Baytrail is Gen7/GT1-class and also carries the Ivybridge flag. */
static const struct intel_device_info byt_device_info = {
    .gen = 7,
    .gt = 1,
    .urb_size = 4096,
    .max_wm_threads = 48,
    .is_ivybridge = 1,
    .is_baytrail = 1,
};

static const struct intel_device_info hsw_gt1_device_info = {
    .gen = 7,
    .gt = 1,
    .urb_size = 4096,
    .max_wm_threads = 102,
    .is_haswell = 1,
};

static const struct intel_device_info hsw_gt2_device_info = {
    .gen = 7,
    .gt = 2,
    .urb_size = 4096,
    .max_wm_threads = 204,
    .is_haswell = 1,
};

static const struct intel_device_info hsw_gt3_device_info = {
    .gen = 7,
    .gt = 3,
    .urb_size = 4096,
    .max_wm_threads = 408,
    .is_haswell = 1,
};

static const struct intel_device_info bdw_device_info = {
    .gen = 8,
    .urb_size = 4096,
    .max_wm_threads = 64, /* per PSD */
};

static const struct intel_device_info chv_device_info = {
    .gen = 8,
    .urb_size = 4096,
    .max_wm_threads = 64, /* per PSD */
    .is_cherryview = 1,
};
/*
 * Map a PCI device id to its intel_device_info table.
 *
 * Same code-generation trick as i965_get_codec_info(): i965_pciids.h is
 * re-included with CHIPSET() redefined so each entry expands to
 * "case id: return &dev##_device_info;".
 *
 * Returns NULL for device ids not listed in i965_pciids.h.
 */
const struct intel_device_info *
i965_get_device_info(int devid)
{
    switch (devid) {
#undef CHIPSET
#define CHIPSET(id, family, dev, str) case id: return &dev##_device_info;
#include "i965_pciids.h"
    default:
        return NULL;
    }
}
/*
 * Thin wrapper around GCC's __cpuid_count macro: execute CPUID for
 * leaf 'op', sub-leaf 0, and store EAX..EDX into the caller-provided
 * locations.  (__cpuid_count assigns through the *eax..*edx lvalues.)
 */
static void cpuid(unsigned int op,
                  uint32_t *eax, uint32_t *ebx,
                  uint32_t *ecx, uint32_t *edx)
{
    __cpuid_count(op, 0, *eax, *ebx, *ecx, *edx);
}
/*
 * Read the 48-byte CPUID processor brand string into model_id and
 * NUL-terminate it.
 *
 * This function doesn't check the length. And the caller should
 * assure that the length of input string should be greater than 48
 * (i.e. at least 49 bytes, for the terminating NUL).
 *
 * Returns 0 on success, -EINVAL if model_id is NULL or the CPU does
 * not support the brand-string leaves.
 */
static int intel_driver_detect_cpustring(char *model_id)
{
    /* Brand string: 3 leaves x 4 registers x 4 bytes = 48 bytes. */
    uint32_t rdata[12];

    if (model_id == NULL)
        return -EINVAL;

    /* obtain the max supported extended CPUID info */
    cpuid(0x80000000, &rdata[0], &rdata[1], &rdata[2], &rdata[3]);

    /* If the max extended CPUID info is less than 0x80000004, fail */
    if (rdata[0] < 0x80000004)
        return -EINVAL;

    /* obtain the CPUID brand string (leaves 0x80000002..0x80000004) */
    cpuid(0x80000002, &rdata[0], &rdata[1], &rdata[2], &rdata[3]);
    cpuid(0x80000003, &rdata[4], &rdata[5], &rdata[6], &rdata[7]);
    cpuid(0x80000004, &rdata[8], &rdata[9], &rdata[10], &rdata[11]);

    /* Fill a properly typed local array and memcpy it out, instead of
     * writing through (uint32_t *)model_id: the original cast relied
     * on the caller's char buffer being 4-byte aligned, which C does
     * not guarantee, and violated strict aliasing. */
    memcpy(model_id, rdata, sizeof(rdata));
    model_id[48] = '\0';

    return 0;
}
/*
 * the hook_list for HSW.
 * It is captured by /proc/cpuinfo and the space character is stripped.
 * hsw_hw_codec_preinit() below disables the H.264/MPEG-2 encoding
 * capabilities for any CPU whose brand string matches this list.
 *
 * (Declaration order fixed: ISO C deprecates a storage-class specifier
 * that is not first — "static const", not "const static".)
 */
static const char *hsw_cpu_hook_list[] =  {
    "Intel(R)Pentium(R)3556U",
    "Intel(R)Pentium(R)3560Y",
    "Intel(R)Pentium(R)3550M",
    "Intel(R)Celeron(R)2980U",
    "Intel(R)Celeron(R)2955U",
    "Intel(R)Celeron(R)2950M",
};
/*
 * Haswell preinit hook: detect the CPU brand string and, when it
 * matches an entry of hsw_cpu_hook_list, clear the H.264/MPEG-2
 * encoding feature bits in codec_info.
 */
static void hsw_hw_codec_preinit(VADriverContextP ctx, struct hw_codec_info *codec_info)
{
    char model_string[64];
    char *at_sign;
    const char *src;
    char *dst;
    size_t len;
    int idx, num_entries;

    memset(model_string, 0, sizeof(model_string));

    /* If it can't detect cpu model_string, leave it alone */
    if (intel_driver_detect_cpustring(model_string))
        return;

    /* Drop the trailing frequency part, e.g. "@ 1.70GHz". */
    at_sign = strstr(model_string, "@");
    if (at_sign)
        *at_sign = '\0';

    /* Squeeze out every space so the string matches the stripped
     * format used by hsw_cpu_hook_list. */
    for (src = model_string, dst = model_string; *src; src++) {
        if (*src != ' ')
            *dst++ = *src;
    }
    *dst = '\0';

    len = strlen(model_string);
    num_entries = sizeof(hsw_cpu_hook_list) / sizeof(char *);

    for (idx = 0; idx < num_entries; idx++) {
        if (strlen(hsw_cpu_hook_list[idx]) != len)
            continue;

        if (strncasecmp(model_string, hsw_cpu_hook_list[idx], len) == 0) {
            /* Listed models: disable the hardware encoders. */
            codec_info->has_h264_encoding = 0;
            codec_info->has_h264_mvc_encoding = 0;
            codec_info->has_mpeg2_encoding = 0;
            return;
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,496 @@
/*
 * Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#ifndef _I965_DRV_VIDEO_H_
#define _I965_DRV_VIDEO_H_
#include <va/va.h>
#include <va/va_enc_h264.h>
#include <va/va_enc_mpeg2.h>
#include <va/va_vpp.h>
#include <va/va_backend.h>
#include <va/va_backend_vpp.h>
#include "i965_mutext.h"
#include "object_heap.h"
#include "intel_driver.h"
#include "i965_fourcc.h"
#define I965_MAX_PROFILES 20
#define I965_MAX_ENTRYPOINTS 5
#define I965_MAX_CONFIG_ATTRIBUTES 10
#define I965_MAX_IMAGE_FORMATS 10
#define I965_MAX_SUBPIC_FORMATS 6
#define I965_MAX_SUBPIC_SUM 4
#define I965_MAX_SURFACE_ATTRIBUTES 16
#define INTEL_STR_DRIVER_VENDOR "Intel"
#define INTEL_STR_DRIVER_NAME "i965"
#define I965_SURFACE_TYPE_IMAGE 0
#define I965_SURFACE_TYPE_SURFACE 1
#define I965_SURFACE_FLAG_FRAME 0x00000000
#define I965_SURFACE_FLAG_TOP_FIELD_FIRST 0x00000001
#define I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST 0x00000002
#define DEFAULT_BRIGHTNESS 0
#define DEFAULT_CONTRAST 50
#define DEFAULT_HUE 0
#define DEFAULT_SATURATION 50
#define ENCODER_QUALITY_RANGE 2
#define ENCODER_DEFAULT_QUALITY 1
#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
#define ENCODER_LOW_QUALITY 2
/* A tagged view over either an object_image or an object_surface,
 * discriminated by 'type' (I965_SURFACE_TYPE_*), with field-ordering
 * flags (I965_SURFACE_FLAG_*). */
struct i965_surface
{
    struct object_base *base;   /* the underlying image/surface object */
    int type;                   /* I965_SURFACE_TYPE_IMAGE or _SURFACE */
    int flags;                  /* I965_SURFACE_FLAG_* */
};

/* A media kernel binary plus the GPU buffer it is uploaded to. */
struct i965_kernel
{
    char *name;                 /* human-readable kernel name */
    int interface;              /* interface descriptor index */
    const uint32_t (*bin)[4];   /* kernel binary, rows of four dwords */
    int size;                   /* size of the binary, in bytes */
    dri_bo *bo;                 /* GEM buffer holding the uploaded kernel */
    unsigned int kernel_offset; /* offset of this kernel inside 'bo' */
};

/* Reference-counted backing store for a VA buffer: plain memory
 * ('buffer') and/or a GEM buffer object ('bo'). */
struct buffer_store
{
    unsigned char *buffer;
    dri_bo *bo;
    int ref_count;
    int num_elements;
};

/* A VAConfigID object: profile + entrypoint + config attributes. */
struct object_config
{
    struct object_base base;
    VAProfile profile;
    VAEntrypoint entrypoint;
    VAConfigAttrib attrib_list[I965_MAX_CONFIG_ATTRIBUTES];
    int num_attribs;
};
#define NUM_SLICES     10

/* Common head shared by decode/encode/proc codec states. */
struct codec_state_base {
    uint32_t chroma_formats;
};

/* Per-frame decode state: the parameter buffers handed in through
 * vaRenderPicture plus the resolved render/reference surfaces. */
struct decode_state
{
    struct codec_state_base base;
    struct buffer_store *pic_param;
    struct buffer_store **slice_params;
    struct buffer_store *iq_matrix;
    struct buffer_store *bit_plane;
    struct buffer_store *huffman_table;
    struct buffer_store **slice_datas;
    struct buffer_store *probability_data;
    VASurfaceID current_render_target;
    int max_slice_params;    /* allocated capacity of slice_params */
    int max_slice_datas;     /* allocated capacity of slice_datas */
    int num_slice_params;
    int num_slice_datas;

    struct object_surface *render_object;
    struct object_surface *reference_objects[16]; /* Up to 2 reference surfaces are valid for MPEG-2,*/
};

/* NOTE(review): appears to tag/mask entries of the per-slice packed
 * data index arrays in encode_state — confirm at the call sites. */
#define SLICE_PACKED_DATA_INDEX_TYPE    0x80000000
#define SLICE_PACKED_DATA_INDEX_MASK    0x00FFFFFF
/*
 * Per-frame encode state: VA parameter buffers collected between
 * BeginPicture/EndPicture plus the resolved surface/buffer objects
 * used by the VME/MFC pipelines.
 */
struct encode_state
{
    struct codec_state_base base;
    struct buffer_store *seq_param;
    struct buffer_store *pic_param;
    struct buffer_store *pic_control;
    struct buffer_store *iq_matrix;
    struct buffer_store *q_matrix;
    struct buffer_store **slice_params;
    int max_slice_params;
    int num_slice_params;

    /* for ext */
    struct buffer_store *seq_param_ext;
    struct buffer_store *pic_param_ext;
    struct buffer_store *packed_header_param[4];
    struct buffer_store *packed_header_data[4];
    struct buffer_store **slice_params_ext;
    int max_slice_params_ext;
    int num_slice_params_ext;

    /* Check the user-configurable packed_header attribute.
     * Currently it is mainly used to check whether the packed slice_header data
     * is provided by user or the driver.
     * TBD: It will check for the packed SPS/PPS/MISC/RAWDATA and so on.
     */
    unsigned int packed_header_flag;

    /* For the packed data that needs to be inserted into video clip */
    /* currently it is mainly to track packed raw data and packed slice_header data. */
    struct buffer_store **packed_header_params_ext;
    int max_packed_header_params_ext;
    int num_packed_header_params_ext;
    struct buffer_store **packed_header_data_ext;
    int max_packed_header_data_ext;
    int num_packed_header_data_ext;

    /* the index of current slice */
    int slice_index;
    /* the array is determined by max_slice_params_ext */
    int max_slice_num;
    /* This is to store the first index of packed data for one slice */
    int *slice_rawdata_index;
    /* This is to store the number of packed data for one slice.
     * Both packed rawdata and slice_header data are tracked by this
     * this variable. That is to say: When one packed slice_header is parsed,
     * this variable will also be increased.
     */
    int *slice_rawdata_count;
    /* This is to store the index of packed slice header for one slice */
    int *slice_header_index;

    int last_packed_header_type;

    /* Indexed by VAEncMiscParameterType (see the quality-level check
     * in i965_encoder.c). */
    struct buffer_store *misc_param[16];

    VASurfaceID current_render_target;
    struct object_surface *input_yuv_object;
    struct object_surface *reconstructed_object;
    struct object_buffer *coded_buf_object;
    struct object_surface *reference_objects[16]; /* Up to 2 reference surfaces are valid for MPEG-2,*/
};
/* Per-frame video-processing (VPP) state. */
struct proc_state
{
    struct codec_state_base base;
    struct buffer_store *pipeline_param;

    VASurfaceID current_render_target;
};

/* object_context.codec_type values. */
#define CODEC_DEC       0
#define CODEC_ENC       1
#define CODEC_PROC      2

/* Per-context codec state, discriminated by object_context.codec_type. */
union codec_state
{
    struct codec_state_base base;
    struct decode_state decode;
    struct encode_state encode;
    struct proc_state proc;
};

/* Backend vtable shared by decoder/encoder/vpp hardware pipelines. */
struct hw_context
{
    /* Execute one frame's worth of work for this context. */
    VAStatus (*run)(VADriverContextP ctx,
                    VAProfile profile,
                    union codec_state *codec_state,
                    struct hw_context *hw_context);
    /* Free this hw_context and everything it owns. */
    void (*destroy)(void *);
    struct intel_batchbuffer *batch;
};
/* A VAContextID object: one configured decode/encode/vpp session. */
struct object_context
{
    struct object_base base;
    VAContextID context_id;
    struct object_config *obj_config;
    VASurfaceID *render_targets;        //input->encode, output->decode
    int num_render_targets;
    int picture_width;
    int picture_height;
    int flags;
    int codec_type;                     /* CODEC_DEC / CODEC_ENC / CODEC_PROC */
    union codec_state codec_state;
    struct hw_context *hw_context;
};

/* object_surface.flags bits. */
#define SURFACE_REFERENCED      (1 << 0)
#define SURFACE_DERIVED         (1 << 2)
#define SURFACE_ALL_MASK        ((SURFACE_REFERENCED) | \
                                 (SURFACE_DERIVED))
/* A VASurfaceID object: the pixel storage plus layout information. */
struct object_surface
{
    struct object_base base;
    VASurfaceStatus status;
    VASubpictureID subpic[I965_MAX_SUBPIC_SUM];
    struct object_subpic *obj_subpic[I965_MAX_SUBPIC_SUM];
    unsigned int subpic_render_idx;

    int width;          /* the pitch of plane 0 in bytes in horizontal direction */
    int height;         /* the pitch of plane 0 in bytes in vertical direction */
    int size;           /* total size of the bo, in bytes */
    int orig_width;     /* the width of plane 0 in pixels */
    int orig_height;    /* the height of plane 0 in pixels */
    int flags;          /* SURFACE_* bits */
    unsigned int fourcc;
    dri_bo *bo;
    VAImageID locked_image_id;
    void (*free_private_data)(void **data);
    void *private_data;
    unsigned int subsampling;
    /* Chroma plane placement within the bo, in plane-0 units. */
    int x_cb_offset;
    int y_cb_offset;
    int x_cr_offset;
    int y_cr_offset;
    int cb_cr_width;
    int cb_cr_height;
    int cb_cr_pitch;
    /* user specified attributes see: VASurfaceAttribExternalBuffers/VA_SURFACE_ATTRIB_MEM_TYPE_VA */
    uint32_t user_disable_tiling : 1;
    uint32_t user_h_stride_set   : 1;
    uint32_t user_v_stride_set   : 1;
};
/* A VABufferID object wrapping a reference-counted buffer_store. */
struct object_buffer
{
    struct object_base base;
    struct buffer_store *buffer_store;
    int max_num_elements;
    int num_elements;
    int size_element;
    VABufferType type;

    /* Export state */
    unsigned int export_refcount;
    VABufferInfo export_state;
};

/* A VAImageID object: CPU-accessible image plus optional palette. */
struct object_image
{
    struct object_base base;
    VAImage image;
    dri_bo *bo;
    unsigned int *palette;
    VASurfaceID derived_surface;   /* set when derived via vaDeriveImage */
};

/* A VASubpictureID object: an image blended over surfaces. */
struct object_subpic
{
    struct object_base base;
    VAImageID image;
    struct object_image *obj_image;
    VARectangle src_rect;
    VARectangle dst_rect;
    unsigned int format;
    int width;
    int height;
    int pitch;
    float global_alpha;
    dri_bo *bo;
    unsigned int flags;
};

/* GPU ring a VPP filter is executed on (i965_filter.ring). */
#define I965_RING_NULL  0
#define I965_RING_BSD   1
#define I965_RING_BLT   2
#define I965_RING_VEBOX 3

/* One supported VPP filter and the ring that implements it. */
struct i965_filter
{
    VAProcFilterType type;
    int ring;
};
/*
 * Per-generation capability table: hardware context constructors,
 * size limits and feature bits for one device family.  Selected by
 * PCI id (see i965_get_codec_info in i965_drv_video.c).
 */
struct hw_codec_info
{
    struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *);
    struct hw_context *(*enc_hw_context_init)(VADriverContextP, struct object_config *);
    struct hw_context *(*proc_hw_context_init)(VADriverContextP, struct object_config *);
    bool (*render_init)(VADriverContextP);
    void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *);
    /* Optional hook run before the table is used, e.g. to mask
     * features on specific CPU models (see hsw_hw_codec_preinit). */
    void (*preinit_hw_codec)(VADriverContextP, struct hw_codec_info *);

    int max_width;
    int max_height;
    int min_linear_wpitch;
    int min_linear_hpitch;

    unsigned int h264_mvc_dec_profiles;
    unsigned int h264_dec_chroma_formats;
    unsigned int jpeg_dec_chroma_formats;

    /* Capability bits. */
    unsigned int has_mpeg2_decoding:1;
    unsigned int has_mpeg2_encoding:1;
    unsigned int has_h264_decoding:1;
    unsigned int has_h264_encoding:1;
    unsigned int has_vc1_decoding:1;
    unsigned int has_vc1_encoding:1;
    unsigned int has_jpeg_decoding:1;
    unsigned int has_jpeg_encoding:1;
    unsigned int has_vpp:1;
    unsigned int has_accelerated_getimage:1;
    unsigned int has_accelerated_putimage:1;
    unsigned int has_tiled_surface:1;
    unsigned int has_di_motion_adptive:1;
    unsigned int has_di_motion_compensated:1;
    unsigned int has_vp8_decoding:1;
    unsigned int has_vp8_encoding:1;
    unsigned int has_h264_mvc_encoding:1;

    unsigned int num_filters;
    struct i965_filter filters[VAProcFilterCount];
};
#include "i965_render.h"
/* The per-display driver instance, hung off VADriverContextP. */
struct i965_driver_data
{
    struct intel_driver_data intel;   /* must be first: shared intel layer */
    /* Object heaps backing the VA id spaces. */
    struct object_heap config_heap;
    struct object_heap context_heap;
    struct object_heap surface_heap;
    struct object_heap buffer_heap;
    struct object_heap image_heap;
    struct object_heap subpic_heap;
    struct hw_codec_info *codec_info;

    _I965Mutex render_mutex;
    _I965Mutex pp_mutex;
    struct intel_batchbuffer *batch;
    struct intel_batchbuffer *pp_batch;
    struct i965_render_state render_state;
    void *pp_context;
    char va_vendor[256];              /* vendor string returned to apps */

    VADisplayAttribute *display_attributes;
    unsigned int num_display_attributes;
    /* Shortcuts into display_attributes. */
    VADisplayAttribute *rotation_attrib;
    VADisplayAttribute *brightness_attrib;
    VADisplayAttribute *contrast_attrib;
    VADisplayAttribute *hue_attrib;
    VADisplayAttribute *saturation_attrib;
    VAContextID current_context_id;

    /* VA/DRI (X11) specific data */
    struct va_dri_output *dri_output;
    /* VA/Wayland specific data */
    struct va_wl_output *wl_output;
};
/* Allocate a new object id from the corresponding heap ('i965' must be
 * in scope at the use site).
 * NOTE(review): the trailing ';' is part of each macro body, so every
 * use expands to a double semicolon and `if (...) id = NEW_*_ID(); else`
 * would break; call sites may rely on it — confirm before changing. */
#define NEW_CONFIG_ID() object_heap_allocate(&i965->config_heap);
#define NEW_CONTEXT_ID() object_heap_allocate(&i965->context_heap);
#define NEW_SURFACE_ID() object_heap_allocate(&i965->surface_heap);
#define NEW_BUFFER_ID() object_heap_allocate(&i965->buffer_heap);
#define NEW_IMAGE_ID() object_heap_allocate(&i965->image_heap);
#define NEW_SUBPIC_ID() object_heap_allocate(&i965->subpic_heap);

/* Look up a VA id in the corresponding heap; presumably evaluates to
 * NULL when the id is unknown — TODO confirm in object_heap.c. */
#define CONFIG(id) ((struct object_config *)object_heap_lookup(&i965->config_heap, id))
#define CONTEXT(id) ((struct object_context *)object_heap_lookup(&i965->context_heap, id))
#define SURFACE(id) ((struct object_surface *)object_heap_lookup(&i965->surface_heap, id))
#define BUFFER(id) ((struct object_buffer *)object_heap_lookup(&i965->buffer_heap, id))
#define IMAGE(id) ((struct object_image *)object_heap_lookup(&i965->image_heap, id))
#define SUBPIC(id) ((struct object_subpic *)object_heap_lookup(&i965->subpic_heap, id))
/* 'IA44' / 'AI44' little-endian fourcc codes. */
#define FOURCC_IA44 0x34344149
#define FOURCC_AI44 0x34344941

/* Round a plane width up to a 16-byte multiple. */
#define STRIDE(w)               (((w) + 0xf) & ~0xf)
/* Bytes needed for a 4:2:0 layout with STRIDE() alignment.
 * Fixed: 'h' is now parenthesized — the original expanded the bare
 * token, so SIZE_YUV420(w, a + b) multiplied only 'b' by the stride. */
#define SIZE_YUV420(w, h)       ((h) * (STRIDE(w) + STRIDE((w) >> 1)))
/* Fetch the driver-private i965 instance stored on the VA context. */
static INLINE struct i965_driver_data *
i965_driver_data(VADriverContextP ctx)
{
    return (struct i965_driver_data *)(ctx->pDriverData);
}
/* Ensure obj_surface owns a bo with the requested fourcc/subsampling,
 * allocating (tiled or linear, per 'tiled') storage when missing. */
VAStatus
i965_check_alloc_surface_bo(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            int tiled,
                            unsigned int fourcc,
                            unsigned int subsampling);

int
va_enc_packed_type_to_idx(int packed_type);

/* reserve 2 byte for internal using */
#define CODEC_H264      0
#define CODEC_MPEG2     1
#define CODEC_H264_MVC  2

/* Delimiter bytes for coded buffers.  NOTE(review): presumably
 * appended/parsed by the MFC code when sizing the bitstream — confirm
 * at the use sites. */
#define H264_DELIMITER0 0x00
#define H264_DELIMITER1 0x00
#define H264_DELIMITER2 0x00
#define H264_DELIMITER3 0x00
#define H264_DELIMITER4 0x00

#define MPEG2_DELIMITER0        0x00
#define MPEG2_DELIMITER1        0x00
#define MPEG2_DELIMITER2        0x00
#define MPEG2_DELIMITER3        0x00
#define MPEG2_DELIMITER4        0xb0

/* Header stored at the start of every coded-buffer bo: the
 * VACodedBufferSegment handed back to the application plus driver
 * bookkeeping bytes. */
struct i965_coded_buffer_segment
{
    VACodedBufferSegment base;
    unsigned char mapped;   /* presumably set while mapped — TODO confirm */
    unsigned char codec;    /* CODEC_H264 / CODEC_MPEG2 / CODEC_H264_MVC */
};

#define I965_CODEDBUFFER_HEADER_SIZE    ALIGN(sizeof(struct i965_coded_buffer_segment), 64)

extern VAStatus i965_MapBuffer(VADriverContextP ctx,
                               VABufferID buf_id,       /* in */
                               void **pbuf);            /* out */

extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id);

extern VAStatus i965_DestroySurfaces(VADriverContextP ctx,
                                     VASurfaceID *surface_list,
                                     int num_surfaces);

extern VAStatus i965_CreateSurfaces(VADriverContextP ctx,
                                    int width,
                                    int height,
                                    int format,
                                    int num_surfaces,
                                    VASurfaceID *surfaces);

/* Memory types for imported/exported surface storage. */
#define I965_SURFACE_MEM_NATIVE             0
#define I965_SURFACE_MEM_GEM_FLINK          1
#define I965_SURFACE_MEM_DRM_PRIME          2

void
i965_destroy_surface_storage(struct object_surface *obj_surface);

#endif /* _I965_DRV_VIDEO_H_ */

View File

@@ -0,0 +1,443 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Zhou Chang <chang.zhou@intel.com>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "gen6_vme.h"
#include "gen6_mfc.h"
extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
/*
 * Resolve the frame to encode into encoder_context->input_yuv_surface /
 * encode_state->input_yuv_object.  A Y-tiled NV12 render target is used
 * directly; anything else is copied into a temporary Y-tiled NV12
 * surface through the VPP path (the temporary is destroyed on the next
 * call via is_tmp_id).
 */
static VAStatus
intel_encoder_check_yuv_surface(VADriverContextP ctx,
                                VAProfile profile,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_surface src_surface, dst_surface;
    struct object_surface *obj_surface;
    VAStatus status;
    VARectangle rect;

    /* release the temporary surface */
    if (encoder_context->is_tmp_id) {
        i965_DestroySurfaces(ctx, &encoder_context->input_yuv_surface, 1);
        encode_state->input_yuv_object = NULL;
    }

    encoder_context->is_tmp_id = 0;
    obj_surface = SURFACE(encode_state->current_render_target);
    assert(obj_surface && obj_surface->bo);

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* Fast path: Y-tiled NV12 can be fed to the encoder as-is. */
    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        unsigned int tiling = 0, swizzle = 0;

        dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

        if (tiling == I915_TILING_Y) {
            encoder_context->input_yuv_surface = encode_state->current_render_target;
            encode_state->input_yuv_object = obj_surface;
            return VA_STATUS_SUCCESS;
        }
    }

    /* Slow path: convert the whole frame into a temporary tiled NV12
     * surface and encode from the copy. */
    rect.x = 0;
    rect.y = 0;
    rect.width = obj_surface->orig_width;
    rect.height = obj_surface->orig_height;

    src_surface.base = (struct object_base *)obj_surface;
    src_surface.type = I965_SURFACE_TYPE_SURFACE;
    src_surface.flags = I965_SURFACE_FLAG_FRAME;

    status = i965_CreateSurfaces(ctx,
                                 obj_surface->orig_width,
                                 obj_surface->orig_height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &encoder_context->input_yuv_surface);
    assert(status == VA_STATUS_SUCCESS);

    if (status != VA_STATUS_SUCCESS)
        return status;

    obj_surface = SURFACE(encoder_context->input_yuv_surface);
    encode_state->input_yuv_object = obj_surface;
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dst_surface.base = (struct object_base *)obj_surface;
    dst_surface.type = I965_SURFACE_TYPE_SURFACE;
    dst_surface.flags = I965_SURFACE_FLAG_FRAME;

    status = i965_image_processing(ctx,
                                   &src_surface,
                                   &rect,
                                   &dst_surface,
                                   &rect);
    assert(status == VA_STATUS_SUCCESS);
    /* NOTE(review): a failing i965_image_processing() is only asserted
     * here, never propagated — the temporary surface is used anyway. */

    encoder_context->is_tmp_id = 1;

    return VA_STATUS_SUCCESS;
}
/*
 * Pick up the optional quality-level misc parameter.  Level 0 falls
 * back to the default; a level above the codec's supported range is an
 * invalid-parameter error.
 */
static VAStatus
intel_encoder_check_misc_parameter(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct buffer_store *quality_store = encode_state->misc_param[VAEncMiscParameterTypeQualityLevel];

    if (quality_store && quality_store->buffer) {
        VAEncMiscParameterBuffer *misc = (VAEncMiscParameterBuffer *)quality_store->buffer;
        VAEncMiscParameterBufferQualityLevel *quality = (VAEncMiscParameterBufferQualityLevel *)misc->data;

        encoder_context->quality_level = quality->quality_level;

        if (encoder_context->quality_level == 0)
            encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
        else if (encoder_context->quality_level > encoder_context->quality_range)
            return VA_STATUS_ERROR_INVALID_PARAMETER;
    }

    return VA_STATUS_SUCCESS;
}
/*
 * Validate the H.264 encode picture parameters for the current frame:
 * resolve CurrPic into the reconstructed surface, the coded buffer
 * into its object, and collect the reference surfaces.
 */
static VAStatus
intel_encoder_check_avc_parameter(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  /* used by SURFACE()/BUFFER() */
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    int i;

    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)
        goto error;

    obj_surface = SURFACE(pic_param->CurrPic.picture_id);
    assert(obj_surface); /* It is possible the store buffer isn't allocated yet */

    if (!obj_surface)
        goto error;

    encode_state->reconstructed_object = obj_surface;
    obj_buffer = BUFFER(pic_param->coded_buf);
    assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);

    if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
        goto error;

    encode_state->coded_buf_object = obj_buffer;

    /* Collect reference surfaces until the first invalid entry. */
    for (i = 0; i < 16; i++) {
        if (pic_param->ReferenceFrames[i].flags & VA_PICTURE_H264_INVALID ||
            pic_param->ReferenceFrames[i].picture_id == VA_INVALID_SURFACE)
            break;
        else {
            obj_surface = SURFACE(pic_param->ReferenceFrames[i].picture_id);
            assert(obj_surface);

            if (!obj_surface)
                goto error;

            if (obj_surface->bo)
                encode_state->reference_objects[i] = obj_surface;
            else
                encode_state->reference_objects[i] = NULL; /* FIXME: Warning or Error ??? */
        }
    }

    /* Clear the remaining slots. */
    for ( ; i < 16; i++)
        encode_state->reference_objects[i] = NULL;

    return VA_STATUS_SUCCESS;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
/*
 * Validate the MPEG-2 encode picture parameters: resolve the
 * reconstructed surface and coded buffer, then the forward (P/B) and
 * backward (B) reference surfaces.  Any unresolvable object yields
 * VA_STATUS_ERROR_INVALID_PARAMETER.
 */
static VAStatus
intel_encoder_check_mpeg2_parameter(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  /* used by SURFACE()/BUFFER() */
    VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    int num_refs = 0;
    int i;

    obj_surface = SURFACE(pic_param->reconstructed_picture);
    assert(obj_surface); /* It is possible the store buffer isn't allocated yet */

    if (!obj_surface)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    encode_state->reconstructed_object = obj_surface;

    obj_buffer = BUFFER(pic_param->coded_buf);
    assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);

    if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    encode_state->coded_buf_object = obj_buffer;

    switch (pic_param->picture_type) {
    case VAEncPictureTypeIntra:
        /* I frames carry no references. */
        break;

    case VAEncPictureTypePredictive:
    case VAEncPictureTypeBidirectional:
        /* Both P and B need the forward reference... */
        assert(pic_param->forward_reference_picture != VA_INVALID_SURFACE);
        obj_surface = SURFACE(pic_param->forward_reference_picture);
        assert(obj_surface && obj_surface->bo);

        if (!obj_surface || !obj_surface->bo)
            return VA_STATUS_ERROR_INVALID_PARAMETER;

        encode_state->reference_objects[num_refs++] = obj_surface;

        /* ...and B additionally needs the backward reference. */
        if (pic_param->picture_type == VAEncPictureTypeBidirectional) {
            assert(pic_param->backward_reference_picture != VA_INVALID_SURFACE);
            obj_surface = SURFACE(pic_param->backward_reference_picture);
            assert(obj_surface && obj_surface->bo);

            if (!obj_surface || !obj_surface->bo)
                return VA_STATUS_ERROR_INVALID_PARAMETER;

            encode_state->reference_objects[num_refs++] = obj_surface;
        }

        break;

    default:
        return VA_STATUS_ERROR_INVALID_PARAMETER;
    }

    /* Clear the unused reference slots. */
    for (i = num_refs; i < 16; i++)
        encode_state->reference_objects[i] = NULL;

    return VA_STATUS_SUCCESS;
}
/*
 * Validate everything the encoder needs for this frame: the
 * codec-specific picture parameters, then the input YUV surface, then
 * the optional misc parameters.  The first failure is returned.
 */
static VAStatus
intel_encoder_sanity_check_input(VADriverContextP ctx,
                                 VAProfile profile,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    VAStatus status;

    switch (profile) {
    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264MultiviewHigh:
    case VAProfileH264StereoHigh:
        status = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context);
        break;

    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        status = intel_encoder_check_mpeg2_parameter(ctx, encode_state, encoder_context);
        break;

    default:
        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
    }

    if (status != VA_STATUS_SUCCESS)
        return status;

    status = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);

    if (status != VA_STATUS_SUCCESS)
        return status;

    return intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context);
}
/*
 * hw_context::run hook for the encoder: validate the input, run BRC
 * preparation, then the VME (motion estimation) and MFC (bitstream)
 * pipelines.
 *
 * Fixed: the original discarded the VME/MFC pipeline status and always
 * returned VA_STATUS_SUCCESS, so encode failures were invisible to the
 * application; the first failing stage's status is now propagated.
 */
static VAStatus
intel_encoder_end_picture(VADriverContextP ctx,
                          VAProfile profile,
                          union codec_state *codec_state,
                          struct hw_context *hw_context)
{
    struct intel_encoder_context *encoder_context = (struct intel_encoder_context *)hw_context;
    struct encode_state *encode_state = &codec_state->encode;
    VAStatus vaStatus;

    vaStatus = intel_encoder_sanity_check_input(ctx, profile, encode_state, encoder_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        return vaStatus;

    encoder_context->mfc_brc_prepare(encode_state, encoder_context);

    vaStatus = encoder_context->vme_pipeline(ctx, profile, encode_state, encoder_context);

    /* Only emit the bitstream when motion estimation succeeded. */
    if (vaStatus == VA_STATUS_SUCCESS)
        vaStatus = encoder_context->mfc_pipeline(ctx, profile, encode_state, encoder_context);

    return vaStatus;
}
/* hw_context::destroy hook: tear down the MFC and VME state, then the
 * batchbuffer, and finally the context allocation itself. */
static void
intel_encoder_context_destroy(void *hw_context)
{
    struct intel_encoder_context *encoder_context = (struct intel_encoder_context *)hw_context;

    encoder_context->mfc_context_destroy(encoder_context->mfc_context);
    encoder_context->vme_context_destroy(encoder_context->vme_context);
    intel_batchbuffer_free(encoder_context->base.batch);
    free(encoder_context);
}
typedef Bool (* hw_init_func)(VADriverContextP, struct intel_encoder_context *);

/*
 * Common constructor for per-generation encoder contexts: allocate the
 * intel_encoder_context, set defaults, map the VA profile to an
 * internal codec id, apply the rate-control config attribute, then run
 * the generation-specific VME and MFC initializers.
 *
 * Fixed: the calloc() result was dereferenced unconditionally; return
 * NULL on allocation failure instead of crashing.
 */
static struct hw_context *
intel_enc_hw_context_init(VADriverContextP ctx,
                          struct object_config *obj_config,
                          hw_init_func vme_context_init,
                          hw_init_func mfc_context_init)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct intel_encoder_context *encoder_context = calloc(1, sizeof(struct intel_encoder_context));
    int i;

    if (!encoder_context)
        return NULL;

    encoder_context->base.destroy = intel_encoder_context_destroy;
    encoder_context->base.run = intel_encoder_end_picture;
    encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
    encoder_context->input_yuv_surface = VA_INVALID_SURFACE;
    encoder_context->is_tmp_id = 0;
    encoder_context->rate_control_mode = VA_RC_NONE;
    encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
    encoder_context->quality_range = 1;

    /* Map the VA profile to the driver's internal codec id. */
    switch (obj_config->profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        encoder_context->codec = CODEC_MPEG2;
        break;

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
        encoder_context->codec = CODEC_H264;
        /* Only AVC exposes more than one quality level. */
        encoder_context->quality_range = ENCODER_QUALITY_RANGE;
        break;

    case VAProfileH264StereoHigh:
    case VAProfileH264MultiviewHigh:
        encoder_context->codec = CODEC_H264_MVC;
        break;

    default:
        /* Never get here */
        assert(0);
        break;
    }

    /* Pick up the rate-control mode from the config attributes. */
    for (i = 0; i < obj_config->num_attribs; i++) {
        if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) {
            encoder_context->rate_control_mode = obj_config->attrib_list[i].value;

            if (encoder_context->codec == CODEC_MPEG2 &&
                encoder_context->rate_control_mode & VA_RC_CBR) {
                WARN_ONCE("Don't support CBR for MPEG-2 encoding\n");
                encoder_context->rate_control_mode &= ~VA_RC_CBR;
            }

            break;
        }
    }

    vme_context_init(ctx, encoder_context);
    assert(encoder_context->vme_context);
    assert(encoder_context->vme_context_destroy);
    assert(encoder_context->vme_pipeline);

    mfc_context_init(ctx, encoder_context);
    assert(encoder_context->mfc_context);
    assert(encoder_context->mfc_context_destroy);
    assert(encoder_context->mfc_pipeline);

    return (struct hw_context *)encoder_context;
}
/* Per-generation encoder entry points: each binds the matching VME and
 * MFC context constructors and delegates to the common initializer. */
struct hw_context *
gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    return intel_enc_hw_context_init(ctx, obj_config, gen6_vme_context_init, gen6_mfc_context_init);
}

struct hw_context *
gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
}

struct hw_context *
gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    return intel_enc_hw_context_init(ctx, obj_config, gen75_vme_context_init, gen75_mfc_context_init);
}

struct hw_context *
gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
}

View File

@@ -0,0 +1,71 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Zhou chang <chang.zhou@intel.com>
*
*/
#ifndef _I965_ENCODER_H_
#define _I965_ENCODER_H_
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_structs.h"
#include "i965_drv_video.h"
/*
 * The encoder's hw_context: the common base vtable plus the VME
 * (motion estimation) and MFC (bitstream generation) sub-contexts and
 * their generation-specific hooks.
 */
struct intel_encoder_context
{
    struct hw_context base;            /* must be first */
    int codec;                         /* CODEC_H264 / CODEC_MPEG2 / CODEC_H264_MVC */
    VASurfaceID input_yuv_surface;     /* surface actually fed to the encoder */
    int is_tmp_id;                     /* input_yuv_surface is a temporary copy */
    unsigned int rate_control_mode;    /* VA_RC_* from the config attributes */
    unsigned int quality_level;        /* current level, 1..quality_range */
    unsigned int quality_range;        /* max level supported by the codec */
    void *vme_context;
    void *mfc_context;
    void (*vme_context_destroy)(void *vme_context);
    /* Run motion estimation for one frame. */
    VAStatus (*vme_pipeline)(VADriverContextP ctx,
                             VAProfile profile,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context);
    void (*mfc_context_destroy)(void *mfc_context);
    /* Emit the coded bitstream for one frame. */
    VAStatus (*mfc_pipeline)(VADriverContextP ctx,
                             VAProfile profile,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context);
    /* Prepare bit-rate-control state before the pipelines run. */
    void (*mfc_brc_prepare)(struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context);
};

extern struct hw_context *
gen75_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);

extern struct hw_context *
gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config);

#endif /* _I965_ENCODER_H_ */

View File

@@ -0,0 +1,498 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <stdlib.h>
#include <assert.h>
#include <va/va.h>
#include <va/va_enc_h264.h>
#include <va/va_enc_mpeg2.h>
#include "i965_encoder_utils.h"
#define BITSTREAM_ALLOCATE_STEPPING 4096
#define NAL_REF_IDC_NONE 0
#define NAL_REF_IDC_LOW 1
#define NAL_REF_IDC_MEDIUM 2
#define NAL_REF_IDC_HIGH 3
#define NAL_NON_IDR 1
#define NAL_IDR 5
#define NAL_SPS 7
#define NAL_PPS 8
#define NAL_SEI 6
#define SLICE_TYPE_P 0
#define SLICE_TYPE_B 1
#define SLICE_TYPE_I 2
#define IS_I_SLICE(type) (SLICE_TYPE_I == (type) || SLICE_TYPE_I == (type - 5))
#define IS_P_SLICE(type) (SLICE_TYPE_P == (type) || SLICE_TYPE_P == (type - 5))
#define IS_B_SLICE(type) (SLICE_TYPE_B == (type) || SLICE_TYPE_B == (type - 5))
#define ENTROPY_MODE_CAVLC 0
#define ENTROPY_MODE_CABAC 1
#define PROFILE_IDC_BASELINE 66
#define PROFILE_IDC_MAIN 77
#define PROFILE_IDC_HIGH 100
/*
 * Growable bitstream writer used to pack NAL units bit by bit.
 * Bits accumulate in the low-order end of the current dword; each dword
 * is byte-swapped into stream byte order once it fills (see
 * avc_bitstream_put_ui) or when the stream is finalized
 * (avc_bitstream_end).
 */
struct __avc_bitstream {
    unsigned int *buffer;     /* dword array holding the packed bits */
    int bit_offset;           /* total number of bits written so far */
    int max_size_in_dword;    /* current capacity of buffer, in dwords */
};
typedef struct __avc_bitstream avc_bitstream;
/*
 * Reverse the byte order of a 32-bit value, reading it through its
 * in-memory representation (so the result pairs with how the bitstream
 * dwords are laid out on this host).
 */
static unsigned int
swap32(unsigned int val)
{
    unsigned char *bytes = (unsigned char *)&val;
    unsigned int out = 0;
    int i;

    for (i = 0; i < 4; i++)
        out = (out << 8) | bytes[i];

    return out;
}
/*
 * Initialize a bitstream writer: zeroed buffer of one allocation step
 * (BITSTREAM_ALLOCATE_STEPPING dwords) and an empty bit count.
 * NOTE: calloc failure is not checked here, matching the rest of this
 * writer's best-effort error handling.
 */
static void
avc_bitstream_start(avc_bitstream *bs)
{
    bs->bit_offset = 0;
    bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING;
    bs->buffer = calloc(bs->max_size_in_dword, sizeof(unsigned int));
}
/*
 * Finalize the stream: the last, partially filled dword still holds its
 * valid bits at the low end, so shift them up to the most-significant
 * end and byte-swap so the in-memory bytes match stream order.
 * The buffer is deliberately NOT freed here -- ownership passes to the
 * caller (see build_avc_slice_header and the SEI builders below, which
 * hand bs->buffer out through an out-parameter).
 */
static void
avc_bitstream_end(avc_bitstream *bs)
{
    int pos = (bs->bit_offset >> 5);          /* index of the partial dword */
    int bit_offset = (bs->bit_offset & 0x1f); /* valid bits in that dword */
    int bit_left = 32 - bit_offset;

    if (bit_offset) {
        bs->buffer[pos] = swap32((bs->buffer[pos] << bit_left));
    }
    // free(bs->buffer);
}
/*
 * Append the low size_in_bits bits of val to the stream (MSB first).
 * Bits accumulate in the low end of the current dword; when a dword
 * fills, it is byte-swapped into stream order and writing continues in
 * the next dword.  The buffer grows by BITSTREAM_ALLOCATE_STEPPING
 * dwords when the last dword is reached.
 *
 * Fix: the original assigned realloc()'s result directly to bs->buffer
 * and returned on NULL, leaking the old buffer (and losing all bits
 * written so far).  Use a temporary so the existing buffer survives a
 * failed grow; the stream is then silently truncated, matching the
 * writer's existing best-effort error handling.
 */
static void
avc_bitstream_put_ui(avc_bitstream *bs, unsigned int val, int size_in_bits)
{
    int pos = (bs->bit_offset >> 5);          /* current dword index */
    int bit_offset = (bs->bit_offset & 0x1f); /* bits already used in it */
    int bit_left = 32 - bit_offset;

    if (!size_in_bits)
        return;

    /* mask off any stray high bits so only size_in_bits are appended */
    if (size_in_bits < 32)
        val &= ((1 << size_in_bits) - 1);

    bs->bit_offset += size_in_bits;

    if (bit_left > size_in_bits) {
        /* fits in the current dword */
        bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
    } else {
        /* fill the current dword, swap it to stream order, spill the rest */
        size_in_bits -= bit_left;
        bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits);
        bs->buffer[pos] = swap32(bs->buffer[pos]);

        if (pos + 1 == bs->max_size_in_dword) {
            unsigned int *new_buffer;

            bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING;
            new_buffer = realloc(bs->buffer,
                                 bs->max_size_in_dword * sizeof(unsigned int));

            if (!new_buffer)
                return; /* keep the old buffer; stream is truncated */

            bs->buffer = new_buffer;
        }

        /* remaining low bits of val; high garbage is shifted out later */
        bs->buffer[pos + 1] = val;
    }
}
/*
 * Append val as an Exp-Golomb ue(v) code: (len-1) leading zero bits
 * followed by the len-bit binary representation of val+1, where len is
 * the bit length of val+1.
 */
static void
avc_bitstream_put_ue(avc_bitstream *bs, unsigned int val)
{
    unsigned int code_num = val + 1;
    unsigned int probe = code_num;
    int len = 0;

    /* bit length of code_num */
    while (probe) {
        probe >>= 1;
        len++;
    }

    avc_bitstream_put_ui(bs, 0, len - 1);    /* leading zeros */
    avc_bitstream_put_ui(bs, code_num, len); /* the code itself */
}
/*
 * Append val as an Exp-Golomb se(v) code.  Signed values map to the
 * unsigned code space as: k > 0 -> 2k-1, k <= 0 -> -2k.
 */
static void
avc_bitstream_put_se(avc_bitstream *bs, int val)
{
    unsigned int mapped = (val > 0) ? (unsigned int)(2 * val - 1)
                                    : (unsigned int)(-2 * val);

    avc_bitstream_put_ue(bs, mapped);
}
/*
 * Pad the stream to the next byte boundary with copies of `bit`
 * (all-ones or all-zeros).  No-op when already byte aligned.
 */
static void
avc_bitstream_byte_aligning(avc_bitstream *bs, int bit)
{
    int misalign = bs->bit_offset & 0x7;
    int pad_bits;
    int pad_val;

    if (misalign == 0)
        return;

    assert(bit == 0 || bit == 1);

    pad_bits = 8 - misalign;
    pad_val = bit ? (1 << pad_bits) - 1 : 0;

    avc_bitstream_put_ui(bs, pad_val, pad_bits);
}
/* Append rbsp_trailing_bits(): a stop bit (1) then zero-padding to the next byte boundary. */
static void avc_rbsp_trailing_bits(avc_bitstream *bs)
{
    avc_bitstream_put_ui(bs, 1, 1);     /* rbsp_stop_one_bit */
    avc_bitstream_byte_aligning(bs, 0); /* rbsp_alignment_zero_bit(s) */
}
/* Emit the 4-byte Annex B start code 0x00000001 that prefixes each NAL unit. */
static void nal_start_code_prefix(avc_bitstream *bs)
{
    avc_bitstream_put_ui(bs, 0x00000001, 32);
}
/* Emit the one-byte NAL unit header: forbidden_zero_bit(1) + nal_ref_idc(2) + nal_unit_type(5). */
static void nal_header(avc_bitstream *bs, int nal_ref_idc, int nal_unit_type)
{
    avc_bitstream_put_ui(bs, 0, 1);             /* forbidden_zero_bit: 0 */
    avc_bitstream_put_ui(bs, nal_ref_idc, 2);   /* importance for reference handling */
    avc_bitstream_put_ui(bs, nal_unit_type, 5); /* e.g. NAL_IDR, NAL_NON_IDR, NAL_SEI */
}
/*
 * Write an H.264 slice_header() from the libva encode parameter buffers.
 * Only the cases this encoder generates are handled; unsupported
 * combinations (interlaced fields, pic_order_cnt_type != 0, explicit
 * weighted-prediction tables) hit assert(0) FIXMEs.  Field order below
 * follows the spec's slice_header() syntax and must not be reordered.
 */
static void
slice_header(avc_bitstream *bs,
             VAEncSequenceParameterBufferH264 *sps_param,
             VAEncPictureParameterBufferH264 *pic_param,
             VAEncSliceParameterBufferH264 *slice_param)
{
    int first_mb_in_slice = slice_param->macroblock_address;

    avc_bitstream_put_ue(bs, first_mb_in_slice);                 /* first_mb_in_slice: 0 */
    avc_bitstream_put_ue(bs, slice_param->slice_type);           /* slice_type */
    avc_bitstream_put_ue(bs, slice_param->pic_parameter_set_id); /* pic_parameter_set_id: 0 */
    avc_bitstream_put_ui(bs, pic_param->frame_num, sps_param->seq_fields.bits.log2_max_frame_num_minus4 + 4); /* frame_num */

    /* frame_mbs_only_flag == 1 */
    if (!sps_param->seq_fields.bits.frame_mbs_only_flag) {
        /* FIXME: field coding (field_pic_flag etc.) not implemented */
        assert(0);
    }

    if (pic_param->pic_fields.bits.idr_pic_flag)
        avc_bitstream_put_ue(bs, slice_param->idr_pic_id);       /* idr_pic_id: 0 */

    if (sps_param->seq_fields.bits.pic_order_cnt_type == 0) {
        avc_bitstream_put_ui(bs, pic_param->CurrPic.TopFieldOrderCnt, sps_param->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 + 4);
        /* pic_order_present_flag == 0 */
    } else {
        /* FIXME: other POC types not implemented */
        assert(0);
    }

    /* redundant_pic_cnt_present_flag == 0 */
    /* slice type */
    if (IS_P_SLICE(slice_param->slice_type)) {
        avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */

        if (slice_param->num_ref_idx_active_override_flag)
            avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1);

        /* ref_pic_list_reordering */
        avc_bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
    } else if (IS_B_SLICE(slice_param->slice_type)) {
        avc_bitstream_put_ui(bs, slice_param->direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */
        avc_bitstream_put_ui(bs, slice_param->num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l0_active_minus1);
            avc_bitstream_put_ue(bs, slice_param->num_ref_idx_l1_active_minus1);
        }

        /* ref_pic_list_reordering */
        avc_bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
        avc_bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l1: 0 */
    }

    if ((pic_param->pic_fields.bits.weighted_pred_flag &&
         IS_P_SLICE(slice_param->slice_type)) ||
        ((pic_param->pic_fields.bits.weighted_bipred_idc == 1) &&
         IS_B_SLICE(slice_param->slice_type))) {
        /* FIXME: fill weight/offset table */
        assert(0);
    }

    /* dec_ref_pic_marking */
    if (pic_param->pic_fields.bits.reference_pic_flag) {         /* nal_ref_idc != 0 */
        unsigned char no_output_of_prior_pics_flag = 0;
        unsigned char long_term_reference_flag = 0;
        unsigned char adaptive_ref_pic_marking_mode_flag = 0;

        if (pic_param->pic_fields.bits.idr_pic_flag) {
            avc_bitstream_put_ui(bs, no_output_of_prior_pics_flag, 1);            /* no_output_of_prior_pics_flag: 0 */
            avc_bitstream_put_ui(bs, long_term_reference_flag, 1);                /* long_term_reference_flag: 0 */
        } else {
            avc_bitstream_put_ui(bs, adaptive_ref_pic_marking_mode_flag, 1);      /* adaptive_ref_pic_marking_mode_flag: 0 */
        }
    }

    if (pic_param->pic_fields.bits.entropy_coding_mode_flag &&
        !IS_I_SLICE(slice_param->slice_type))
        avc_bitstream_put_ue(bs, slice_param->cabac_init_idc);   /* cabac_init_idc: 0 */

    avc_bitstream_put_se(bs, slice_param->slice_qp_delta);       /* slice_qp_delta: 0 */

    /* ignore for SP/SI */
    if (pic_param->pic_fields.bits.deblocking_filter_control_present_flag) {
        avc_bitstream_put_ue(bs, slice_param->disable_deblocking_filter_idc); /* disable_deblocking_filter_idc: 0 */

        if (slice_param->disable_deblocking_filter_idc != 1) {
            avc_bitstream_put_se(bs, slice_param->slice_alpha_c0_offset_div2); /* slice_alpha_c0_offset_div2: 2 */
            avc_bitstream_put_se(bs, slice_param->slice_beta_offset_div2);     /* slice_beta_offset_div2: 2 */
        }
    }

    /* CABAC slices byte-align the header before slice data */
    if (pic_param->pic_fields.bits.entropy_coding_mode_flag) {
        avc_bitstream_byte_aligning(bs, 1);
    }
}
/*
 * Build a complete Annex B slice header NAL unit (start code + NAL
 * header + slice_header()).  nal_ref_idc is graded by slice type:
 * HIGH for I/IDR, MEDIUM for P, LOW/NONE for B depending on whether the
 * picture is itself a reference.
 *
 * Returns the length in BITS; *slice_header_buffer receives a
 * malloc'ed buffer that the caller must free().
 */
int
build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param,
                       VAEncPictureParameterBufferH264 *pic_param,
                       VAEncSliceParameterBufferH264 *slice_param,
                       unsigned char **slice_header_buffer)
{
    avc_bitstream bs;
    int is_idr = !!pic_param->pic_fields.bits.idr_pic_flag;
    int is_ref = !!pic_param->pic_fields.bits.reference_pic_flag;

    avc_bitstream_start(&bs);
    nal_start_code_prefix(&bs);

    if (IS_I_SLICE(slice_param->slice_type)) {
        nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR);
    } else if (IS_P_SLICE(slice_param->slice_type)) {
        assert(!is_idr);  /* IDR pictures contain only I slices */
        nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR);
    } else {
        assert(IS_B_SLICE(slice_param->slice_type));
        assert(!is_idr);
        nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR);
    }

    slice_header(&bs, sps_param, pic_param, slice_param);

    avc_bitstream_end(&bs);
    *slice_header_buffer = (unsigned char *)bs.buffer; /* ownership to caller */

    return bs.bit_offset;
}
/*
 * Build an SEI NAL unit carrying a buffering_period message
 * (payloadType 0).  The payload is built in a scratch bitstream first
 * so its byte size can be written into the SEI payload header, then
 * copied byte-wise into the NAL bitstream.
 *
 * Returns the length in BITS; *sei_buffer receives a malloc'ed buffer
 * the caller must free().
 */
int
build_avc_sei_buffering_period(int cpb_removal_length,
                               unsigned int init_cpb_removal_delay,
                               unsigned int init_cpb_removal_delay_offset,
                               unsigned char **sei_buffer)
{
    unsigned char *byte_buf;
    int byte_size, i;

    avc_bitstream nal_bs;
    avc_bitstream sei_bs;

    /* build the buffering_period payload */
    avc_bitstream_start(&sei_bs);
    avc_bitstream_put_ue(&sei_bs, 0);           /*seq_parameter_set_id*/
    avc_bitstream_put_ui(&sei_bs, init_cpb_removal_delay, cpb_removal_length);
    avc_bitstream_put_ui(&sei_bs, init_cpb_removal_delay_offset, cpb_removal_length);

    /* byte-align the payload with a stop bit if needed */
    if ( sei_bs.bit_offset & 0x7) {
        avc_bitstream_put_ui(&sei_bs, 1, 1);
    }
    avc_bitstream_end(&sei_bs);
    byte_size = (sei_bs.bit_offset + 7) / 8;

    /* wrap it in an SEI NAL unit */
    avc_bitstream_start(&nal_bs);
    nal_start_code_prefix(&nal_bs);
    nal_header(&nal_bs, NAL_REF_IDC_NONE, NAL_SEI);

    avc_bitstream_put_ui(&nal_bs, 0, 8);         /* payloadType: buffering_period */
    avc_bitstream_put_ui(&nal_bs, byte_size, 8); /* payloadSize in bytes */

    byte_buf = (unsigned char *)sei_bs.buffer;
    for(i = 0; i < byte_size; i++) {
        avc_bitstream_put_ui(&nal_bs, byte_buf[i], 8);
    }
    free(byte_buf); /* scratch payload buffer no longer needed */

    avc_rbsp_trailing_bits(&nal_bs);
    avc_bitstream_end(&nal_bs);

    *sei_buffer = (unsigned char *)nal_bs.buffer; /* ownership to caller */

    return nal_bs.bit_offset;
}
/*
 * Build an SEI NAL unit carrying a pic_timing message (payloadType 1)
 * with cpb_removal_delay / dpb_output_delay.  Same two-pass structure
 * as build_avc_sei_buffering_period: payload first, then wrapped with
 * type/size into the NAL bitstream.
 *
 * Returns the length in BITS; *sei_buffer receives a malloc'ed buffer
 * the caller must free().
 */
int
build_avc_sei_pic_timing(unsigned int cpb_removal_length, unsigned int cpb_removal_delay,
                         unsigned int dpb_output_length, unsigned int dpb_output_delay,
                         unsigned char **sei_buffer)
{
    unsigned char *byte_buf;
    int byte_size, i;

    avc_bitstream nal_bs;
    avc_bitstream sei_bs;

    /* build the pic_timing payload */
    avc_bitstream_start(&sei_bs);
    avc_bitstream_put_ui(&sei_bs, cpb_removal_delay, cpb_removal_length);
    avc_bitstream_put_ui(&sei_bs, dpb_output_delay, dpb_output_length);

    /* byte-align the payload with a stop bit if needed */
    if ( sei_bs.bit_offset & 0x7) {
        avc_bitstream_put_ui(&sei_bs, 1, 1);
    }
    avc_bitstream_end(&sei_bs);
    byte_size = (sei_bs.bit_offset + 7) / 8;

    /* wrap it in an SEI NAL unit */
    avc_bitstream_start(&nal_bs);
    nal_start_code_prefix(&nal_bs);
    nal_header(&nal_bs, NAL_REF_IDC_NONE, NAL_SEI);

    avc_bitstream_put_ui(&nal_bs, 0x01, 8);      /* payloadType: pic_timing */
    avc_bitstream_put_ui(&nal_bs, byte_size, 8); /* payloadSize in bytes */

    byte_buf = (unsigned char *)sei_bs.buffer;
    for(i = 0; i < byte_size; i++) {
        avc_bitstream_put_ui(&nal_bs, byte_buf[i], 8);
    }
    free(byte_buf); /* scratch payload buffer no longer needed */

    avc_rbsp_trailing_bits(&nal_bs);
    avc_bitstream_end(&nal_bs);

    *sei_buffer = (unsigned char *)nal_bs.buffer; /* ownership to caller */

    return nal_bs.bit_offset;
}
/*
 * Build a single SEI NAL unit carrying BOTH a buffering_period message
 * (payloadType 0) and a pic_timing message (payloadType 1), in that
 * order.  Each payload is built in its own scratch bitstream so its
 * byte size is known before it is copied into the NAL bitstream.
 *
 * Returns the length in BITS; *sei_buffer receives a malloc'ed buffer
 * the caller must free().
 * NOTE(review): init_cpb_removal_length is accepted but unused; the
 * buffering_period fields are written with cpb_removal_length instead
 * -- confirm against callers whether that is intended.
 */
int
build_avc_sei_buffer_timing(unsigned int init_cpb_removal_length,
                            unsigned int init_cpb_removal_delay,
                            unsigned int init_cpb_removal_delay_offset,
                            unsigned int cpb_removal_length,
                            unsigned int cpb_removal_delay,
                            unsigned int dpb_output_length,
                            unsigned int dpb_output_delay,
                            unsigned char **sei_buffer)
{
    unsigned char *byte_buf;
    int bp_byte_size, i, pic_byte_size;

    avc_bitstream nal_bs;
    avc_bitstream sei_bp_bs, sei_pic_bs;

    /* buffering_period payload */
    avc_bitstream_start(&sei_bp_bs);
    avc_bitstream_put_ue(&sei_bp_bs, 0);           /*seq_parameter_set_id*/
    avc_bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay, cpb_removal_length);
    avc_bitstream_put_ui(&sei_bp_bs, init_cpb_removal_delay_offset, cpb_removal_length);

    if ( sei_bp_bs.bit_offset & 0x7) {
        avc_bitstream_put_ui(&sei_bp_bs, 1, 1);    /* byte-align with a stop bit */
    }
    avc_bitstream_end(&sei_bp_bs);
    bp_byte_size = (sei_bp_bs.bit_offset + 7) / 8;

    /* pic_timing payload */
    avc_bitstream_start(&sei_pic_bs);
    avc_bitstream_put_ui(&sei_pic_bs, cpb_removal_delay, cpb_removal_length);
    avc_bitstream_put_ui(&sei_pic_bs, dpb_output_delay, dpb_output_length);

    if ( sei_pic_bs.bit_offset & 0x7) {
        avc_bitstream_put_ui(&sei_pic_bs, 1, 1);   /* byte-align with a stop bit */
    }
    avc_bitstream_end(&sei_pic_bs);
    pic_byte_size = (sei_pic_bs.bit_offset + 7) / 8;

    /* wrap both payloads in one SEI NAL unit */
    avc_bitstream_start(&nal_bs);
    nal_start_code_prefix(&nal_bs);
    nal_header(&nal_bs, NAL_REF_IDC_NONE, NAL_SEI);

    /* Write the SEI buffer period data */
    avc_bitstream_put_ui(&nal_bs, 0, 8);            /* payloadType: buffering_period */
    avc_bitstream_put_ui(&nal_bs, bp_byte_size, 8); /* payloadSize in bytes */

    byte_buf = (unsigned char *)sei_bp_bs.buffer;
    for(i = 0; i < bp_byte_size; i++) {
        avc_bitstream_put_ui(&nal_bs, byte_buf[i], 8);
    }
    free(byte_buf);

    /* write the SEI timing data */
    avc_bitstream_put_ui(&nal_bs, 0x01, 8);          /* payloadType: pic_timing */
    avc_bitstream_put_ui(&nal_bs, pic_byte_size, 8); /* payloadSize in bytes */

    byte_buf = (unsigned char *)sei_pic_bs.buffer;
    for(i = 0; i < pic_byte_size; i++) {
        avc_bitstream_put_ui(&nal_bs, byte_buf[i], 8);
    }
    free(byte_buf);

    avc_rbsp_trailing_bits(&nal_bs);
    avc_bitstream_end(&nal_bs);

    *sei_buffer = (unsigned char *)nal_bs.buffer; /* ownership to caller */

    return nal_bs.bit_offset;
}
/*
 * MPEG-2 counterpart of build_avc_slice_header.  Currently a STUB: it
 * writes no bits, so it returns 0 and hands back an allocated (empty)
 * buffer.  All three parameter buffers are accepted but unused.
 * The caller still owns and must free *slice_header_buffer.
 */
int
build_mpeg2_slice_header(VAEncSequenceParameterBufferMPEG2 *sps_param,
                         VAEncPictureParameterBufferMPEG2 *pic_param,
                         VAEncSliceParameterBufferMPEG2 *slice_param,
                         unsigned char **slice_header_buffer)
{
    avc_bitstream bs;

    avc_bitstream_start(&bs);
    avc_bitstream_end(&bs);
    *slice_header_buffer = (unsigned char *)bs.buffer;

    return bs.bit_offset; /* always 0 at present */
}

View File

@@ -0,0 +1,36 @@
#ifndef __I965_ENCODER_UTILS_H__
#define __I965_ENCODER_UTILS_H__
/* Builds an Annex B slice-header NAL unit; returns length in bits,
 * *slice_header_buffer gets a malloc'ed buffer the caller frees. */
int
build_avc_slice_header(VAEncSequenceParameterBufferH264 *sps_param,
                       VAEncPictureParameterBufferH264 *pic_param,
                       VAEncSliceParameterBufferH264 *slice_param,
                       unsigned char **slice_header_buffer);

/* SEI NAL unit with a buffering_period message; same return/ownership
 * convention as above. */
int
build_avc_sei_buffering_period(int cpb_removal_length,
                               unsigned int init_cpb_removal_delay,
                               unsigned int init_cpb_removal_delay_offset,
                               unsigned char **sei_buffer);

/* SEI NAL unit with a pic_timing message. */
int
build_avc_sei_pic_timing(unsigned int cpb_removal_length, unsigned int cpb_removal_delay,
                         unsigned int dpb_output_length, unsigned int dpb_output_delay,
                         unsigned char **sei_buffer);

/* One SEI NAL unit carrying both buffering_period and pic_timing. */
int
build_avc_sei_buffer_timing(unsigned int init_cpb_removal_length,
                            unsigned int init_cpb_removal_delay,
                            unsigned int init_cpb_removal_delay_offset,
                            unsigned int cpb_removal_length,
                            unsigned int cpb_removal_delay,
                            unsigned int dpb_output_length,
                            unsigned int dpb_output_delay,
                            unsigned char **sei_buffer);

/* MPEG-2 slice header builder (currently a stub returning 0 bits). */
int
build_mpeg2_slice_header(VAEncSequenceParameterBufferMPEG2 *sps_param,
                         VAEncPictureParameterBufferMPEG2 *pic_param,
                         VAEncSliceParameterBufferMPEG2 *slice_param,
                         unsigned char **slice_header_buffer);
#endif /* __I965_ENCODER_UTILS_H__ */

View File

@@ -0,0 +1,68 @@
#ifndef _I965_FOURCC_H_
#define _I965_FOURCC_H_
#ifndef VA_FOURCC_YV16
#define VA_FOURCC_YV16 VA_FOURCC('Y','V','1','6')
#endif
#ifndef VA_FOURCC_I420
#define VA_FOURCC_I420 VA_FOURCC('I','4','2','0')
#endif
/*
* VA_FOURCC_IA44 is an exception because the va.h already
* defines the AI44 as VA_FOURCC('I', 'A', '4', '4').
*/
#ifndef VA_FOURCC_IA44
#define VA_FOURCC_IA44 VA_FOURCC('A','I','4','4')
#endif
#ifndef VA_FOURCC_IA88
#define VA_FOURCC_IA88 VA_FOURCC('I','A','8','8')
#endif
#ifndef VA_FOURCC_AI88
#define VA_FOURCC_AI88 VA_FOURCC('A','I','8','8')
#endif
#ifndef VA_FOURCC_IMC1
#define VA_FOURCC_IMC1 VA_FOURCC('I','M','C','1')
#endif
#ifndef VA_FOURCC_YVY2
#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2')
#endif
#define I965_MAX_PLANES 4
#define I965_MAX_COMONENTS 4
#define I965_COLOR_YUV 0
#define I965_COLOR_RGB 1
#define I965_COLOR_INDEX 2
/* Describes one component (e.g. Y, U, V or R, G, B, A) of a pixel format. */
typedef struct {
    uint8_t plane;  /* the plane which the pixel belongs to */
    uint8_t offset; /* bits offset within a pixel in the plane */
} i965_component_info;

/* Static description of a fourcc pixel format and its memory layout. */
typedef struct {
    uint32_t fourcc;       /* fourcc */
    uint32_t format;       /* 0: YUV, 1: RGB, 2: Indexed format (I965_COLOR_*) */
    uint32_t subsampling;  /* Sub sampling */
    uint8_t flag;          /* 1: only supported by vaCreateSurfaces(), 2: only supported by vaCreateImage(), 3: both */
    uint8_t hfactor;       /* horizontal sampling factor */
    uint8_t vfactor;       /* vertical sampling factor */
    uint8_t num_planes;    /* number of planes */
    uint8_t bpp[I965_MAX_PLANES];  /* bits per pixel within a plane */
    uint8_t num_components;        /* number of components */
    /*
     * Components in the array are ordered in Y, U, V, A (up to 4 components)
     * for YUV formats, R, G, B, A (up to 4 components) for RGB formats and
     * I, A (2 components) for indexed formats
     */
    i965_component_info components[I965_MAX_COMONENTS]; /* (sic: "COMONENTS" -- macro name kept for source compat) */
} i965_fourcc_info;
extern const i965_fourcc_info *get_fourcc_info(unsigned int);
#endif /* _I965_FOURCC_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,222 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*/
#ifndef _I965_GPE_UTILS_H_
#define _I965_GPE_UTILS_H_
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#define MAX_GPE_KERNELS 32
/* A linear buffer exposed to the GPE as a surface: num_blocks blocks of
 * size_block bytes each, with the given pitch. */
struct i965_buffer_surface
{
    dri_bo *bo;               /* backing buffer object */
    unsigned int num_blocks;  /* number of blocks in the buffer */
    unsigned int size_block;  /* size of one block, in bytes */
    unsigned int pitch;       /* row pitch, in bytes */
};
/*
 * State bundle for one GPE (general-purpose engine / media pipeline)
 * context: surface/binding-table, interface descriptors, CURBE constant
 * data, VFE configuration, the kernel set, and (gen8) dynamic state.
 */
struct i965_gpe_context
{
    struct {
        dri_bo *bo;
        unsigned int length;            /* in bytes */
    } surface_state_binding_table;      /* surface states + binding table */

    struct {
        dri_bo *bo;
        unsigned int max_entries;
        unsigned int entry_size;        /* in bytes */
    } idrt;                             /* interface descriptor remap table */

    struct {
        dri_bo *bo;
        unsigned int length;            /* in bytes */
    } curbe;                            /* constant URB entry (kernel constants) */

    /* VFE (video front end) configuration */
    struct {
        unsigned int gpgpu_mode : 1;
        unsigned int pad0 : 7;
        unsigned int max_num_threads : 16;
        unsigned int num_urb_entries : 8;
        unsigned int urb_entry_size : 16;
        unsigned int curbe_allocation_size : 16;
    } vfe_state;

    /* vfe_desc5/6/7 is used to determine whether the HW scoreboard is used.
     * If scoreboard is not used, don't touch them
     */
    union {
        unsigned int dword;
        struct {
            unsigned int mask:8;
            unsigned int pad:22;
            unsigned int type:1;
            unsigned int enable:1;
        } scoreboard0;
    } vfe_desc5;

    union {
        unsigned int dword;
        struct {
            int delta_x0:4;
            int delta_y0:4;
            int delta_x1:4;
            int delta_y1:4;
            int delta_x2:4;
            int delta_y2:4;
            int delta_x3:4;
            int delta_y3:4;
        } scoreboard1;                  /* scoreboard dependency deltas 0-3 */
    } vfe_desc6;

    union {
        unsigned int dword;
        struct {
            int delta_x4:4;
            int delta_y4:4;
            int delta_x5:4;
            int delta_y5:4;
            int delta_x6:4;
            int delta_y6:4;
            int delta_x7:4;
            int delta_y7:4;
        } scoreboard2;                  /* scoreboard dependency deltas 4-7 */
    } vfe_desc7;

    unsigned int num_kernels;
    struct i965_kernel kernels[MAX_GPE_KERNELS];

    struct {
        dri_bo *bo;
        int bo_size;
        unsigned int end_offset;        /* bytes used so far */
    } instruction_state;                /* kernel instructions (gen8 path) */

    struct {
        dri_bo *bo;
    } indirect_state;

    struct {
        dri_bo *bo;
        int bo_size;
        unsigned int end_offset;        /* bytes used so far */
    } dynamic_state;                    /* samplers + IDRT + CURBE heap (gen8 path) */

    /* offsets/sizes of the sub-allocations inside dynamic_state */
    unsigned int sampler_offset;
    int sampler_size;
    unsigned int idrt_offset;
    int idrt_size;
    unsigned int curbe_offset;
    int curbe_size;
};
/* --- Common GPE context lifetime (pre-gen8) --- */
void i965_gpe_context_destroy(struct i965_gpe_context *gpe_context);
void i965_gpe_context_init(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context);
void i965_gpe_load_kernels(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context,
                           struct i965_kernel *kernel_list,
                           unsigned int num_kernels);
void gen6_gpe_pipeline_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct intel_batchbuffer *batch);

/* --- Per-generation surface-state setup helpers.  Each writes a surface
 * state and a binding-table entry at the given offsets.
 * NOTE(review): "suface" below is a historical typo kept for
 * source/link compatibility. --- */
void i965_gpe_surface2_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct object_surface *obj_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset);
void i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset);
void i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                                  struct i965_gpe_context *gpe_context,
                                  struct i965_buffer_surface *buffer_surface,
                                  unsigned long binding_table_offset,
                                  unsigned long surface_state_offset);

/* gen7 variants */
void gen7_gpe_surface2_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct object_surface *obj_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset);
void gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset);
void gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                                  struct i965_gpe_context *gpe_context,
                                  struct i965_buffer_surface *buffer_surface,
                                  unsigned long binding_table_offset,
                                  unsigned long surface_state_offset);

/* gen7.5 (Haswell) chroma-plane helper */
void gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                          struct i965_gpe_context *gpe_context,
                                          struct object_surface *obj_surface,
                                          unsigned long binding_table_offset,
                                          unsigned long surface_state_offset);

/* gen8 (Broadwell) variants and gen8-specific context lifetime */
extern void gen8_gpe_surface2_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset);
extern void gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                            struct i965_gpe_context *gpe_context,
                                            struct object_surface *obj_surface,
                                            unsigned long binding_table_offset,
                                            unsigned long surface_state_offset);
extern void gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                                         struct i965_gpe_context *gpe_context,
                                         struct i965_buffer_surface *buffer_surface,
                                         unsigned long binding_table_offset,
                                         unsigned long surface_state_offset);
extern void gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                                struct i965_gpe_context *gpe_context,
                                                struct object_surface *obj_surface,
                                                unsigned long binding_table_offset,
                                                unsigned long surface_state_offset);
void gen8_gpe_pipeline_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct intel_batchbuffer *batch);
void gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context);
void gen8_gpe_context_init(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context);
void gen8_gpe_load_kernels(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context,
                           struct i965_kernel *kernel_list,
                           unsigned int num_kernels);
#endif /* _I965_GPE_UTILS_H_ */

View File

@@ -0,0 +1,396 @@
/*
 * Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_media.h"
#include "i965_media_mpeg2.h"
#include "i965_media_h264.h"
#include "i965_decoder_utils.h"
/* Switch the render engine to the media pipeline (as opposed to 3D)
 * for the commands that follow in this batch. */
static void
i965_media_pipeline_select(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
/* Program the URB fences: the VFE section ends at urb.cs_start and the
 * CS (constant) section runs from there to the end of the URB. */
static void
i965_media_urb_layout(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = media_context->base.batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = media_context->urb.cs_start;
    cs_fence = i965->intel.device_info->urb_size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
/*
 * Emit STATE_BASE_ADDRESS.  Most bases are left at zero (addresses in
 * later commands are then effectively absolute/graphics-address based);
 * only the indirect-object base is pointed at the slice-data bo when
 * one is present.  Ironlake takes an 8-dword form of the command, older
 * gens a 6-dword form -- the dword counts below must match exactly.
 */
static void
i965_media_state_base_address(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = media_context->base.batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

        if (media_context->indirect_object.bo) {
            /* indirect object base -> start of the indirect data bo */
            OUT_RELOC(batch, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                      media_context->indirect_object.offset | BASE_ADDRESS_MODIFY);
        } else {
            OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        }

        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

        if (media_context->indirect_object.bo) {
            OUT_RELOC(batch, media_context->indirect_object.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                      media_context->indirect_object.offset | BASE_ADDRESS_MODIFY);
        } else {
            OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        }

        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
/* Emit MEDIA_STATE_POINTERS: optional extended (e.g. codec-specific)
 * state -- low bit set to mark it valid -- followed by the VFE state bo. */
static void
i965_media_state_pointers(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);

    if (media_context->extended_state.enabled)
        OUT_RELOC(batch, media_context->extended_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    else
        OUT_BATCH(batch, 0);

    OUT_RELOC(batch, media_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
/* Emit CS_URB_STATE: how many constant-URB entries exist and how large
 * each is (entry size field is encoded as size-1). */
static void
i965_media_cs_urb_layout(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((media_context->urb.size_cs_entry - 1) << 4) |   /* URB Entry Allocation Size */
              (media_context->urb.num_cs_entries << 0));        /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
/* Emit the fixed block of media pipeline state commands, in the order
 * the hardware expects: base addresses, state pointers, CS URB layout. */
static void
i965_media_pipeline_state(VADriverContextP ctx, struct i965_media_context *media_context)
{
    i965_media_state_base_address(ctx, media_context);
    i965_media_state_pointers(ctx, media_context);
    i965_media_cs_urb_layout(ctx, media_context);
}
/* Emit CMD_CONSTANT_BUFFER pointing at the CURBE bo.  The reloc delta
 * (size_cs_entry - 1) presumably encodes the buffer length field in the
 * command's low bits -- confirm against the gen4 PRM. */
static void
i965_media_constant_buffer(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2)); /* (1 << 8): buffer valid */
    OUT_RELOC(batch, media_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              media_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);
}
/* Emit a null depth buffer (surface type NULL, D32_FLOAT format, no
 * address) -- the media pipeline does not render depth, but the command
 * is emitted to put the depth state into a known configuration. */
static void
i965_media_depth_buffer(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_DEPTH_BUFFER | 4);
    OUT_BATCH(batch, (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
/*
 * Build the full media batch for one decode pass.  The numbered steps
 * form a fixed hardware programming sequence; the codec-specific
 * media_objects callback appends the actual MEDIA_OBJECT commands.
 * Emitted atomically so no other thread interleaves batch commands.
 */
static void
i965_media_pipeline_setup(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);                             /* step 1 */
    i965_media_depth_buffer(ctx, media_context);
    i965_media_pipeline_select(ctx, media_context);                     /* step 2 */
    i965_media_urb_layout(ctx, media_context);                          /* step 3 */
    i965_media_pipeline_state(ctx, media_context);                      /* step 4 */
    i965_media_constant_buffer(ctx, decode_state, media_context);       /* step 5 */
    assert(media_context->media_objects);
    media_context->media_objects(ctx, decode_state, media_context);     /* step 6 */
    intel_batchbuffer_end_atomic(batch);
}
/*
 * (Re)allocate the per-frame GPU state buffers -- CURBE, surface
 * states, binding table, interface descriptor table, VFE state -- then
 * dispatch to the codec-specific init.  Old bos are unreferenced first,
 * so this is safe to call once per picture.
 */
static void
i965_media_decode_init(VADriverContextP ctx,
                       VAProfile profile,
                       struct decode_state *decode_state,
                       struct i965_media_context *media_context)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* constant buffer */
    dri_bo_unreference(media_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 64);
    assert(bo);
    media_context->curbe.bo = bo;

    /* surface state */
    for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
        dri_bo_unreference(media_context->surface_state[i].bo);
        media_context->surface_state[i].bo = NULL;  /* filled in by codec-specific setup */
    }

    /* binding table */
    dri_bo_unreference(media_context->binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "binding table",
                      MAX_MEDIA_SURFACES * sizeof(unsigned int), 32);
    assert(bo);
    media_context->binding_table.bo = bo;

    /* interface descriptor remapping table */
    dri_bo_unreference(media_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface discriptor",
                      MAX_INTERFACE_DESC * sizeof(struct i965_interface_descriptor), 16);
    assert(bo);
    media_context->idrt.bo = bo;

    /* vfe state */
    dri_bo_unreference(media_context->vfe_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vfe state",
                      sizeof(struct i965_vfe_state), 32);
    assert(bo);
    media_context->vfe_state.bo = bo;

    /* extended state: off by default, codec init may enable it */
    media_context->extended_state.enabled = 0;

    switch (profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        i965_media_mpeg2_decode_init(ctx, decode_state, media_context);
        break;

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
        i965_media_h264_decode_init(ctx, decode_state, media_context);
        break;

    default:
        assert(0);  /* unsupported profile should have been rejected earlier */
        break;
    }
}
/*
 * hw_context::run entry point for the media decoder: sanity-check the
 * input buffers, rebuild per-frame state, let the codec-specific
 * callback fill the media states, build the batch, and flush it to the
 * GPU.  Returns VA_STATUS_SUCCESS or the sanity-check error.
 */
static VAStatus
i965_media_decode_picture(VADriverContextP ctx,
                          VAProfile profile,
                          union codec_state *codec_state,
                          struct hw_context *hw_context)
{
    struct i965_media_context *media_context = (struct i965_media_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;
    VAStatus vaStatus;

    vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    i965_media_decode_init(ctx, profile, decode_state, media_context);
    assert(media_context->media_states_setup);
    media_context->media_states_setup(ctx, decode_state, media_context);
    i965_media_pipeline_setup(ctx, decode_state, media_context);
    intel_batchbuffer_flush(hw_context->batch);

    vaStatus = VA_STATUS_SUCCESS;

out:
    return vaStatus;
}
static void
i965_media_context_destroy(void *hw_context)
{
struct i965_media_context *media_context = (struct i965_media_context *)hw_context;
int i;
if (media_context->free_private_context)
media_context->free_private_context(&media_context->private_context);
for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
dri_bo_unreference(media_context->surface_state[i].bo);
media_context->surface_state[i].bo = NULL;
}
dri_bo_unreference(media_context->extended_state.bo);
media_context->extended_state.bo = NULL;
dri_bo_unreference(media_context->vfe_state.bo);
media_context->vfe_state.bo = NULL;
dri_bo_unreference(media_context->idrt.bo);
media_context->idrt.bo = NULL;
dri_bo_unreference(media_context->binding_table.bo);
media_context->binding_table.bo = NULL;
dri_bo_unreference(media_context->curbe.bo);
media_context->curbe.bo = NULL;
dri_bo_unreference(media_context->indirect_object.bo);
media_context->indirect_object.bo = NULL;
intel_batchbuffer_free(media_context->base.batch);
free(media_context);
}
/*
 * Create the hw_context used for shader-based decoding on G4x.
 * Only MPEG-2 is handled on this generation; any other profile is a
 * programming error (configs are validated before we get here).
 *
 * Returns NULL on allocation failure.
 */
struct hw_context *
g4x_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct i965_media_context *media_context = calloc(1, sizeof(struct i965_media_context));

    /* robustness fix: the original dereferenced a possibly-NULL calloc result */
    if (!media_context)
        return NULL;

    media_context->base.destroy = i965_media_context_destroy;
    media_context->base.run = i965_media_decode_picture;
    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);

    switch (obj_config->profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        i965_media_mpeg2_dec_context_init(ctx, media_context);
        break;

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
    default:
        /* unreachable: G4x has no media decoder for these profiles */
        assert(0);
        break;
    }

    return (struct hw_context *)media_context;
}
/*
 * Create the hw_context used for shader-based decoding on Ironlake.
 * MPEG-2 and H.264 are supported; VC-1 and anything else is a
 * programming error (configs are validated before we get here).
 *
 * Returns NULL on allocation failure.
 */
struct hw_context *
ironlake_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct i965_media_context *media_context = calloc(1, sizeof(struct i965_media_context));

    /* robustness fix: the original dereferenced a possibly-NULL calloc result */
    if (!media_context)
        return NULL;

    media_context->base.destroy = i965_media_context_destroy;
    media_context->base.run = i965_media_decode_picture;
    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);

    switch (obj_config->profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        i965_media_mpeg2_dec_context_init(ctx, media_context);
        break;

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
        i965_media_h264_dec_context_init(ctx, media_context);
        break;

    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
    default:
        /* unreachable: no media-pipeline VC-1 decoder on Ironlake */
        assert(0);
        break;
    }

    return (struct hw_context *)media_context;
}

View File

@@ -0,0 +1,96 @@
/*
 * Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#ifndef _I965_MEDIA_H_
#define _I965_MEDIA_H_

#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>

#include "i965_structs.h"

/* Number of entries in the interface descriptor remap table. */
#define MAX_INTERFACE_DESC      16
/* Binding-table slots. 34 = dest Y + dest UV + 16 reference Y +
 * 16 reference UV planes (see i965_media_h264_surfaces_setup). */
#define MAX_MEDIA_SURFACES      34

struct decode_state;

/*
 * State shared by the shader-based (media pipeline) decoders.
 * The codec-specific code (MPEG-2 / H.264) hangs its own data off
 * private_context and installs the three callbacks at init time.
 */
struct i965_media_context
{
    struct hw_context base;

    /* per-slot SURFACE_STATE buffer objects; NULL when a slot is unused */
    struct {
        dri_bo *bo;
    } surface_state[MAX_MEDIA_SURFACES];

    /* binding table pointing at the surface states above */
    struct {
        dri_bo *bo;
    } binding_table;

    struct {
        dri_bo *bo;
    } idrt; /* interface descriptor remap table */

    /* extended (AVC) VFE state; consulted only when 'enabled' is set */
    struct {
        dri_bo *bo;
        int enabled;
    } extended_state;

    struct {
        dri_bo *bo;
    } vfe_state;

    /* CURBE (constant) buffer */
    struct {
        dri_bo *bo;
    } curbe;

    /* indirect object data (e.g. AVC-IT data) and its start offset */
    struct {
        dri_bo *bo;
        unsigned long offset;
    } indirect_object;

    /* URB partitioning between VFE and CS entries */
    struct {
        unsigned int vfe_start;
        unsigned int cs_start;
        unsigned int num_vfe_entries;
        unsigned int num_cs_entries;
        unsigned int size_vfe_entry;
        unsigned int size_cs_entry;
    } urb;

    /* codec-private data and hooks, owned by the codec init code */
    void *private_context;
    void (*media_states_setup)(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context);
    void (*media_objects)(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context);
    void (*free_private_context)(void **data);
};

#endif /* _I965_MEDIA_H_ */

View File

@@ -0,0 +1,901 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_media.h"
#include "i965_media_h264.h"
#include "i965_decoder_utils.h"
/*
 * Logical kernel indices for the combined AVC MC kernel.  These index
 * avc_mc_kernel_offset_gen4/gen5 and are the values written into the
 * VFE remap tables in i965_media_h264_vfe_state_extension().
 */
enum {
    INTRA_16X16 = 0,
    INTRA_8X8,
    INTRA_4X4,
    INTRA_PCM,
    FRAMEMB_MOTION,
    FIELDMB_MOTION,
    MBAFF_MOTION,
};
/*
 * CURBE layout consumed by the AVC intra-prediction kernels.  Each
 * "Rn.m" comment names the GRF register/subregister the field lands
 * in when the constant buffer is loaded.  Mode offsets are jump
 * distances from the corresponding base (vertical / DC) kernel entry.
 */
struct intra_kernel_header
{
    /* R1.0 */
    unsigned char intra_4x4_luma_mode_0_offset;
    unsigned char intra_4x4_luma_mode_1_offset;
    unsigned char intra_4x4_luma_mode_2_offset;
    unsigned char intra_4x4_luma_mode_3_offset;
    /* R1.1 */
    unsigned char intra_4x4_luma_mode_4_offset;
    unsigned char intra_4x4_luma_mode_5_offset;
    unsigned char intra_4x4_luma_mode_6_offset;
    unsigned char intra_4x4_luma_mode_7_offset;
    /* R1.2 */
    unsigned char intra_4x4_luma_mode_8_offset;
    unsigned char pad0;
    unsigned short top_reference_offset;
    /* R1.3 */
    unsigned char intra_8x8_luma_mode_0_offset;
    unsigned char intra_8x8_luma_mode_1_offset;
    unsigned char intra_8x8_luma_mode_2_offset;
    unsigned char intra_8x8_luma_mode_3_offset;
    /* R1.4 */
    unsigned char intra_8x8_luma_mode_4_offset;
    unsigned char intra_8x8_luma_mode_5_offset;
    unsigned char intra_8x8_luma_mode_6_offset;
    unsigned char intra_8x8_luma_mode_7_offset;
    /* R1.5 */
    unsigned char intra_8x8_luma_mode_8_offset;
    unsigned char pad1;
    unsigned short const_reverse_data_transfer_intra_8x8;
    /* R1.6 */
    unsigned char intra_16x16_luma_mode_0_offset;
    unsigned char intra_16x16_luma_mode_1_offset;
    unsigned char intra_16x16_luma_mode_2_offset;
    unsigned char intra_16x16_luma_mode_3_offset;
    /* R1.7 */
    unsigned char intra_chroma_mode_0_offset;
    unsigned char intra_chroma_mode_1_offset;
    unsigned char intra_chroma_mode_2_offset;
    unsigned char intra_chroma_mode_3_offset;
    /* R2.0 */
    unsigned int const_intra_16x16_plane_0;
    /* R2.1 */
    unsigned int const_intra_16x16_chroma_plane_0;
    /* R2.2 */
    unsigned int const_intra_16x16_chroma_plane_1;
    /* R2.3 */
    unsigned int const_intra_16x16_plane_1;
    /* R2.4 */
    unsigned int left_shift_count_reverse_dw_ordering;
    /* R2.5 */
    unsigned int const_reverse_data_transfer_intra_4x4;
    /* R2.6 */
    unsigned int intra_4x4_pred_mode_offset;
};
/*
 * CURBE layout for the inter (motion-compensation) kernels; carries the
 * W=128 weighted-prediction work-around data uploaded by
 * i965_media_h264_upload_constants() when HW W=128 is unavailable.
 */
struct inter_kernel_header
{
    unsigned short weight_offset;
    unsigned char weight_offset_flag;
    unsigned char pad0;
};
/* Entry-point labels exported by the shader assembler. */
#include "shaders/h264/mc/export.inc"

/*
 * Byte offsets of each MC kernel entry point inside the combined AVC
 * kernel on G4x, indexed by the enum above (INTRA_16X16..MBAFF_MOTION).
 */
static unsigned long avc_mc_kernel_offset_gen4[] = {
    INTRA_16x16_IP * INST_UNIT_GEN4,
    INTRA_8x8_IP * INST_UNIT_GEN4,
    INTRA_4x4_IP * INST_UNIT_GEN4,
    INTRA_PCM_IP * INST_UNIT_GEN4,
    FRAME_MB_IP * INST_UNIT_GEN4,
    FIELD_MB_IP * INST_UNIT_GEN4,
    MBAFF_MB_IP * INST_UNIT_GEN4
};
/*
 * G4x intra-prediction CURBE data.  Positional initializer matching
 * struct intra_kernel_header exactly: mode offsets are computed as jump
 * distances between shader entry labels; the magic dword constants are
 * kernel lookup/shuffle tables consumed as-is by the shaders.
 */
struct intra_kernel_header intra_kernel_header_gen4 = {
    0,
    (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
    (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
    0,
    0xFFFC,
    0,
    (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
    (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
    0,
    0x0001,
    0,
    (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
    (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
    (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
    0,
    (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
    (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
    (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
    0xFCFBFAF9,
    0x00FFFEFD,
    0x04030201,
    0x08070605,
    0x18100800,
    0x00020406,
    /* four packed byte offsets: ADD_ERROR_SBn -> intra_Pred_4x4_Y */
    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 +
    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 +
    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 +
    (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
};
/* Precompiled shader binaries for G4x, emitted by the shader build. */
static const uint32_t h264_avc_combined_gen4[][4] = {
#include "shaders/h264/mc/avc_mc.g4b"
};

static const uint32_t h264_avc_null_gen4[][4] = {
#include "shaders/h264/mc/null.g4b"
};

/* Kernel descriptors uploaded at context init on G4x (bo filled later). */
static struct i965_kernel h264_avc_kernels_gen4[] = {
    {
        "AVC combined kernel",
        H264_AVC_COMBINED,
        h264_avc_combined_gen4,
        sizeof(h264_avc_combined_gen4),
        NULL
    },
    {
        "NULL kernel",
        H264_AVC_NULL,
        h264_avc_null_gen4,
        sizeof(h264_avc_null_gen4),
        NULL
    }
};
/* On Ironlake (Gen5): same tables, built from the Gen5 shader labels. */
#include "shaders/h264/mc/export.inc.gen5"

/* Byte offsets of each MC kernel entry point on Ironlake. */
static unsigned long avc_mc_kernel_offset_gen5[] = {
    INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
    INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
    INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
    INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
    FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
    FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
    MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
};
/*
 * Ironlake intra-prediction CURBE data; Gen5 counterpart of
 * intra_kernel_header_gen4 (same positional layout, Gen5 labels).
 */
struct intra_kernel_header intra_kernel_header_gen5 = {
    0,
    (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
    0,
    0xFFFC,
    0,
    (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
    0,
    0x0001,
    0,
    (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
    (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
    (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
    0,
    (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
    (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
    (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
    0xFCFBFAF9,
    0x00FFFEFD,
    0x04030201,
    0x08070605,
    0x18100800,
    0x00020406,
    /* four packed byte offsets: ADD_ERROR_SBn -> intra_Pred_4x4_Y */
    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 +
    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 +
    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 +
    (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
};
/* Precompiled shader binaries for Ironlake. */
static const uint32_t h264_avc_combined_gen5[][4] = {
#include "shaders/h264/mc/avc_mc.g4b.gen5"
};

static const uint32_t h264_avc_null_gen5[][4] = {
#include "shaders/h264/mc/null.g4b.gen5"
};

/* Kernel descriptors uploaded at context init on Ironlake. */
static struct i965_kernel h264_avc_kernels_gen5[] = {
    {
        "AVC combined kernel",
        H264_AVC_COMBINED,
        h264_avc_combined_gen5,
        sizeof(h264_avc_combined_gen5),
        NULL
    },
    {
        "NULL kernel",
        H264_AVC_NULL,
        h264_avc_null_gen5,
        sizeof(h264_avc_null_gen5),
        NULL
    }
};
/* Number of MC interface descriptors == number of kernel entry points. */
#define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))

/* Generation-selected tables; pointed at the gen4 or gen5 data by
 * i965_media_h264_dec_context_init(). */
static unsigned long *avc_mc_kernel_offset = NULL;
static struct intra_kernel_header *intra_kernel_header = NULL;
/*
 * Build a SURFACE_STATE for one plane of a decode surface and store the
 * resulting bo in media_context->surface_state[index].
 *
 * offset:  byte offset of the plane within the surface bo (0 for Y,
 *          w*h for the interleaved UV plane — see the callers).
 * w/h/pitch: plane dimensions; hardware fields are programmed as
 *          value - 1.
 * is_dst:  nonzero for the render target (RENDER-domain reloc),
 *          zero for reference pictures (SAMPLER-domain read).
 * vert_line_stride / vert_line_stride_ofs: field addressing controls
 *          used for interlaced pictures.
 */
static void
i965_media_h264_surface_state(VADriverContextP ctx,
                              int index,
                              struct object_surface *obj_surface,
                              unsigned long offset,
                              int w, int h, int pitch,
                              Bool is_dst,
                              int vert_line_stride,
                              int vert_line_stride_ofs,
                              int format,
                              struct i965_media_context *media_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_surface_state *ss;
    dri_bo *bo;
    uint32_t write_domain, read_domain;

    assert(obj_surface->bo);

    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state",
                      sizeof(struct i965_surface_state), 32);
    assert(bo);
    dri_bo_map(bo, 1); /* writable mapping */
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.vert_line_stride = vert_line_stride;
    ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
    /* presumed GPU address; patched at exec by the reloc below */
    ss->ss1.base_addr = obj_surface->bo->offset + offset;
    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss3.pitch = pitch - 1;

    /* Destination is written by the render pipe; references are only
     * sampled. */
    if (is_dst) {
        write_domain = I915_GEM_DOMAIN_RENDER;
        read_domain = I915_GEM_DOMAIN_RENDER;
    } else {
        write_domain = 0;
        read_domain = I915_GEM_DOMAIN_SAMPLER;
    }

    dri_bo_emit_reloc(bo,
                      read_domain, write_domain,
                      offset,
                      offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);
    dri_bo_unmap(bo);

    assert(index < MAX_MEDIA_SURFACES);
    media_context->surface_state[index].bo = bo;
}
/*
 * Create surface states for the current picture and every active
 * reference frame.  Binding-table slot layout: 0 = dest Y, 1 = dest UV,
 * 2+i = reference i Y, 18+i = reference i UV.
 *
 * For field pictures the plane height is halved and the field
 * addressing controls (vert_line_stride / ofs) are set accordingly.
 */
static void
i965_media_h264_surfaces_setup(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct i965_media_context *media_context)
{
    struct i965_h264_context *i965_h264_context;
    struct object_surface *obj_surface;
    VAPictureParameterBufferH264 *pic_param;
    VAPictureH264 *va_pic;
    int i, w, h;
    int field_picture;

    assert(media_context->private_context);
    i965_h264_context = (struct i965_h264_context *)media_context->private_context;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* Target Picture */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    w = obj_surface->width;
    h = obj_surface->height;
    field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
    /* NOTE(review): width is passed as w / 4 with byte formats —
     * presumably the kernels address the plane in dword units; confirm
     * against the shader sources. */
    i965_media_h264_surface_state(ctx, 0, obj_surface,
                                  0, w / 4, h / (1 + field_picture), w,
                                  1,
                                  field_picture,
                                  !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
                                  I965_SURFACEFORMAT_R8_SINT, /* Y */
                                  media_context);
    i965_media_h264_surface_state(ctx, 1, obj_surface,
                                  w * h, w / 4, h / 2 / (1 + field_picture), w,
                                  1,
                                  field_picture,
                                  !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
                                  I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
                                  media_context);

    /* Reference Pictures */
    for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
        struct object_surface * const obj_surface =
            i965_h264_context->fsid_list[i].obj_surface;

        if (obj_surface) {
            /* Look up the VA picture entry to recover the field flags. */
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            w = obj_surface->width;
            h = obj_surface->height;
            field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
            i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
                                          0, w / 4, h / (1 + field_picture), w,
                                          0,
                                          field_picture,
                                          !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
                                          I965_SURFACEFORMAT_R8_SINT, /* Y */
                                          media_context);
            i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
                                          w * h, w / 4, h / 2 / (1 + field_picture), w,
                                          0,
                                          field_picture,
                                          !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
                                          I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
                                          media_context);
        }
    }
}
/*
 * Fill the binding table with the GPU offset of every allocated surface
 * state, emitting a relocation per used slot; unused slots stay zero.
 */
static void
i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
{
    dri_bo * const bo = media_context->binding_table.bo;
    unsigned int *table;
    int slot;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    table = bo->virtual;
    memset(table, 0, bo->size);

    for (slot = 0; slot < MAX_MEDIA_SURFACES; slot++) {
        dri_bo * const ss_bo = media_context->surface_state[slot].bo;

        if (!ss_bo)
            continue;

        /* presumed address; fixed up at exec time by the reloc below */
        table[slot] = ss_bo->offset;
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          slot * sizeof(*table),
                          ss_bo);
    }

    dri_bo_unmap(bo);
}
/*
 * Write one interface descriptor per MC kernel entry point into the
 * IDRT.  Every descriptor points into the single combined AVC kernel at
 * a per-interface offset and shares the same binding table.
 */
static void
i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
    struct i965_interface_descriptor *desc;
    int i;
    dri_bo *bo;

    bo = media_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
        int kernel_offset = avc_mc_kernel_offset[i];
        memset(desc, 0, sizeof(*desc));
        desc->desc0.grf_reg_blocks = 7;
        desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
        desc->desc1.const_urb_entry_read_offset = 0;
        desc->desc1.const_urb_entry_read_len = 2;
        desc->desc3.binding_table_entry_count = 0;
        desc->desc3.binding_table_pointer =
            media_context->binding_table.bo->offset >> 5; /*reloc */
        /* reloc delta includes the low desc0 bits (grf_reg_blocks) so
         * the patched dword keeps them intact */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          desc->desc0.grf_reg_blocks + kernel_offset,
                          i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
                          i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          desc->desc3.binding_table_entry_count,
                          i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
                          media_context->binding_table.bo);
        desc++;
    }

    dri_bo_unmap(bo);
}
/*
 * Program the base VFE state: thread/URB limits from the context's URB
 * partitioning, AVC-IT mode, and a pointer to the IDRT (relocated).
 * The extended state flag is set because AVC uses the VFE extension.
 */
static void
i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = media_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    vfe_state->vfe0.extend_vfe_state_present = 1;
    vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
    vfe_state->vfe1.children_present = 0;
    vfe_state->vfe2.interface_descriptor_base =
        media_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      media_context->idrt.bo);
    dri_bo_unmap(bo);
}
/*
 * Program the extended VFE state for AVC: GRF layout of the indirect
 * data, the interface remap tables that map macroblock types to the
 * kernel indices (enum at the top of this file), and the optional HW
 * scoreboard dependency pattern.
 */
static void
i965_media_h264_vfe_state_extension(VADriverContextP ctx,
                                    struct decode_state *decode_state,
                                    struct i965_media_context *media_context)
{
    struct i965_h264_context *i965_h264_context;
    struct i965_vfe_state_ex *vfe_state_ex;
    VAPictureParameterBufferH264 *pic_param;
    int mbaff_frame_flag;

    assert(media_context->private_context);
    i965_h264_context = (struct i965_h264_context *)media_context->private_context;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    /* MBAFF only applies to frame pictures of an MBAFF sequence */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    assert(media_context->extended_state.bo);
    dri_bo_map(media_context->extended_state.bo, 1);
    assert(media_context->extended_state.bo->virtual);
    vfe_state_ex = media_context->extended_state.bo->virtual;
    memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));

    /*
     * Indirect data buffer:
     * --------------------------------------------------------
     * | Motion Vectors | Weight/Offset data | Residual data |
     * --------------------------------------------------------
     * R4-R7: Motion Vectors
     * R8-R9: Weight/Offset
     * R10-R33: Residual data
     */
    vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
    vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;

    if (i965_h264_context->picture.i_flag) {
        /* intra-only picture: no MV / weight data in the payload */
        vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
        vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
        vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
    } else {
        vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
        vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
        vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
    }

    /* Remap MB type indices 0-15 onto the kernel index enum: intra
     * types map 1:1, every motion type maps to the kernel variant
     * matching the picture structure. */
    if (!pic_param->pic_fields.bits.field_pic_flag) {
        if (mbaff_frame_flag) {
            vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
            vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
            vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
            vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
            vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
            vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
            vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
            vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
            vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
        } else {
            vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
            vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
            vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
            vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
            vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
            vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
        }
    } else {
        vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
        vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
        vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
        vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
        vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
        vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
        vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
        vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
        vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
    }

    /* HW scoreboard: each delta pair is a neighbouring-MB dependency
     * relative to the current macroblock */
    if (i965_h264_context->use_avc_hw_scoreboard) {
        vfe_state_ex->scoreboard0.enable = 1;
        vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
        vfe_state_ex->scoreboard0.mask = 0xff;

        vfe_state_ex->scoreboard1.delta_x0 = -1;
        vfe_state_ex->scoreboard1.delta_y0 = 0;
        vfe_state_ex->scoreboard1.delta_x1 = 0;
        vfe_state_ex->scoreboard1.delta_y1 = -1;
        vfe_state_ex->scoreboard1.delta_x2 = 1;
        vfe_state_ex->scoreboard1.delta_y2 = -1;
        vfe_state_ex->scoreboard1.delta_x3 = -1;
        vfe_state_ex->scoreboard1.delta_y3 = -1;

        vfe_state_ex->scoreboard2.delta_x4 = -1;
        vfe_state_ex->scoreboard2.delta_y4 = 1;
        vfe_state_ex->scoreboard2.delta_x5 = 0;
        vfe_state_ex->scoreboard2.delta_y5 = -2;
        vfe_state_ex->scoreboard2.delta_x6 = 1;
        vfe_state_ex->scoreboard2.delta_y6 = -2;
        vfe_state_ex->scoreboard2.delta_x7 = -1;
        vfe_state_ex->scoreboard2.delta_y7 = -2;
    }

    dri_bo_unmap(media_context->extended_state.bo);
}
/*
 * Fill the CURBE buffer: the intra kernel header for intra slices (or
 * always, when the hardware handles W=128 itself), otherwise the inter
 * kernel's weighted-prediction work-around words.
 */
static void
i965_media_h264_upload_constants(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 struct i965_media_context *media_context)
{
    struct i965_h264_context *i965_h264_context;
    unsigned char *constant_buffer;
    VASliceParameterBufferH264 *slice_param;

    assert(media_context->private_context);
    i965_h264_context = (struct i965_h264_context *)media_context->private_context;

    /* only the first slice's type is consulted here */
    assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
    slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;

    dri_bo_map(media_context->curbe.bo, 1);
    assert(media_context->curbe.bo->virtual);
    constant_buffer = media_context->curbe.bo->virtual;

    /* HW solution for W=128 */
    if (i965_h264_context->use_hw_w128) {
        memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
    } else {
        if (slice_param->slice_type == SLICE_TYPE_I ||
            slice_param->slice_type == SLICE_TYPE_SI) {
            memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
        } else {
            /* FIXME: Need to upload CURBE data to inter kernel interface
             * to support weighted prediction work-around.
             * Layout matches struct inter_kernel_header:
             * weight_offset (u16), weight_offset_flag (u8), pad0 (u8). */
            *(short *)constant_buffer = i965_h264_context->weight128_offset0;
            constant_buffer += 2;
            *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
            constant_buffer++;
            *constant_buffer = 0;
        }
    }

    dri_bo_unmap(media_context->curbe.bo);
}
/*
 * media_states_setup callback for H.264.  Runs the BSD stage (and HW
 * scoreboard when enabled) first so the indirect data exists, then
 * programs every media-pipeline state object in dependency order:
 * surfaces -> binding table -> IDRT -> extended VFE -> VFE -> CURBE.
 */
static void
i965_media_h264_states_setup(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct i965_media_context *media_context)
{
    struct i965_h264_context *i965_h264_context;

    assert(media_context->private_context);
    i965_h264_context = (struct i965_h264_context *)media_context->private_context;

    i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);

    if (i965_h264_context->use_avc_hw_scoreboard)
        i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);

    i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
    i965_media_h264_binding_table(ctx, media_context);
    i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
    i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
    i965_media_h264_vfe_state(ctx, media_context);
    i965_media_h264_upload_constants(ctx, decode_state, media_context);
}
/*
 * media_objects callback for H.264: terminate the MEDIA_OBJECT command
 * stream built by the BSD stage with MI_BATCH_BUFFER_END, chain to it
 * with MI_BATCH_BUFFER_START, flush, then start the ILDB pass.
 */
static void
i965_media_h264_objects(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct i965_media_context *media_context)
{
    struct intel_batchbuffer *batch = media_context->base.batch;
    struct i965_h264_context *i965_h264_context;
    unsigned int *object_command;

    assert(media_context->private_context);
    i965_h264_context = (struct i965_h264_context *)media_context->private_context;

    dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
    assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
    object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
    /* NOTE(review): the memset size is mbs * use_avc_hw_scoreboard *
     * MB_CMD_IN_BYTES, i.e. zero when the scoreboard is off — it only
     * clears the second (scoreboard) copy; presumably the first copy is
     * fully written by the BSD stage.  Confirm before changing. */
    memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
    /* skip past all MB commands (two copies when the scoreboard is on)
     * and append the terminator */
    object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
    *object_command++ = 0;
    *object_command = MI_BATCH_BUFFER_END;
    dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(batch);

    /* Have to execute the batch buffer here because MI_BATCH_BUFFER_END
     * will cause control to pass back to ring buffer
     */
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    i965_avc_ildb(ctx, decode_state, i965_h264_context);
}
/*
 * free_private_context hook: tear down the H.264 sub-pipelines, drop
 * every buffer object and kernel binary, free the context and clear the
 * caller's pointer.  Safe to call with *data == NULL.
 */
static void
i965_media_h264_free_private_context(void **data)
{
    struct i965_h264_context * const h264_ctx = *data;
    int n;

    if (!h264_ctx)
        return;

    /* Sub-pipelines first, then their backing buffers. */
    i965_avc_ildb_ternimate(&h264_ctx->avc_ildb_context);
    i965_avc_hw_scoreboard_ternimate(&h264_ctx->avc_hw_scoreboard_context);
    i965_avc_bsd_ternimate(&h264_ctx->i965_avc_bsd_context);

    dri_bo_unreference(h264_ctx->avc_it_command_mb_info.bo);
    dri_bo_unreference(h264_ctx->avc_it_data.bo);
    dri_bo_unreference(h264_ctx->avc_ildb_data.bo);

    for (n = 0; n < NUM_H264_AVC_KERNELS; n++) {
        dri_bo_unreference(h264_ctx->avc_kernels[n].bo);
        h264_ctx->avc_kernels[n].bo = NULL;
    }

    free(h264_ctx);
    *data = NULL;
}
/*
 * Per-picture H.264 init: derive picture geometry from the VA picture
 * parameters, (re)allocate the AVC-IT command / data and ILDB buffers,
 * initialise the BSD, scoreboard and ILDB stages, and allocate the
 * extended VFE state used by the media pipeline.
 */
void
i965_media_h264_decode_init(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct i965_media_context *media_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context = media_context->private_context;
    dri_bo *bo;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    /* for a field picture only half the MB rows belong to this picture */
    i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) /
                                               (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
    i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                                                   !pic_param->pic_fields.bits.field_pic_flag);
    i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs *
                                                     i965_h264_context->picture.height_in_mbs);

    /* AVC-IT MEDIA_OBJECT command buffer: a second copy is appended
     * when the HW scoreboard is on; +8 leaves room for the
     * MI_BATCH_BUFFER_END terminator */
    dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "avc it command mb info",
                      i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
                      0x1000);
    assert(bo);
    i965_h264_context->avc_it_command_mb_info.bo = bo;

    /* AVC-IT data buffer (0x800 bytes per MB, doubled for field pics) */
    dri_bo_unreference(i965_h264_context->avc_it_data.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "avc it data",
                      i965_h264_context->avc_it_command_mb_info.mbs *
                      0x800 *
                      (1 + !!pic_param->pic_fields.bits.field_pic_flag),
                      0x1000);
    assert(bo);
    i965_h264_context->avc_it_data.bo = bo;
    i965_h264_context->avc_it_data.write_offset = 0;

    /* the media pipeline reads the AVC-IT data as its indirect object */
    dri_bo_unreference(media_context->indirect_object.bo);
    media_context->indirect_object.bo = bo;
    dri_bo_reference(media_context->indirect_object.bo);
    media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;

    dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "AVC-ILDB Data Buffer",
                      i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
                      0x1000);
    assert(bo);
    i965_h264_context->avc_ildb_data.bo = bo;

    /* bsd pipeline */
    i965_avc_bsd_decode_init(ctx, i965_h264_context);

    /* HW scoreboard */
    if (i965_h264_context->use_avc_hw_scoreboard)
        i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);

    /* ILDB */
    i965_avc_ildb_decode_init(ctx, i965_h264_context);

    /* for Media pipeline */
    media_context->extended_state.enabled = 1;
    dri_bo_unreference(media_context->extended_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "extended vfe state", /* typo fix: was "extened" */
                      sizeof(struct i965_vfe_state_ex), 32);
    assert(bo);
    media_context->extended_state.bo = bo;
}
/*
 * Create and register the per-decoder H.264 context on the media pipeline.
 *
 * Selects the Gen4 or Gen5 (Ironlake) AVC kernel set, uploads the kernel
 * binaries to GEM buffers, resets the frame-store ID list, and programs the
 * URB partitioning plus the media-state hook functions on @media_context.
 * Ownership of the allocated i965_h264_context is handed to @media_context
 * (freed later via free_private_context).
 */
void
i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context;
    int i;

    i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
    /* FIX: the calloc() result was previously dereferenced below without
     * any check; fail loudly on allocation failure like the rest of the
     * allocations in this function do. */
    assert(i965_h264_context);

    /* kernel: both generations must ship the same number of kernels /
     * MC interface offsets, otherwise the tables are out of sync. */
    assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) /
                                    sizeof(h264_avc_kernels_gen5[0])));
    assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
                                     sizeof(avc_mc_kernel_offset_gen5[0])));

    if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
        avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
        intra_kernel_header = &intra_kernel_header_gen5;
        /* Ironlake has HW scoreboard and HW weight-128 support. */
        i965_h264_context->use_avc_hw_scoreboard = 1;
        i965_h264_context->use_hw_w128 = 1;
    } else {
        memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
        avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
        intra_kernel_header = &intra_kernel_header_gen4;
        i965_h264_context->use_avc_hw_scoreboard = 0;
        i965_h264_context->use_hw_w128 = 0;
    }

    /* Upload each kernel binary into its own 4KiB-aligned GEM buffer. */
    for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
        struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* FIX: iterate over the declared capacity of fsid_list
     * (fsid_list[MAX_GEN_REFERENCE_FRAMES] in i965_media_h264.h) instead
     * of a hard-coded 16, so the loop can never get out of sync with the
     * array size. */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
        i965_h264_context->fsid_list[i].frame_store_id = -1;
    }

    i965_h264_context->batch = media_context->base.batch;
    media_context->private_context = i965_h264_context;
    media_context->free_private_context = i965_media_h264_free_private_context;

    /* URB: Ironlake has a larger URB, so more VFE entries fit. */
    if (IS_IRONLAKE(i965->intel.device_info)) {
        media_context->urb.num_vfe_entries = 63;
    } else {
        media_context->urb.num_vfe_entries = 23;
    }

    media_context->urb.size_vfe_entry = 16;
    media_context->urb.num_cs_entries = 1;
    media_context->urb.size_cs_entry = 1;
    media_context->urb.vfe_start = 0;
    media_context->urb.cs_start = media_context->urb.vfe_start +
        media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
    assert(media_context->urb.cs_start +
           media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);

    /* hook functions */
    media_context->media_states_setup = i965_media_h264_states_setup;
    media_context->media_objects = i965_media_h264_objects;
}

View File

@@ -0,0 +1,74 @@
#ifndef _I965_MEDIA_H264_H_
#define _I965_MEDIA_H264_H_

#include "i965_avc_bsd.h"
#include "i965_avc_hw_scoreboard.h"
#include "i965_avc_ildb.h"
#include "i965_decoder.h"

struct decode_state;
struct i965_media_context;

/* Instruction allocation unit, in bytes, per hardware generation. */
#define INST_UNIT_GEN4 16
#define INST_UNIT_GEN5 8

/* Size of one macroblock command record in the AVC_IT command buffer,
 * expressed in bytes / dwords / owords respectively. */
#define MB_CMD_IN_BYTES 64
#define MB_CMD_IN_DWS 16
#define MB_CMD_IN_OWS 4

/* Indices of the AVC media kernels in avc_kernels[]. */
enum {
    H264_AVC_COMBINED = 0,
    H264_AVC_NULL
};

#define NUM_H264_AVC_KERNELS 2

/* Per-decoder state for H.264 decoding on the media pipeline. */
struct i965_h264_context
{
    /* AVC_IT command buffer; mbs is the total macroblock count of the
     * current picture (width_in_mbs * height_in_mbs) and sizes the bo. */
    struct {
        dri_bo *bo;
        unsigned int mbs;
    } avc_it_command_mb_info;

    /* AVC_IT data buffer; write_offset is exported as the media
     * pipeline's indirect-object offset. */
    struct {
        dri_bo *bo;
        long write_offset;
    } avc_it_data;

    /* In-loop deblocking (ILDB) working buffer. */
    struct {
        dri_bo *bo;
    } avc_ildb_data;

    /* Parameters of the picture currently being decoded. */
    struct {
        unsigned int width_in_mbs;
        unsigned int height_in_mbs;
        int mbaff_frame_flag;   /* MBAFF coding in use for this frame */
        int i_flag;
    } picture;

    int enable_avc_ildb;        /* run the ILDB pass */
    int use_avc_hw_scoreboard;  /* HW scoreboard available (Ironlake) */
    int use_hw_w128;            /* HW weight-128 support (Ironlake) */

    /* Weighted-prediction state for the weight==128 workaround. */
    unsigned int weight128_luma_l0;
    unsigned int weight128_luma_l1;
    unsigned int weight128_chroma_l0;
    unsigned int weight128_chroma_l1;
    char weight128_offset0_flag;
    short weight128_offset0;

    /* Sub-pipeline contexts: bitstream decode, scoreboard, deblocking. */
    struct i965_avc_bsd_context i965_avc_bsd_context;
    struct i965_avc_hw_scoreboard_context avc_hw_scoreboard_context;
    struct i965_avc_ildb_context avc_ildb_context;

    /* Reference-frame store bookkeeping; fsid_list entries are reset to
     * VA_INVALID_ID / -1 at context init. */
    GenFrameStoreContext fs_ctx;
    GenFrameStore fsid_list[MAX_GEN_REFERENCE_FRAMES];

    struct i965_kernel avc_kernels[NUM_H264_AVC_KERNELS];
    struct intel_batchbuffer *batch;
};

void i965_media_h264_decode_init(VADriverContextP ctx, struct decode_state *decode_state, struct i965_media_context *media_context);
void i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context);

#endif /* _I965_MEDIA_H264_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,53 @@
/*
 * Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#ifndef _I965_MEDIA_MPEG2_H_
#define _I965_MEDIA_MPEG2_H_

#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>

struct decode_state;
struct i965_media_context;

#define NUM_MPEG2_VLD_KERNELS 15

/* Per-decoder state for MPEG-2 VLD decoding on the media pipeline. */
struct i965_mpeg2_context
{
    struct i965_kernel vld_kernels[NUM_MPEG2_VLD_KERNELS];  /* one GEM-backed kernel per VLD stage */
    VAIQMatrixBufferMPEG2 iq_matrix;                        /* cached inverse-quant matrices */
    int wa_slice_vertical_position;                         /* workaround state for slice vertical position */
};

void i965_media_mpeg2_decode_init(VADriverContextP ctx, struct decode_state * decode_state, struct i965_media_context *media_context);
void i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context);

#endif /* _I965_MEDIA_MPEG2_H_ */

View File

@@ -0,0 +1,52 @@
#ifndef _I965_MUTEX_H_
#define _I965_MUTEX_H_

#include "intel_compiler.h"

/*
 * Thin mutex abstraction: when the driver is built with PTHREADS defined
 * (see the build flags), _I965Mutex wraps pthread_mutex_t; otherwise all
 * operations compile to no-ops for single-threaded builds.
 */
#if defined PTHREADS
#include <pthread.h>

typedef pthread_mutex_t _I965Mutex;

/* Initialize a mutex with default (non-recursive) attributes. */
static INLINE void _i965InitMutex(_I965Mutex *m)
{
    pthread_mutex_init(m, NULL);
}

static INLINE void
_i965DestroyMutex(_I965Mutex *m)
{
    pthread_mutex_destroy(m);
}

static INLINE void
_i965LockMutex(_I965Mutex *m)
{
    pthread_mutex_lock(m);
}

static INLINE void
_i965UnlockMutex(_I965Mutex *m)
{
    pthread_mutex_unlock(m);
}

#define _I965_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#define _I965_DECLARE_MUTEX(m) \
    _I965Mutex m = _I965_MUTEX_INITIALIZER

#else

/* No-threads fallback: every operation is a no-op. */
typedef int _I965Mutex;

static INLINE void _i965InitMutex(_I965Mutex *m) { (void) m; }
static INLINE void _i965DestroyMutex(_I965Mutex *m) { (void) m; }
static INLINE void _i965LockMutex(_I965Mutex *m) { (void) m; }
static INLINE void _i965UnlockMutex(_I965Mutex *m) { (void) m; }

#define _I965_MUTEX_INITIALIZER 0
#define _I965_DECLARE_MUTEX(m) \
    _I965Mutex m = _I965_MUTEX_INITIALIZER

#endif

#endif /* _I965_MUTEX_H_ */

View File

@@ -0,0 +1,212 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sysdeps.h"
#include <va/va_dricommon.h>
#include "i965_drv_video.h"
#include "i965_output_dri.h"
#include "dso_utils.h"
#define LIBVA_X11_NAME "libva-x11.so.1"
typedef struct dri_drawable *(*dri_get_drawable_func)(
VADriverContextP ctx, XID drawable);
typedef union dri_buffer *(*dri_get_rendering_buffer_func)(
VADriverContextP ctx, struct dri_drawable *d);
typedef void (*dri_swap_buffer_func)(
VADriverContextP ctx, struct dri_drawable *d);
struct dri_vtable {
dri_get_drawable_func get_drawable;
dri_get_rendering_buffer_func get_rendering_buffer;
dri_swap_buffer_func swap_buffer;
};
struct va_dri_output {
struct dso_handle *handle;
struct dri_vtable vtable;
};
/*
 * Set up DRI output: dlopen the libva-x11 helper library and resolve the
 * three drawable/buffer entry points into i965->dri_output->vtable.
 * Returns true on success; on any failure all partial state is torn down
 * via i965_output_dri_terminate() and false is returned.
 */
bool
i965_output_dri_init(VADriverContextP ctx)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct va_dri_output *out;

    /* Name -> vtable-offset table consumed by dso_get_symbols(). */
    static const struct dso_symbol symbols[] = {
        { "dri_get_drawable",
          offsetof(struct dri_vtable, get_drawable) },
        { "dri_get_rendering_buffer",
          offsetof(struct dri_vtable, get_rendering_buffer) },
        { "dri_swap_buffer",
          offsetof(struct dri_vtable, swap_buffer) },
        { NULL, }
    };

    out = calloc(1, sizeof(*out));
    i965->dri_output = out;
    if (out == NULL)
        goto fail;

    out->handle = dso_open(LIBVA_X11_NAME);
    if (out->handle == NULL)
        goto fail;

    if (!dso_get_symbols(out->handle, &out->vtable, sizeof(out->vtable),
                         symbols))
        goto fail;

    return true;

fail:
    i965_output_dri_terminate(ctx);
    return false;
}
/*
 * Tear down DRI output state created by i965_output_dri_init().
 * Safe to call with partially-initialized or absent state.
 */
void
i965_output_dri_terminate(VADriverContextP ctx)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct va_dri_output * const out = i965->dri_output;

    if (out == NULL)
        return;

    if (out->handle != NULL) {
        dso_close(out->handle);
        out->handle = NULL;
    }

    free(out);
    i965->dri_output = NULL;
}
/*
 * vaPutSurface implementation for X11/DRI2: render @surface (plus any
 * associated subpictures) into the drawable's DRI2 back buffer and swap.
 *
 * The destination intel_region is cached in render_state->draw_region and
 * re-created whenever the DRI2 buffer's flink name changes (e.g. after a
 * resize).  Returns VA_STATUS_SUCCESS, or an error for DRI1 contexts.
 */
VAStatus
i965_put_surface_dri(
    VADriverContextP ctx,
    VASurfaceID surface,
    void *draw,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    const VARectangle *cliprects,
    unsigned int num_cliprects,
    unsigned int flags
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct dri_vtable * const dri_vtable = &i965->dri_output->vtable;
    struct i965_render_state * const render_state = &i965->render_state;
    struct dri_drawable *dri_drawable;
    union dri_buffer *buffer;
    struct intel_region *dest_region;
    struct object_surface *obj_surface;
    unsigned int pp_flag = 0;
    bool new_region = false;
    uint32_t name;
    int i, ret;
    unsigned int color_flag = 0;

    /* Currently don't support DRI1 */
    if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2))
        return VA_STATUS_ERROR_UNKNOWN;

    /* Some broken sources such as H.264 conformance case FM2_SVA_C
     * will get here
     */
    obj_surface = SURFACE(surface);
    ASSERT_RET(obj_surface && obj_surface->bo, VA_STATUS_SUCCESS);

    _i965LockMutex(&i965->render_mutex);

    dri_drawable = dri_vtable->get_drawable(ctx, (Drawable)draw);
    assert(dri_drawable);

    buffer = dri_vtable->get_rendering_buffer(ctx, dri_drawable);
    assert(buffer);

    dest_region = render_state->draw_region;

    if (dest_region) {
        assert(dest_region->bo);
        dri_bo_flink(dest_region->bo, &name);

        /* Back buffer changed (e.g. window resized): rebuild the region.
         * FIX: use C99 'true' rather than Xlib's 'True' macro to match
         * the bool declaration of new_region. */
        if (buffer->dri2.name != name) {
            new_region = true;
            dri_bo_unreference(dest_region->bo);
        }
    } else {
        dest_region = (struct intel_region *)calloc(1, sizeof(*dest_region));
        assert(dest_region);
        render_state->draw_region = dest_region;
        new_region = true;
    }

    if (new_region) {
        dest_region->x = dri_drawable->x;
        dest_region->y = dri_drawable->y;
        dest_region->width = dri_drawable->width;
        dest_region->height = dri_drawable->height;
        dest_region->cpp = buffer->dri2.cpp;
        dest_region->pitch = buffer->dri2.pitch;

        /* Import the DRI2 buffer by its flink name. */
        dest_region->bo = intel_bo_gem_create_from_name(i965->intel.bufmgr, "rendering buffer", buffer->dri2.name);
        assert(dest_region->bo);

        ret = dri_bo_get_tiling(dest_region->bo, &(dest_region->tiling), &(dest_region->swizzle));
        assert(ret == 0);
        (void) ret; /* only consumed by the assert above (NDEBUG builds) */
    }

    /* Map vaPutSurface flags onto post-processing flags. */
    color_flag = flags & VA_SRC_COLOR_MASK;
    if (color_flag == 0)
        color_flag = VA_SRC_BT601;  /* default colorspace */

    pp_flag = color_flag;

    if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
        pp_flag |= I965_PP_FLAG_AVS;

    if (flags & VA_TOP_FIELD)
        pp_flag |= I965_PP_FLAG_TOP_FIELD;
    else if (flags & VA_BOTTOM_FIELD)
        pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;

    intel_render_put_surface(ctx, obj_surface, src_rect, dst_rect, pp_flag);

    /* Composite any attached subpictures on top of the surface. */
    for (i = 0; i < I965_MAX_SUBPIC_SUM; i++) {
        if (obj_surface->obj_subpic[i] != NULL) {
            assert(obj_surface->subpic[i] != VA_INVALID_ID);
            obj_surface->subpic_render_idx = i;
            intel_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
        }
    }

    if (!(g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_BENCH))
        dri_vtable->swap_buffer(ctx, dri_drawable);

    _i965UnlockMutex(&i965->render_mutex);

    return VA_STATUS_SUCCESS;
}

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef I965_OUTPUT_DRI_H
#define I965_OUTPUT_DRI_H

#include <stdbool.h>
#include <va/va_backend.h>

/* Load the libva-x11 DSO and resolve the DRI entry points; returns false
 * (after cleaning up) on failure. */
bool
i965_output_dri_init(VADriverContextP ctx);

/* Release all DRI output state; safe on partially-initialized state. */
void
i965_output_dri_terminate(VADriverContextP ctx);

/* vaPutSurface implementation for X11/DRI2 drawables. */
VAStatus
i965_put_surface_dri(
    VADriverContextP ctx,
    VASurfaceID surface,
    void *draw,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    const VARectangle *cliprects,
    unsigned int num_cliprects,
    unsigned int flags
);

#endif /* I965_OUTPUT_DRI_H */

View File

@@ -0,0 +1,424 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <va/va_backend.h>
#include <va/va_backend_wayland.h>
#include <wayland-client.h>
#include <wayland-drm-client-protocol.h>
#include "intel_driver.h"
#include "i965_output_wayland.h"
#include "i965_drv_video.h"
#include "i965_defines.h"
#include "dso_utils.h"
#define LIBEGL_NAME "libEGL.so.1"
#define LIBWAYLAND_CLIENT_NAME "libwayland-client.so.0"
typedef uint32_t (*wl_display_get_global_func)(struct wl_display *display,
const char *interface, uint32_t version);
typedef void (*wl_display_roundtrip_func)(struct wl_display *display);
typedef struct wl_proxy *(*wl_proxy_create_func)(struct wl_proxy *factory,
const struct wl_interface *interface);
typedef void (*wl_proxy_destroy_func)(struct wl_proxy *proxy);
typedef void (*wl_proxy_marshal_func)(struct wl_proxy *p, uint32_t opcode, ...);
typedef int (*wl_proxy_add_listener_func) (struct wl_proxy *proxy,
void (**implementation)(void), void *data);
struct wl_vtable {
const struct wl_interface *buffer_interface;
const struct wl_interface *drm_interface;
const struct wl_interface *registry_interface;
wl_display_roundtrip_func display_roundtrip;
wl_proxy_create_func proxy_create;
wl_proxy_destroy_func proxy_destroy;
wl_proxy_marshal_func proxy_marshal;
wl_proxy_add_listener_func proxy_add_listener;
};
struct va_wl_output {
struct dso_handle *libegl_handle;
struct dso_handle *libwl_client_handle;
struct wl_vtable vtable;
struct wl_drm *wl_drm;
struct wl_registry *wl_registry;
};
/* These function are copied and adapted from the version inside
* wayland-client-protocol.h
*/
/* Bind @name from the registry to a new proxy of @interface at @version.
 * Mirrors wl_registry_bind() but goes through our dlopen'ed vtable. */
static void *
registry_bind(
    struct wl_vtable *wl_vtable,
    struct wl_registry *wl_registry,
    uint32_t name,
    const struct wl_interface *interface,
    uint32_t version
)
{
    struct wl_proxy * const registry_proxy = (struct wl_proxy *) wl_registry;
    struct wl_proxy *bound;

    bound = wl_vtable->proxy_create(registry_proxy, interface);
    if (bound == NULL)
        return NULL;

    wl_vtable->proxy_marshal(registry_proxy, WL_REGISTRY_BIND,
                             name, interface->name, version, bound);

    return (void *) bound;
}
/* Obtain the wl_registry proxy for @wl_display.  Mirrors
 * wl_display_get_registry() but goes through our dlopen'ed vtable. */
static struct wl_registry *
display_get_registry(
    struct wl_vtable *wl_vtable,
    struct wl_display *wl_display
)
{
    struct wl_proxy * const display_proxy = (struct wl_proxy *) wl_display;
    struct wl_proxy *registry_proxy;

    registry_proxy = wl_vtable->proxy_create(display_proxy,
                                             wl_vtable->registry_interface);
    if (registry_proxy == NULL)
        return NULL;

    wl_vtable->proxy_marshal(display_proxy, WL_DISPLAY_GET_REGISTRY,
                             registry_proxy);

    return (struct wl_registry *) registry_proxy;
}
/* Install @listener on @wl_registry; forwards straight to the resolved
 * wl_proxy_add_listener() and returns its result. */
static int
registry_add_listener(
    struct wl_vtable *wl_vtable,
    struct wl_registry *wl_registry,
    const struct wl_registry_listener *listener,
    void *data
)
{
    struct wl_proxy * const registry_proxy = (struct wl_proxy *) wl_registry;

    return wl_vtable->proxy_add_listener(registry_proxy,
                                         (void (**)(void)) listener, data);
}
/* wl_registry "global" event handler: when the compositor announces a
 * "wl_drm" global, bind it and stash the proxy in wl_output->wl_drm. */
static void
registry_handle_global(
    void *data,
    struct wl_registry *registry,
    uint32_t id,
    const char *interface,
    uint32_t version
)
{
    VADriverContextP ctx = data;
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct va_wl_output * const wl_output = i965->wl_output;
    struct wl_vtable * const wl_vtable = &wl_output->vtable;

    /* Only the wl_drm global is of interest. */
    if (strcmp(interface, "wl_drm") != 0)
        return;

    wl_output->wl_drm = registry_bind(wl_vtable, wl_output->wl_registry,
                                      id, wl_vtable->drm_interface, 1);
}
/* Registry listener: only the global-announce callback is needed; the
 * second (remove) slot is intentionally left NULL. */
static const struct wl_registry_listener registry_listener = {
    registry_handle_global,
    NULL
};
/* Lazily bind the compositor's wl_drm global: register a registry
 * listener and round-trip the display so the "global" events arrive.
 * Returns true once wl_output->wl_drm is available. */
static bool
ensure_wl_output(VADriverContextP ctx)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct va_wl_output * const wl_output = i965->wl_output;
    struct wl_vtable * const wl_vtable = &wl_output->vtable;

    /* Already bound on a previous call. */
    if (wl_output->wl_drm != NULL)
        return true;

    wl_output->wl_registry = display_get_registry(wl_vtable, ctx->native_dpy);
    registry_add_listener(wl_vtable, wl_output->wl_registry,
                          &registry_listener, ctx);
    /* Block until the compositor has delivered the global announcements;
     * registry_handle_global() fills in wl_output->wl_drm. */
    wl_vtable->display_roundtrip(ctx->native_dpy);

    return wl_output->wl_drm != NULL;
}
/* Create a wl_buffer for a planar YUV surface identified by its GEM flink
 * @name, via the wl_drm CREATE_PLANAR_BUFFER request.  Unused planes pass
 * zero offset/pitch.  Returns NULL if the proxy cannot be created. */
static struct wl_buffer *
create_planar_buffer(
    struct va_wl_output *wl_output,
    uint32_t name,
    int32_t width,
    int32_t height,
    uint32_t format,
    int32_t offsets[3],
    int32_t pitches[3]
)
{
    struct wl_vtable * const wl_vtable = &wl_output->vtable;
    struct wl_proxy * const drm_proxy = (struct wl_proxy *) wl_output->wl_drm;
    struct wl_proxy *buffer_proxy;

    buffer_proxy = wl_vtable->proxy_create(drm_proxy,
                                           wl_vtable->buffer_interface);
    if (buffer_proxy == NULL)
        return NULL;

    wl_vtable->proxy_marshal(drm_proxy, WL_DRM_CREATE_PLANAR_BUFFER,
                             buffer_proxy, name,
                             width, height, format,
                             offsets[0], pitches[0],
                             offsets[1], pitches[1],
                             offsets[2], pitches[2]);

    return (struct wl_buffer *) buffer_proxy;
}
/* Hook to return Wayland buffer associated with the VA surface.
 *
 * Flinks the surface bo to get a global GEM name, maps the surface fourcc
 * and subsampling onto a WL_DRM_FORMAT_* code plus per-plane offsets and
 * pitches, and wraps the result in a wl_buffer via
 * create_planar_buffer().  Only whole-frame export (VA_FRAME_PICTURE) is
 * supported.
 */
static VAStatus
va_GetSurfaceBufferWl(
    struct VADriverContext *ctx,
    VASurfaceID surface,
    unsigned int flags,
    struct wl_buffer **out_buffer
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    struct wl_buffer *buffer;
    uint32_t name, drm_format;
    int offsets[3], pitches[3];

    obj_surface = SURFACE(surface);
    if (!obj_surface)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    if (flags != VA_FRAME_PICTURE)
        return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;

    if (!out_buffer)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* Make sure the wl_drm global has been bound. */
    if (!ensure_wl_output(ctx))
        return VA_STATUS_ERROR_INVALID_DISPLAY;

    /* Export the surface bo as a global GEM (flink) name. */
    if (drm_intel_bo_flink(obj_surface->bo, &name) != 0)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    switch (obj_surface->fourcc) {
    case VA_FOURCC_NV12:
        /* Two planes: Y, then interleaved CbCr; plane 2 unused.
         * NOTE(review): assumes obj_surface->width is the Y pitch in
         * bytes and y_cb_offset is in rows — confirm against the surface
         * allocation code. */
        drm_format = WL_DRM_FORMAT_NV12;
        offsets[0] = 0;
        pitches[0] = obj_surface->width;
        offsets[1] = obj_surface->width * obj_surface->y_cb_offset;
        pitches[1] = obj_surface->cb_cr_pitch;
        offsets[2] = 0;
        pitches[2] = 0;
        break;
    case VA_FOURCC_YV12:
    case VA_FOURCC_I420:
    case VA_FOURCC_IMC1:
    case VA_FOURCC_IMC3:
    case VA_FOURCC_422H:
    case VA_FOURCC_422V:
    case VA_FOURCC_411P:
    case VA_FOURCC_444P:
        /* Three separate planes: choose the wl_drm format from the
         * chroma subsampling of the surface. */
        switch (obj_surface->subsampling) {
        case SUBSAMPLE_YUV411:
            drm_format = WL_DRM_FORMAT_YUV411;
            break;
        case SUBSAMPLE_YUV420:
            drm_format = WL_DRM_FORMAT_YUV420;
            break;
        case SUBSAMPLE_YUV422H:
        case SUBSAMPLE_YUV422V:
            drm_format = WL_DRM_FORMAT_YUV422;
            break;
        case SUBSAMPLE_YUV444:
            drm_format = WL_DRM_FORMAT_YUV444;
            break;
        default:
            assert(0 && "unsupported subsampling");
            return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
        }
        offsets[0] = 0;
        pitches[0] = obj_surface->width;
        offsets[1] = obj_surface->width * obj_surface->y_cb_offset;
        pitches[1] = obj_surface->cb_cr_pitch;
        offsets[2] = obj_surface->width * obj_surface->y_cr_offset;
        pitches[2] = obj_surface->cb_cr_pitch;
        break;
    default:
        assert(0 && "unsupported format");
        return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
    }

    buffer = create_planar_buffer(
        i965->wl_output,
        name,
        obj_surface->orig_width,
        obj_surface->orig_height,
        drm_format,
        offsets,
        pitches
    );
    if (!buffer)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    *out_buffer = buffer;
    return VA_STATUS_SUCCESS;
}
/* Hook to return Wayland buffer associated with the VA image.
 * Not implemented: always reports VA_STATUS_ERROR_UNIMPLEMENTED;
 * all parameters are ignored. */
static VAStatus
va_GetImageBufferWl(
    struct VADriverContext *ctx,
    VAImageID image,
    unsigned int flags,
    struct wl_buffer **out_buffer
)
{
    return VA_STATUS_ERROR_UNIMPLEMENTED;
}
/* Install the Wayland buffer-export hooks into the context's Wayland
 * vtable.  Returns false if the context has no Wayland vtable. */
bool
ensure_driver_vtable(VADriverContextP ctx)
{
    struct VADriverVTableWayland * const wl_vtbl = ctx->vtable_wayland;

    if (wl_vtbl == NULL)
        return false;

    wl_vtbl->vaGetSurfaceBufferWl = va_GetSurfaceBufferWl;
    wl_vtbl->vaGetImageBufferWl = va_GetImageBufferWl;

    return true;
}
/*
 * Set up Wayland output: dlopen libEGL (for the wl_drm interface symbol)
 * and libwayland-client (for the core proxy entry points), resolve both
 * symbol tables into the shared wl_vtable, and install the VA Wayland
 * hooks.  Returns true on success; on any failure all partial state is
 * torn down via i965_output_wayland_terminate() and false is returned.
 */
bool
i965_output_wayland_init(VADriverContextP ctx)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct dso_handle *dso_handle;
    struct wl_vtable *wl_vtable;

    /* Symbols resolved from libEGL: only the wl_drm interface object. */
    static const struct dso_symbol libegl_symbols[] = {
        { "wl_drm_interface",
          offsetof(struct wl_vtable, drm_interface) },
        { NULL, }
    };
    /* Symbols resolved from libwayland-client: interfaces + proxy API. */
    static const struct dso_symbol libwl_client_symbols[] = {
        { "wl_buffer_interface",
          offsetof(struct wl_vtable, buffer_interface) },
        { "wl_registry_interface",
          offsetof(struct wl_vtable, registry_interface) },
        { "wl_display_roundtrip",
          offsetof(struct wl_vtable, display_roundtrip) },
        { "wl_proxy_create",
          offsetof(struct wl_vtable, proxy_create) },
        { "wl_proxy_destroy",
          offsetof(struct wl_vtable, proxy_destroy) },
        { "wl_proxy_marshal",
          offsetof(struct wl_vtable, proxy_marshal) },
        { "wl_proxy_add_listener",
          offsetof(struct wl_vtable, proxy_add_listener) },
        { NULL, }
    };

    /* Only meaningful for Wayland displays. */
    if (ctx->display_type != VA_DISPLAY_WAYLAND)
        return false;

    i965->wl_output = calloc(1, sizeof(struct va_wl_output));
    if (!i965->wl_output)
        goto error;

    i965->wl_output->libegl_handle = dso_open(LIBEGL_NAME);
    if (!i965->wl_output->libegl_handle)
        goto error;

    dso_handle = i965->wl_output->libegl_handle;
    wl_vtable = &i965->wl_output->vtable;
    if (!dso_get_symbols(dso_handle, wl_vtable, sizeof(*wl_vtable),
                         libegl_symbols))
        goto error;

    i965->wl_output->libwl_client_handle = dso_open(LIBWAYLAND_CLIENT_NAME);
    if (!i965->wl_output->libwl_client_handle)
        goto error;

    dso_handle = i965->wl_output->libwl_client_handle;
    wl_vtable = &i965->wl_output->vtable;
    if (!dso_get_symbols(dso_handle, wl_vtable, sizeof(*wl_vtable),
                         libwl_client_symbols))
        goto error;

    if (!ensure_driver_vtable(ctx))
        goto error;

    return true;

error:
    i965_output_wayland_terminate(ctx);
    return false;
}
/*
 * Tear down Wayland output state created by i965_output_wayland_init().
 * Destroys the wl_drm proxy, closes both DSO handles, and frees the
 * va_wl_output.  Safe on partially-initialized state; a no-op for
 * non-Wayland displays.
 */
void
i965_output_wayland_terminate(VADriverContextP ctx)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct va_wl_output *out;

    if (ctx->display_type != VA_DISPLAY_WAYLAND)
        return;

    out = i965->wl_output;
    if (out == NULL)
        return;

    if (out->wl_drm != NULL) {
        out->vtable.proxy_destroy((struct wl_proxy *) out->wl_drm);
        out->wl_drm = NULL;
    }

    if (out->libegl_handle != NULL) {
        dso_close(out->libegl_handle);
        out->libegl_handle = NULL;
    }

    if (out->libwl_client_handle != NULL) {
        dso_close(out->libwl_client_handle);
        out->libwl_client_handle = NULL;
    }

    free(out);
    i965->wl_output = NULL;
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) 2012 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef I965_OUTPUT_WAYLAND_H
#define I965_OUTPUT_WAYLAND_H

#include <stdbool.h>

/* Load the Wayland client DSOs and install the VA Wayland hooks; returns
 * false (after cleaning up) on failure or non-Wayland displays. */
bool
i965_output_wayland_init(VADriverContextP ctx);

/* Release all Wayland output state; safe on partially-initialized state. */
void
i965_output_wayland_terminate(VADriverContextP ctx);

#endif /* I965_OUTPUT_WAYLAND_H */

View File

@@ -0,0 +1,135 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Copied and modified from (mesa) include/pci_ids/i965_pci_ids.h
*/
/* PCI ID table consumed as an x-macro: the including file must define
 * CHIPSET(pci_id, family, variant, marketing_name) before including this
 * header.  The name strings are user-visible; keep them verbatim. */

/* G4x (Gen4.5) */
CHIPSET(0x2A42, g4x, g4x, "Intel(R) GM45 Express Chipset")
CHIPSET(0x2E02, g4x, g4x, "Intel(R) Integrated Graphics Device")
CHIPSET(0x2E12, g4x, g4x, "Intel(R) Q45/Q43")
CHIPSET(0x2E22, g4x, g4x, "Intel(R) G45/G43")
CHIPSET(0x2E32, g4x, g4x, "Intel(R) G41")
CHIPSET(0x2E42, g4x, g4x, "Intel(R) B43")
CHIPSET(0x2E92, g4x, g4x, "Intel(R) B43")
/* Ironlake (Gen5) */
CHIPSET(0x0042, ilk, ilk, "Intel(R) Ironlake Desktop")
CHIPSET(0x0046, ilk, ilk, "Intel(R) Ironlake Mobile")
/* Sandybridge (Gen6) */
CHIPSET(0x0102, snb, snb_gt1, "Intel(R) Sandybridge Desktop")
CHIPSET(0x0112, snb, snb_gt2, "Intel(R) Sandybridge Desktop")
CHIPSET(0x0122, snb, snb_gt2, "Intel(R) Sandybridge Desktop")
CHIPSET(0x0106, snb, snb_gt1, "Intel(R) Sandybridge Mobile")
CHIPSET(0x0116, snb, snb_gt2, "Intel(R) Sandybridge Mobile")
CHIPSET(0x0126, snb, snb_gt2, "Intel(R) Sandybridge Mobile")
CHIPSET(0x010A, snb, snb_gt1, "Intel(R) Sandybridge Server")
/* Ivybridge (Gen7) */
CHIPSET(0x0152, ivb, ivb_gt1, "Intel(R) Ivybridge Desktop")
CHIPSET(0x0162, ivb, ivb_gt2, "Intel(R) Ivybridge Desktop")
CHIPSET(0x0156, ivb, ivb_gt1, "Intel(R) Ivybridge Mobile")
CHIPSET(0x0166, ivb, ivb_gt2, "Intel(R) Ivybridge Mobile")
CHIPSET(0x015A, ivb, ivb_gt1, "Intel(R) Ivybridge Server")
CHIPSET(0x016A, ivb, ivb_gt2, "Intel(R) Ivybridge Server")
/* Bay Trail (Valleyview, Gen7-based Atom) */
CHIPSET(0x0F31, ivb, byt, "Intel(R) Bay Trail")
CHIPSET(0x0F32, ivb, byt, "Intel(R) Bay Trail")
CHIPSET(0x0F33, ivb, byt, "Intel(R) Bay Trail")
CHIPSET(0x0157, ivb, byt, "Intel(R) Bay Trail")
CHIPSET(0x0155, ivb, byt, "Intel(R) Bay Trail")
/* Haswell (Gen7.5) — GT1/GT2/GT3 across the 0x04xx/0x0Cxx/0x0Axx/0x0Dxx ranges */
CHIPSET(0x0402, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
CHIPSET(0x0412, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
CHIPSET(0x0422, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
CHIPSET(0x0406, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
CHIPSET(0x0416, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
CHIPSET(0x0426, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
CHIPSET(0x040A, hsw, hsw_gt1, "Intel(R) Haswell Server")
CHIPSET(0x041A, hsw, hsw_gt2, "Intel(R) Haswell Server")
CHIPSET(0x042A, hsw, hsw_gt3, "Intel(R) Haswell Server")
CHIPSET(0x040B, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x041B, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x042B, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x040E, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x041E, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x042E, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0C02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
CHIPSET(0x0C12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
CHIPSET(0x0C22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
CHIPSET(0x0C06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
CHIPSET(0x0C16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
CHIPSET(0x0C26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
CHIPSET(0x0C0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
CHIPSET(0x0C1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
CHIPSET(0x0C2A, hsw, hsw_gt3, "Intel(R) Haswell Server")
CHIPSET(0x0C0B, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0C1B, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0C2B, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0C0E, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0C1E, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0C2E, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0A02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
CHIPSET(0x0A12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
CHIPSET(0x0A22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
CHIPSET(0x0A06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
CHIPSET(0x0A16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
CHIPSET(0x0A26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
CHIPSET(0x0A0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
CHIPSET(0x0A1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
CHIPSET(0x0A2A, hsw, hsw_gt3, "Intel(R) Haswell Server")
CHIPSET(0x0A0B, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0A1B, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0A2B, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0A0E, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0A1E, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0A2E, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0D02, hsw, hsw_gt1, "Intel(R) Haswell Desktop")
CHIPSET(0x0D12, hsw, hsw_gt2, "Intel(R) Haswell Desktop")
CHIPSET(0x0D22, hsw, hsw_gt3, "Intel(R) Haswell Desktop")
CHIPSET(0x0D06, hsw, hsw_gt1, "Intel(R) Haswell Mobile")
CHIPSET(0x0D16, hsw, hsw_gt2, "Intel(R) Haswell Mobile")
CHIPSET(0x0D26, hsw, hsw_gt3, "Intel(R) Haswell Mobile")
CHIPSET(0x0D0A, hsw, hsw_gt1, "Intel(R) Haswell Server")
CHIPSET(0x0D1A, hsw, hsw_gt2, "Intel(R) Haswell Server")
/* NOTE(review): the sibling 0x0D0A/0x0D1A entries say "Haswell Server";
 * 0x0D2A likely should too — verify against the upstream mesa table. */
CHIPSET(0x0D2A, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0D0B, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0D1B, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0D2B, hsw, hsw_gt3, "Intel(R) Haswell")
CHIPSET(0x0D0E, hsw, hsw_gt1, "Intel(R) Haswell")
CHIPSET(0x0D1E, hsw, hsw_gt2, "Intel(R) Haswell")
CHIPSET(0x0D2E, hsw, hsw_gt3, "Intel(R) Haswell")
/* Broadwell (Gen8) */
CHIPSET(0x1602, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x1606, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x160A, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x160B, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x160D, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x160E, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x1612, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x1616, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x161A, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x161B, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x161D, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x161E, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x1622, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x1626, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x162A, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x162B, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x162D, bdw, bdw, "Intel(R) Broadwell")
CHIPSET(0x162E, bdw, bdw, "Intel(R) Broadwell")
/* CherryView (Braswell, Gen8-based Atom) */
CHIPSET(0x22B0, chv, chv, "Intel(R) CherryView")
CHIPSET(0x22B1, chv, chv, "Intel(R) CherryView")
CHIPSET(0x22B2, chv, chv, "Intel(R) CherryView")
CHIPSET(0x22B3, chv, chv, "Intel(R) CherryView")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,586 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef __I965_POST_PROCESSING_H__
#define __I965_POST_PROCESSING_H__
/* Upper bound on surface-state / binding-table entries a PP pass may use. */
#define MAX_PP_SURFACES 48
/* Bit flags passed into the post-processing paths.
 * TOP/BOTTOM_FIELD select the field for field-based (de)interlacing;
 * MCDI presumably selects motion-compensated de-interlacing and AVS the
 * adaptive video scaler — TODO confirm against the callers. */
#define I965_PP_FLAG_TOP_FIELD 1
#define I965_PP_FLAG_BOTTOM_FIELD 2
#define I965_PP_FLAG_MCDI 4
#define I965_PP_FLAG_AVS 8
/* Identifiers for the post-processing kernel modules.  Each value indexes
 * pp_modules[NUM_PP_MODULES] in struct i965_post_processing_context, so the
 * order is significant.
 * Naming: LOAD_SAVE_* = pure pixel-format conversion (NV12/PL3 planar/PA
 * packed/RGBX), SCALING/AVS = scalers, DNDI/DN = denoise (+deinterlace). */
enum
{
PP_NULL = 0,
PP_NV12_LOAD_SAVE_N12,
PP_NV12_LOAD_SAVE_PL3,
PP_PL3_LOAD_SAVE_N12,
PP_PL3_LOAD_SAVE_PL3,
PP_NV12_SCALING,
PP_NV12_AVS,
PP_NV12_DNDI,
PP_NV12_DN,
PP_NV12_LOAD_SAVE_PA,
PP_PL3_LOAD_SAVE_PA,
PP_PA_LOAD_SAVE_NV12,
PP_PA_LOAD_SAVE_PL3,
PP_PA_LOAD_SAVE_PA,
PP_RGBX_LOAD_SAVE_NV12,
PP_NV12_LOAD_SAVE_RGBX,
NUM_PP_MODULES,       /* count sentinel — keep last */
};
struct i965_post_processing_context;
/* Per-run state for the LOAD_SAVE (format conversion) kernels:
 * destination rectangle in pixels. */
struct pp_load_save_context
{
int dest_x;
int dest_y;
int dest_w;
int dest_h;
};
/* Per-run state for the plain scaling kernel: destination rectangle plus
 * the normalized (0..1) source origin fed to the sampler. */
struct pp_scaling_context
{
int dest_x; /* in pixel */
int dest_y; /* in pixel */
int dest_w;
int dest_h;
float src_normalized_x;
float src_normalized_y;
};
/* Per-run state for the AVS (adaptive video scaler) kernel.  Like
 * pp_scaling_context, plus the source extent and the horizontal range
 * used by the non-linear scaling path. */
struct pp_avs_context
{
int dest_x; /* in pixel */
int dest_y; /* in pixel */
int dest_w;
int dest_h;
float src_normalized_x;
float src_normalized_y;
int src_w;
int src_h;
float horiz_range;
};
/* Per-run state for the DNDI (denoise + deinterlace) kernel. */
struct pp_dndi_context
{
int dest_w;
int dest_h;
dri_bo *stmm_bo;            /* spatial-temporal motion measure scratch buffer */
int frame_order; /* -1 for the first frame */
VASurfaceID current_out_surface;
struct object_surface *current_out_obj_surface;
};
/* Per-run state for the denoise-only kernel. */
struct pp_dn_context
{
int dest_w;
int dest_h;
dri_bo *stmm_bo;            /* motion-measure scratch buffer, as in DNDI */
};
struct i965_post_processing_context;
/* One post-processing kernel plus its per-run setup hook.  initialize() is
 * called before the kernel runs to fill the pp context's static/inline
 * parameters for the given src/dst surfaces and filter parameters. */
struct pp_module
{
struct i965_kernel kernel;
/* others */
VAStatus (*initialize)(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
const struct i965_surface *src_surface,
const VARectangle *src_rect,
struct i965_surface *dst_surface,
const VARectangle *dst_rect,
void *filter_param);
};
struct pp_static_parameter
{
struct {
/* Procamp r1.0 */
float procamp_constant_c0;
/* Load and Same r1.1 */
unsigned int source_packed_y_offset:8;
unsigned int source_packed_u_offset:8;
unsigned int source_packed_v_offset:8;
unsigned int source_rgb_layout:8; // 1 for |R|G|B|X| layout, 0 for |B|G|R|X| layout
union {
/* Load and Save r1.2 */
struct {
unsigned int destination_packed_y_offset:8;
unsigned int destination_packed_u_offset:8;
unsigned int destination_packed_v_offset:8;
unsigned int pad0:8;
} load_and_save;
/* CSC r1.2 */
struct {
unsigned int pad0:24;
unsigned int destination_rgb_layout:8; // 1 for |R|G|B|X| layout, 0 for |B|G|R|X| layout
} csc;
} r1_2;
/* Procamp r1.3 */
float procamp_constant_c1;
/* Procamp r1.4 */
float procamp_constant_c2;
/* DI r1.5 */
unsigned int statistics_surface_picth:16; /* Devided by 2 */
unsigned int pad1:16;
union {
/* DI r1.6 */
struct {
unsigned int pad0:24;
unsigned int top_field_first:8;
} di;
/* AVS/Scaling r1.6 */
float normalized_video_y_scaling_step;
} r1_6;
/* Procamp r1.7 */
float procamp_constant_c5;
} grf1;
struct {
/* Procamp r2.0 */
float procamp_constant_c3;
/* MBZ r2.1*/
unsigned int pad0;
/* WG+CSC r2.2 */
float wg_csc_constant_c4;
/* WG+CSC r2.3 */
float wg_csc_constant_c8;
/* Procamp r2.4 */
float procamp_constant_c4;
/* MBZ r2.5 */
unsigned int pad1;
/* MBZ r2.6 */
unsigned int pad2;
/* WG+CSC r2.7 */
float wg_csc_constant_c9;
} grf2;
struct {
/* WG+CSC r3.0 */
float wg_csc_constant_c0;
/* Blending r3.1 */
float scaling_step_ratio;
/* Blending r3.2 */
float normalized_alpha_y_scaling;
/* WG+CSC r3.3 */
float wg_csc_constant_c4;
/* WG+CSC r3.4 */
float wg_csc_constant_c1;
/* ALL r3.5 */
int horizontal_origin_offset:16;
int vertical_origin_offset:16;
/* Shared r3.6*/
union {
/* Color filll */
unsigned int color_pixel;
/* WG+CSC */
float wg_csc_constant_c2;
} r3_6;
/* WG+CSC r3.7 */
float wg_csc_constant_c3;
} grf3;
struct {
/* WG+CSC r4.0 */
float wg_csc_constant_c6;
/* ALL r4.1 MBZ ???*/
unsigned int pad0;
/* Shared r4.2 */
union {
/* AVS */
struct {
unsigned int pad1:15;
unsigned int nlas:1;
unsigned int pad2:16;
} avs;
/* DI */
struct {
unsigned int motion_history_coefficient_m2:8;
unsigned int motion_history_coefficient_m1:8;
unsigned int pad0:16;
} di;
} r4_2;
/* WG+CSC r4.3 */
float wg_csc_constant_c7;
/* WG+CSC r4.4 */
float wg_csc_constant_c10;
/* AVS r4.5 */
float source_video_frame_normalized_horizontal_origin;
/* MBZ r4.6 */
unsigned int pad1;
/* WG+CSC r4.7 */
float wg_csc_constant_c11;
} grf4;
};
/* Per-block inline (indirect) parameters for the pre-GEN7 PP kernels,
 * registers r5-r6.  Updated for every 16xN macro-block the media walker
 * dispatches; layout is consumed directly by the shader. */
struct pp_inline_parameter
{
struct {
/* ALL r5.0 */
int destination_block_horizontal_origin:16;
int destination_block_vertical_origin:16;
/* Shared r5.1 */
union {
/* AVS/Scaling */
float source_surface_block_normalized_horizontal_origin;
/* FMD */
struct {
unsigned int variance_surface_vertical_origin:16;
unsigned int pad0:16;
} fmd;
} r5_1;
/* AVS/Scaling r5.2 */
float source_surface_block_normalized_vertical_origin;
/* Alpha r5.3 */
float alpha_surface_block_normalized_horizontal_origin;
/* Alpha r5.4 */
float alpha_surface_block_normalized_vertical_origin;
/* Alpha r5.5 */
unsigned int alpha_mask_x:16;
unsigned int alpha_mask_y:8;
unsigned int block_count_x:8;
/* r5.6 */
/* we only support M*1 or 1*N block partitation now.
 * -- it means asm code only need update this mask from grf6 for the last block
 */
unsigned int block_horizontal_mask:16;
unsigned int block_vertical_mask:8;
unsigned int number_blocks:8;
/* AVS/Scaling r5.7 */
float normalized_video_x_scaling_step;
} grf5;
struct {
/* AVS r6.0 */
float video_step_delta;
/* r6.1 */ // sizeof(int) == 4?
unsigned int block_horizontal_mask_right:16;
unsigned int block_vertical_mask_bottom:8;
unsigned int pad1:8;
/* r6.2 */
unsigned int block_horizontal_mask_middle:16;
unsigned int pad2:16;
/* r6.3-r6.7 */
unsigned int padx[5];
} grf6;
};
/* CURBE static parameters for the GEN7+ PP kernels (registers r1-r6).
 * Same register-image rules as pp_static_parameter: the layout is read
 * directly by the shader and must not change. */
struct gen7_pp_static_parameter
{
struct {
/* r1.0-r1.5 */
unsigned int padx[6];
/* r1.6 */
unsigned int di_statistics_surface_pitch_div2:16;
unsigned int di_statistics_surface_height_div4:16;
/* r1.7 */
unsigned int di_top_field_first:8;
unsigned int pad0:16;
unsigned int pointer_to_inline_parameter:8; /* value: 7 */
} grf1;
struct {
/* r2.0 */
/* Indicates whether the rgb is swapped for the src surface
 * 0: RGBX(MSB. X-B-G-R). 1: BGRX(MSB: X-R-G-B)
 */
unsigned int src_avs_rgb_swap:1;
unsigned int pad3:31;
/* r2.1 */
unsigned int pad2:16;
unsigned int save_avs_rgb_swap:1; /* 0: RGB, 1: BGR */
unsigned int avs_wa_enable:1; /* must enabled for GEN7 */
unsigned int ief_enable:1;
unsigned int avs_wa_width:13;
/* 2.2 */
float avs_wa_one_div_256_width;
/* 2.3 */
float avs_wa_five_div_256_width;
/* 2.4 - 2.6 */
unsigned int padx[3];
/* r2.7 */
unsigned int di_destination_packed_y_component_offset:8;
unsigned int di_destination_packed_u_component_offset:8;
unsigned int di_destination_packed_v_component_offset:8;
unsigned int alpha:8;
} grf2;
struct {
float sampler_load_horizontal_scaling_step_ratio;
unsigned int padx[7];
} grf3;
struct {
float sampler_load_vertical_scaling_step;
unsigned int pad0;
unsigned int di_hoffset_svf_from_dvf:16;
unsigned int di_voffset_svf_from_dvf:16;
unsigned int padx[5];
} grf4;
struct {
float sampler_load_vertical_frame_origin;
unsigned int padx[7];
} grf5;
struct {
float sampler_load_horizontal_frame_origin;
unsigned int padx[7];
} grf6;
};
/* Per-block inline parameters for the GEN7+ PP kernels (registers r7-r8).
 * grf1.pointer_to_inline_parameter above points at r7 (value 7). */
struct gen7_pp_inline_parameter
{
struct {
/* r7.0 */
unsigned int destination_block_horizontal_origin:16;
unsigned int destination_block_vertical_origin:16;
/* r7.1: 0xffffffff */
unsigned int constant_0;
/* r7.2 */
unsigned int pad0;
/* r7.3 */
unsigned int pad1;
/* r7.4 */
float sampler_load_main_video_x_scaling_step;
/* r7.5 */
unsigned int pad2;
/* r7.6: must be zero */
unsigned int avs_vertical_block_number;
/* r7.7: 0 */
unsigned int group_id_number;
} grf7;
struct {
unsigned int padx[8];
} grf8;
};
/* Aggregate state for one post-processing pipeline instance: the kernel
 * table, GPU state buffers (binding table, CURBE, interface descriptors,
 * VFE, samplers), per-filter contexts, and the per-run dispatch hooks.
 * The instruction/indirect/dynamic_state members are the GEN8+ split
 * state-base-address heaps; the older per-object dri_bo members above them
 * serve pre-GEN8 paths. */
struct i965_post_processing_context
{
int current_pp;                       /* active PP_* module id */
struct pp_module pp_modules[NUM_PP_MODULES];
void *pp_static_parameter;            /* points at a pp/gen7_pp_static_parameter */
void *pp_inline_parameter;            /* points at a pp/gen7_pp_inline_parameter */
struct {
dri_bo *bo;
} surface_state_binding_table;
struct {
dri_bo *bo;
} curbe;
struct {
dri_bo *bo;
int num_interface_descriptors;
} idrt;
struct {
dri_bo *bo;
} vfe_state;
struct {
dri_bo *bo;
dri_bo *bo_8x8;
dri_bo *bo_8x8_uv;
} sampler_state_table;
struct {
unsigned int size;
unsigned int vfe_start;
unsigned int cs_start;
unsigned int num_vfe_entries;
unsigned int num_cs_entries;
unsigned int size_vfe_entry;
unsigned int size_cs_entry;
} urb;
struct {
unsigned int gpgpu_mode : 1;
unsigned int pad0 : 7;
unsigned int max_num_threads : 16;
unsigned int num_urb_entries : 8;
unsigned int urb_entry_size : 16;
unsigned int curbe_allocation_size : 16;
} vfe_gpu_state;
struct pp_load_save_context pp_load_save_context;
struct pp_scaling_context pp_scaling_context;
struct pp_avs_context pp_avs_context;
struct pp_dndi_context pp_dndi_context;
struct pp_dn_context pp_dn_context;
void *private_context; /* pointer to the current private context */
void *pipeline_param; /* pointer to the pipeline parameter */
/* Walker callbacks filled in by the active module: block counts in each
 * dimension and per-block parameter setup. */
int (*pp_x_steps)(void *private_context);
int (*pp_y_steps)(void *private_context);
int (*pp_set_block_parameter)(struct i965_post_processing_context *pp_context, int x, int y);
struct intel_batchbuffer *batch;
unsigned int block_horizontal_mask_left:16;
unsigned int block_horizontal_mask_right:16;
unsigned int block_vertical_mask_bottom:8;
struct {
dri_bo *bo;
int bo_size;
unsigned int end_offset;
} instruction_state;
struct {
dri_bo *bo;
} indirect_state;
struct {
dri_bo *bo;
int bo_size;
unsigned int end_offset;
} dynamic_state;
/* Offsets/sizes of the sub-allocations inside dynamic_state.bo */
unsigned int sampler_offset;
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
unsigned int curbe_offset;
int curbe_size;
/* Generation-specific entry point that executes one PP pass. */
VAStatus (*intel_post_processing)(VADriverContextP ctx,
struct i965_post_processing_context *pp_context,
const struct i965_surface *src_surface,
const VARectangle *src_rect,
struct i965_surface *dst_surface,
const VARectangle *dst_rect,
int pp_index,
void * filter_param);
void (*finalize)(struct i965_post_processing_context *pp_context);
};
/* VAProfileNone (video processing) hardware context: the generic hw_context
 * header followed by the post-processing state it owns. */
struct i965_proc_context
{
struct hw_context base;
struct i965_post_processing_context pp_context;
};
/* Run the PP pipeline on obj_surface (deinterlace/denoise/scale per flags);
 * returns the output surface id and sets *has_done_scaling if scaling was
 * already applied. */
VASurfaceID
i965_post_processing(
VADriverContextP ctx,
struct object_surface *obj_surface,
const VARectangle *src_rect,
const VARectangle *dst_rect,
unsigned int flags,
int *has_done_scaling
);
/* Scale src into dst (surface objects supplied by the caller). */
VAStatus
i965_scaling_processing(
VADriverContextP ctx,
struct object_surface *src_surface_obj,
const VARectangle *src_rect,
struct object_surface *dst_surface_obj,
const VARectangle *dst_rect,
unsigned int flags
);
/* Format-convert/copy between two i965_surface wrappers (surface or image). */
VAStatus
i965_image_processing(VADriverContextP ctx,
const struct i965_surface *src_surface,
const VARectangle *src_rect,
struct i965_surface *dst_surface,
const VARectangle *dst_rect);
/* Driver-lifetime setup/teardown of the shared PP state. */
void
i965_post_processing_terminate(VADriverContextP ctx);
bool
i965_post_processing_init(VADriverContextP ctx);
/* vaRenderPicture entry point for VAProfileNone pipelines. */
extern VAStatus
i965_proc_picture(VADriverContextP ctx,
VAProfile profile,
union codec_state *codec_state,
struct hw_context *hw_context);
#endif /* __I965_POST_PROCESSING_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,160 @@
/*
 * Copyright © 2006 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#ifndef _I965_RENDER_H_
#define _I965_RENDER_H_
/* One sampler per subpicture layer plus limits for the render pipeline. */
#define MAX_SAMPLERS 16
#define MAX_RENDER_SURFACES (MAX_SAMPLERS + 1)
#define NUM_RENDER_KERNEL 3
/* Mask that extracts the VA_SRC_* color-standard bits from render flags. */
#define VA_SRC_COLOR_MASK 0x000000f0
#include "i965_post_processing.h"
struct i965_kernel;
/* State for the 3D-pipeline "render" path used to put surfaces/subpictures
 * on screen: per-stage state buffers (VS/SF/WM/CC), vertex data, kernels,
 * and — for GEN8+ — the combined instruction/dynamic state heaps with the
 * offsets of each sub-allocation.  The function pointers at the end are the
 * generation-specific implementations installed at init time. */
struct i965_render_state
{
struct {
dri_bo *vertex_buffer;
} vb;
struct {
dri_bo *state;
} vs;
struct {
dri_bo *state;
} sf;
struct {
int sampler_count;
dri_bo *sampler;
dri_bo *state;
dri_bo *surface_state_binding_table_bo;
} wm;
struct {
dri_bo *state;
dri_bo *viewport;
dri_bo *blend;
dri_bo *depth_stencil;
} cc;
struct {
dri_bo *bo;
} curbe;
unsigned short interleaved_uv;        /* nonzero for NV12-style chroma */
unsigned short inited;
struct intel_region *draw_region;
int pp_flag; /* 0: disable, 1: enable */
struct i965_kernel render_kernels[3];
struct {
dri_bo *bo;
int bo_size;
unsigned int end_offset;
} instruction_state;
struct {
dri_bo *bo;
} indirect_state;
struct {
dri_bo *bo;
int bo_size;
unsigned int end_offset;
} dynamic_state;
/* Offsets/sizes of sub-allocations inside dynamic_state.bo (GEN8+). */
unsigned int curbe_offset;
int curbe_size;
unsigned int sampler_offset;
int sampler_size;
unsigned int cc_viewport_offset;
int cc_viewport_size;
unsigned int cc_state_offset;
int cc_state_size;
unsigned int blend_state_offset;
int blend_state_size;
unsigned int sf_clip_offset;
int sf_clip_size;
unsigned int scissor_offset;
int scissor_size;
void (*render_put_surface)(VADriverContextP ctx, struct object_surface *,
const VARectangle *src_rec,
const VARectangle *dst_rect,
unsigned int flags);
void (*render_put_subpicture)(VADriverContextP ctx, struct object_surface *,
const VARectangle *src_rec,
const VARectangle *dst_rect);
void (*render_terminate)(VADriverContextP ctx);
};
/* Driver-lifetime setup/teardown of the render state above. */
bool i965_render_init(VADriverContextP ctx);
void i965_render_terminate(VADriverContextP ctx);
/* Dispatch through render_state->render_put_surface / _put_subpicture. */
void
intel_render_put_surface(
VADriverContextP ctx,
struct object_surface *obj_surface,
const VARectangle *src_rect,
const VARectangle *dst_rect,
unsigned int flags
);
void
intel_render_put_subpicture(
VADriverContextP ctx,
struct object_surface *obj_surface,
const VARectangle *src_rect,
const VARectangle *dst_rect
);
/* Fill the shader-channel-select fields of a GEN7/GEN8 surface state. */
struct gen7_surface_state;
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss);
struct gen8_surface_state;
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss);
extern bool gen8_render_init(VADriverContextP ctx);
#endif /* _I965_RENDER_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,388 @@
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "intel_batchbuffer.h"
#define MAX_BATCH_SIZE 0x400000
/* Discard the current buffer object (if any) and start a fresh, empty batch
 * of buffer_size bytes.  The new BO is mapped writable immediately and stays
 * mapped until flush, so recipe code writes through batch->ptr directly. */
static void
intel_batchbuffer_reset(struct intel_batchbuffer *batch, int buffer_size)
{
struct intel_driver_data *intel = batch->intel;
int batch_size = buffer_size;
assert(batch->flag == I915_EXEC_RENDER ||
batch->flag == I915_EXEC_BLT ||
batch->flag == I915_EXEC_BSD ||
batch->flag == I915_EXEC_VEBOX);
dri_bo_unreference(batch->buffer);
/* 4 KiB alignment; allocation failure is only caught by the asserts below
 * (NOTE(review): no error path in release builds). */
batch->buffer = dri_bo_alloc(intel->bufmgr,
"batch buffer",
batch_size,
0x1000);
assert(batch->buffer);
dri_bo_map(batch->buffer, 1);
assert(batch->buffer->virtual);
batch->map = batch->buffer->virtual;
batch->size = batch_size;
batch->ptr = batch->map;
batch->atomic = 0;
}
/* Bytes still writable in the batch, keeping BATCH_RESERVED bytes in hand
 * for the terminating MI_BATCH_BUFFER_END (and padding) added at flush. */
static unsigned int
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
    unsigned int consumed = batch->ptr - batch->map;
    unsigned int usable = batch->size - BATCH_RESERVED;

    return usable - consumed;
}
/* Allocate a batchbuffer bound to one ring (flag).  buffer_size is clamped
 * to [BATCH_SIZE, MAX_BATCH_SIZE].  On GEN6 render a 4 KiB scratch BO is
 * allocated for the PIPE_CONTROL flush workaround (see
 * intel_batchbuffer_emit_mi_flush). */
struct intel_batchbuffer *
intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size)
{
/* NOTE(review): calloc result is dereferenced without a NULL check. */
struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
assert(flag == I915_EXEC_RENDER ||
flag == I915_EXEC_BSD ||
flag == I915_EXEC_BLT ||
flag == I915_EXEC_VEBOX);
if (!buffer_size || buffer_size < BATCH_SIZE) {
buffer_size = BATCH_SIZE;
}
/* the buffer size can't exceed 4M */
if (buffer_size > MAX_BATCH_SIZE) {
buffer_size = MAX_BATCH_SIZE;
}
batch->intel = intel;
batch->flag = flag;
batch->run = drm_intel_bo_mrb_exec;
if (IS_GEN6(intel->device_info) &&
flag == I915_EXEC_RENDER)
batch->wa_render_bo = dri_bo_alloc(intel->bufmgr,
"wa scratch",
4096,
4096);
else
batch->wa_render_bo = NULL;
intel_batchbuffer_reset(batch, buffer_size);
return batch;
}
/* Release a batchbuffer: drop the CPU mapping if still live, then both BO
 * references (dri_bo_unreference tolerates the NULL wa_render_bo) and the
 * struct itself. */
void intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
    if (batch->map != NULL) {
        dri_bo_unmap(batch->buffer);
        batch->map = NULL;
    }

    dri_bo_unreference(batch->buffer);
    dri_bo_unreference(batch->wa_render_bo);
    free(batch);
}
/* Terminate the batch and submit it to the kernel, then reset for reuse.
 * A no-op on an empty batch.  The batch must end with MI_BATCH_BUFFER_END
 * on a qword (8-byte) boundary, so a zero (MI_NOOP) dword is inserted first
 * whenever the used size is a multiple of 8. */
void
intel_batchbuffer_flush(struct intel_batchbuffer *batch)
{
unsigned int used = batch->ptr - batch->map;
if (used == 0) {
return;
}
/* (used & 4) == 0 means used ≡ 0 (mod 8): pad so used+END ends qword-aligned */
if ((used & 4) == 0) {
*(unsigned int*)batch->ptr = 0;
batch->ptr += 4;
}
*(unsigned int*)batch->ptr = MI_BATCH_BUFFER_END;
batch->ptr += 4;
dri_bo_unmap(batch->buffer);
used = batch->ptr - batch->map;
/* execbuffer on the ring selected by batch->flag */
batch->run(batch->buffer, used, 0, 0, 0, batch->flag);
intel_batchbuffer_reset(batch, batch->size);
}
/* Append one dword to the batch.  The caller must have reserved space
 * (see intel_batchbuffer_require_space); only an assert guards overflow. */
void
intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x)
{
    unsigned int *slot;

    assert(intel_batchbuffer_space(batch) >= 4);
    slot = (unsigned int *)batch->ptr;
    *slot = x;
    batch->ptr += 4;
}
/* Append a relocated GPU address: record the relocation with the kernel at
 * the current batch offset, then emit the BO's presumed offset + delta so
 * the dword is already correct if the BO does not move. */
void
intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
uint32_t read_domains, uint32_t write_domains,
uint32_t delta)
{
assert(batch->ptr - batch->map < batch->size);
dri_bo_emit_reloc(batch->buffer, read_domains, write_domains,
delta, batch->ptr - batch->map, bo);
intel_batchbuffer_emit_dword(batch, bo->offset + delta);
}
/* Guarantee at least size bytes are writable, flushing (submitting) the
 * current batch if it cannot hold them.  size must fit in one batch. */
void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
                                unsigned int size)
{
    assert(size < batch->size - 8);

    if (intel_batchbuffer_space(batch) >= size)
        return;

    intel_batchbuffer_flush(batch);
}
/* Bulk-copy size bytes (a whole number of dwords) into the batch, flushing
 * first if necessary to make room. */
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       void *data,
                       unsigned int size)
{
    unsigned char *dst;

    assert((size & 3) == 0);
    intel_batchbuffer_require_space(batch, size);

    dst = batch->ptr;
    assert(dst);
    memcpy(dst, data, size);
    batch->ptr = dst + size;
}
/* Emit a full pipeline flush appropriate for the device generation and the
 * ring this batch targets.
 *  - GEN6-8 render ring: PIPE_CONTROL.  GEN6 needs the documented
 *    workaround sequence (CS stall + stall-at-scoreboard, then a qword
 *    post-sync write to the scratch BO) before the real flush.
 *  - GEN6-8 BLT/BSD/VEBOX rings: MI_FLUSH_DW (+ video-cache invalidate on BSD).
 *  - Older generations: legacy MI_FLUSH. */
void
intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
{
struct intel_driver_data *intel = batch->intel;
if (IS_GEN6(intel->device_info) ||
IS_GEN7(intel->device_info) ||
IS_GEN8(intel->device_info)) {
if (batch->flag == I915_EXEC_RENDER) {
if (IS_GEN8(intel->device_info)) {
/* GEN8: 6-dword PIPE_CONTROL (48-bit address field), no post-sync write */
BEGIN_BATCH(batch, 6);
OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
OUT_BATCH(batch,
CMD_PIPE_CONTROL_CS_STALL |
CMD_PIPE_CONTROL_WC_FLUSH |
CMD_PIPE_CONTROL_TC_FLUSH |
CMD_PIPE_CONTROL_DC_FLUSH |
CMD_PIPE_CONTROL_NOWRITE);
OUT_BATCH(batch, 0); /* write address */
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0); /* write data */
OUT_BATCH(batch, 0);
ADVANCE_BATCH(batch);
} else if (IS_GEN6(intel->device_info)) {
assert(batch->wa_render_bo);
/* GEN6 flush workaround: stall, dummy qword write, then real flush */
BEGIN_BATCH(batch, 4 * 3);
OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
OUT_BATCH(batch,
CMD_PIPE_CONTROL_CS_STALL |
CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(batch, 0); /* address */
OUT_BATCH(batch, 0); /* write data */
OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD);
OUT_RELOC(batch,
batch->wa_render_bo,
I915_GEM_DOMAIN_INSTRUCTION,
I915_GEM_DOMAIN_INSTRUCTION,
0);
OUT_BATCH(batch, 0); /* write data */
/* now finally the _real flush */
OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
OUT_BATCH(batch,
CMD_PIPE_CONTROL_WC_FLUSH |
CMD_PIPE_CONTROL_TC_FLUSH |
CMD_PIPE_CONTROL_NOWRITE);
OUT_BATCH(batch, 0); /* write address */
OUT_BATCH(batch, 0); /* write data */
ADVANCE_BATCH(batch);
} else {
/* GEN7: single 4-dword PIPE_CONTROL */
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
OUT_BATCH(batch,
CMD_PIPE_CONTROL_WC_FLUSH |
CMD_PIPE_CONTROL_TC_FLUSH |
CMD_PIPE_CONTROL_DC_FLUSH |
CMD_PIPE_CONTROL_NOWRITE);
OUT_BATCH(batch, 0); /* write address */
OUT_BATCH(batch, 0); /* write data */
ADVANCE_BATCH(batch);
}
} else {
if (batch->flag == I915_EXEC_BLT) {
BEGIN_BLT_BATCH(batch, 4);
OUT_BLT_BATCH(batch, MI_FLUSH_DW);
OUT_BLT_BATCH(batch, 0);
OUT_BLT_BATCH(batch, 0);
OUT_BLT_BATCH(batch, 0);
ADVANCE_BLT_BATCH(batch);
}else if (batch->flag == I915_EXEC_VEBOX) {
BEGIN_VEB_BATCH(batch, 4);
OUT_VEB_BATCH(batch, MI_FLUSH_DW);
OUT_VEB_BATCH(batch, 0);
OUT_VEB_BATCH(batch, 0);
OUT_VEB_BATCH(batch, 0);
ADVANCE_VEB_BATCH(batch);
} else {
assert(batch->flag == I915_EXEC_BSD);
BEGIN_BCS_BATCH(batch, 4);
OUT_BCS_BATCH(batch, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
}
} else {
/* Pre-GEN6: legacy MI_FLUSH on render or BSD */
if (batch->flag == I915_EXEC_RENDER) {
BEGIN_BATCH(batch, 1);
OUT_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
ADVANCE_BATCH(batch);
} else {
assert(batch->flag == I915_EXEC_BSD);
BEGIN_BCS_BATCH(batch, 1);
OUT_BCS_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
ADVANCE_BCS_BATCH(batch);
}
}
}
/* Record the start of a fixed-length command packet of `total` dwords;
 * intel_batchbuffer_advance_batch() asserts exactly that many were emitted. */
void
intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total)
{
    batch->emit_start = batch->ptr;
    batch->emit_total = total * 4;   /* dwords -> bytes */
}
/* Close a packet opened with intel_batchbuffer_begin_batch(): assert the
 * emitted byte count matches the declared packet length. */
void
intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch)
{
assert(batch->emit_total == (batch->ptr - batch->emit_start));
}
/* Retarget the batch to another ring.  Unknown flags are ignored; switching
 * rings first submits whatever is pending on the current one. */
void
intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag)
{
    int known_ring = (flag == I915_EXEC_RENDER ||
                      flag == I915_EXEC_BLT ||
                      flag == I915_EXEC_BSD ||
                      flag == I915_EXEC_VEBOX);

    if (!known_ring || batch->flag == flag)
        return;

    intel_batchbuffer_flush(batch);
    batch->flag = flag;
}
/* Nonzero iff `size` bytes fit in the batch without flushing. */
int
intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size)
{
    unsigned int avail = intel_batchbuffer_space(batch);

    return avail >= size;
}
/* Begin an atomic (non-splittable) section on the given ring: switch ring
 * if needed, reserve `size` bytes up front, and mark the batch atomic so a
 * mid-section flush trips the assert in a matching end_atomic. */
static void
intel_batchbuffer_start_atomic_helper(struct intel_batchbuffer *batch,
int flag,
unsigned int size)
{
assert(!batch->atomic);
intel_batchbuffer_check_batchbuffer_flag(batch, flag);
intel_batchbuffer_require_space(batch, size);
batch->atomic = 1;
}
/* Atomic section on the render ring. */
void
intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size)
{
intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_RENDER, size);
}
/* Atomic section on the blitter ring. */
void
intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size)
{
intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BLT, size);
}
/* Atomic section on the BSD (video) ring. */
void
intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size)
{
intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BSD, size);
}
/* Atomic section on the VEBOX ring. */
void
intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size)
{
intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_VEBOX, size);
}
/* Close an atomic section opened by one of the start_atomic variants. */
void
intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
{
assert(batch->atomic);
batch->atomic = 0;
}
/* Bytes written to the batch since the last reset/flush. */
int
intel_batchbuffer_used_size(struct intel_batchbuffer *batch)
{
    unsigned char *head = batch->map;
    unsigned char *tail = batch->ptr;

    return tail - head;
}
/* Pad the batch with zero dwords (MI_NOOP) until its used size is a
 * multiple of the given alignment (itself a multiple of 4). */
void
intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment)
{
    int used = batch->ptr - batch->map;
    int padding;

    assert((alignedment & 3) == 0);
    padding = ALIGN(used, alignedment) - used;
    assert((padding & 3) == 0);
    assert(intel_batchbuffer_space(batch) >= padding);

    /* padding is a multiple of 4, so emit it one dword at a time */
    for (; padding > 0; padding -= 4)
        intel_batchbuffer_emit_dword(batch, 0);
}

View File

@@ -0,0 +1,98 @@
#ifndef _INTEL_BATCHBUFFER_H_
#define _INTEL_BATCHBUFFER_H_
#include <xf86drm.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "intel_driver.h"
/* A CPU-mapped GEM batch buffer bound to one ring (flag = I915_EXEC_*).
 * map/ptr delimit the write cursor; emit_start/emit_total track the packet
 * opened by BEGIN_BATCH for length checking; run is the execbuffer entry
 * point (drm_intel_bo_mrb_exec). */
struct intel_batchbuffer
{
struct intel_driver_data *intel;
dri_bo *buffer;            /* backing GEM BO, kept mapped while recording */
unsigned int size;         /* total BO size in bytes */
unsigned char *map;        /* CPU address of the mapping */
unsigned char *ptr;        /* current write position inside map */
int atomic;                /* nonzero inside a start/end_atomic section */
int flag;                  /* target ring: I915_EXEC_RENDER/BLT/BSD/VEBOX */
int emit_total;            /* declared packet length in bytes */
unsigned char *emit_start; /* packet start recorded by begin_batch */
int (*run)(drm_intel_bo *bo, int used,
drm_clip_rect_t *cliprects, int num_cliprects,
int DR4, unsigned int ring_flag);
/* Used for Sandybdrige workaround */
dri_bo *wa_render_bo;
};
/* --- batchbuffer API (see intel_batchbuffer.c for semantics) --- */
struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size);
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch);
void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x);
void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
uint32_t read_domains, uint32_t write_domains,
uint32_t delta);
void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_data(struct intel_batchbuffer *batch, void *data, unsigned int size);
void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
void intel_batchbuffer_flush(struct intel_batchbuffer *batch);
void intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total);
void intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch);
void intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag);
int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size);
int intel_batchbuffer_used_size(struct intel_batchbuffer *batch);
void intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment);
/* BEGIN/OUT/ADVANCE emitter macros.  BEGIN asserts the batch is already on
 * ring f, reserves n dwords, and opens a length-checked packet; ADVANCE
 * verifies exactly n dwords were emitted.  The per-ring variants
 * (BLT/BCS/VEB) only differ in which ring they assert. */
#define __BEGIN_BATCH(batch, n, f) do { \
assert(f == batch->flag); \
intel_batchbuffer_check_batchbuffer_flag(batch, f); \
intel_batchbuffer_require_space(batch, (n) * 4); \
intel_batchbuffer_begin_batch(batch, (n)); \
} while (0)
#define __OUT_BATCH(batch, d) do { \
intel_batchbuffer_emit_dword(batch, d); \
} while (0)
#define __OUT_RELOC(batch, bo, read_domains, write_domain, delta) do { \
assert((delta) >= 0); \
intel_batchbuffer_emit_reloc(batch, bo, \
read_domains, write_domain, \
delta); \
} while (0)
#define __ADVANCE_BATCH(batch) do { \
intel_batchbuffer_advance_batch(batch); \
} while (0)
#define BEGIN_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_RENDER)
#define BEGIN_BLT_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BLT)
#define BEGIN_BCS_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BSD)
#define BEGIN_VEB_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_VEBOX)
#define OUT_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_BLT_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_BCS_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_VEB_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_RELOC(batch, bo, read_domains, write_domain, delta) \
__OUT_RELOC(batch, bo, read_domains, write_domain, delta)
#define OUT_BLT_RELOC(batch, bo, read_domains, write_domain, delta) \
__OUT_RELOC(batch, bo, read_domains, write_domain, delta)
#define OUT_BCS_RELOC(batch, bo, read_domains, write_domain, delta) \
__OUT_RELOC(batch, bo, read_domains, write_domain, delta)
#define ADVANCE_BATCH(batch) __ADVANCE_BATCH(batch)
#define ADVANCE_BLT_BATCH(batch) __ADVANCE_BATCH(batch)
#define ADVANCE_BCS_BATCH(batch) __ADVANCE_BATCH(batch)
#define ADVANCE_VEB_BATCH(batch) __ADVANCE_BATCH(batch)
#endif /* _INTEL_BATCHBUFFER_H_ */

View File

@@ -0,0 +1,775 @@
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <inttypes.h>
#include "intel_driver.h"
#include "intel_batchbuffer_dump.h"
#ifdef I965_DEBUG
/* Abort decoding of the current command when the batch ends mid-packet:
 * report to gout, count a failure, and return `count` from the enclosing
 * dump_* function (the macro expands a `return`). */
#define BUFFER_FAIL(_count, _len, _name) do { \
fprintf(gout, "Buffer size too small in %s (%d < %d)\n", \
(_name), (_count), (_len)); \
(*failures)++; \
return count; \
} while (0)
/* Output stream for the whole dumper (set elsewhere in this file). */
static FILE *gout;
/* Print one decoded dword: "address: raw-value: description".  `index` is
 * the dword's position within the current command packet. */
static void
instr_out(unsigned int *data, unsigned int offset, unsigned int index, char *fmt, ...)
{
va_list va;
fprintf(gout, "0x%08x: 0x%08x:%s ", offset + index * 4, data[index],
index == 0 ? "" : " ");
va_start(va, fmt);
vfprintf(gout, fmt, va);
va_end(va);
}
/* Decode one MI_* command at data[0].  Returns the number of dwords
 * consumed (1 for unknown commands, which also count as a failure).
 * For variable-length commands the length is taken from the dword-count
 * field in data[0] (mask_length) plus the 2-dword bias. */
static int
dump_mi(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
unsigned int opcode;
int length, i;
/* Table of the MI commands this dumper understands.  mask_length == 0
 * marks fixed single-dword commands. */
struct {
unsigned int opcode;
int mask_length;
int min_len;
int max_len;
char *name;
} mi_commands[] = {
{ 0x00, 0, 1, 1, "MI_NOOP" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
{ 0x26, 0x3f, 4, 5, "MI_FLUSH_DW" },
};
opcode = ((data[0] & MASK_MI_OPCODE) >> SHIFT_MI_OPCODE);
for (i = 0; i < sizeof(mi_commands) / sizeof(mi_commands[0]); i++) {
if (opcode == mi_commands[i].opcode) {
int index;
length = 1;
instr_out(data, offset, 0, "%s\n", mi_commands[i].name);
if (mi_commands[i].max_len > 1) {
/* dword count field + 2 = total packet length */
length = (data[0] & mi_commands[i].mask_length) + 2;
if (length < mi_commands[i].min_len ||
length > mi_commands[i].max_len) {
fprintf(gout, "Bad length (%d) in %s, [%d, %d]\n",
length, mi_commands[i].name,
mi_commands[i].min_len,
mi_commands[i].max_len);
}
}
for (index = 1; index < length; index++) {
if (index >= count)
BUFFER_FAIL(count, length, mi_commands[i].name);
instr_out(data, offset, index, "dword %d\n", index);
}
return length;
}
}
instr_out(data, offset, 0, "UNKNOWN MI COMMAND\n");
(*failures)++;
return 1;
}
/* 3D-pipeline commands are not decoded by this dumper: flag the dword as
 * unknown, count a failure, and skip exactly one dword. */
static int
dump_gfxpipe_3d(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    instr_out(data, offset, 0, "UNKNOWN 3D COMMAND\n");
    *failures += 1;

    return 1;
}
/* Decode an AVC_BSD_IMG_STATE packet: frame size, dimensions (in MBs,
 * judging by the 8-bit fields — TODO confirm), QP offsets, picture
 * structure and the per-picture coding flags packed into data[4]. */
static void
dump_avc_bsd_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
int img_struct = ((data[3] >> 8) & 0x3);
instr_out(data, offset, 1, "frame size: %d\n", (data[1] & 0xffff));
instr_out(data, offset, 2, "width: %d, height: %d\n", (data[2] & 0xff), (data[2] >> 16) & 0xff);
instr_out(data, offset, 3,
"second_chroma_qp_offset: %d,"
"chroma_qp_offset: %d,"
"QM present flag: %d,"
"image struct: %s,"
"img_dec_fs_idc: %d,"
"\n",
(data[3] >> 24) & 0x1f,
(data[3] >> 16) & 0x1f,
(data[3] >> 10) & 0x1,
(img_struct == 0) ? "frame" : (img_struct == 2) ? "invalid" : (img_struct == 1) ? "top field" : "bottom field",
data[3] & 0xff);
instr_out(data, offset, 4,
"residual off: 0x%x,"
"16MV: %d,"
"chroma fmt: %d,"
"CABAC: %d,"
"non-ref: %d,"
"constrained intra: %d,"
"direct8x8: %d,"
"trans8x8: %d,"
"MB only: %d,"
"MBAFF: %d,"
"\n",
(data[4] >> 24) & 0xff,
(data[4] >> 12) & 0x1,
(data[4] >> 10) & 0x3,
(data[4] >> 7) & 0x1,
(data[4] >> 6) & 0x1,
(data[4] >> 5) & 0x1,
(data[4] >> 4) & 0x1,
(data[4] >> 3) & 0x1,
(data[4] >> 2) & 0x1,
(data[4] >> 1) & 0x1);
instr_out(data, offset, 5, "AVC-IT Command Header\n");
}
/* Detail decoder for AVC_BSD_QM_STATE: dword 1 carries the user-default and
 * QM-list-present bitmasks; the remaining dwords are raw quant-matrix data. */
static void
dump_avc_bsd_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    /* The header length field encodes "total dwords - 2". */
    unsigned int total_len = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
    int dw;

    instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n",
              (data[1] >> 8) & 0xff, data[1] & 0xff);
    for (dw = 2; dw < total_len; dw++)
        instr_out(data, offset, dw, "dword %d\n", dw);
}
/* Detail decoder for AVC_BSD_SLICE_STATE: intentionally empty — the command
 * table registers NULL for this packet, so the generic dword dump is used
 * instead and this stub is never called. */
static void
dump_avc_bsd_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
}
/* Detail decoder for AVC_BSD_BUF_BASE_STATE (0x4a dwords): the fixed buffer
 * base addresses, 32 per-reference direct-MV read bases (DW 6-37), the
 * current frame's direct-MV write bases (DW 38-39) and the POC list. */
static void
dump_avc_bsd_buf_base_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    int dw;

    instr_out(data, offset, 1, "BSD row store base address\n");
    instr_out(data, offset, 2, "MPR row store base address\n");
    instr_out(data, offset, 3, "AVC-IT command buffer base address\n");
    instr_out(data, offset, 4, "AVC-IT data buffer: 0x%08x, write offset: 0x%x\n",
              data[4] & 0xFFFFF000, data[4] & 0xFC0);
    instr_out(data, offset, 5, "ILDB data buffer\n");
    for (dw = 6; dw <= 37; dw++)
        instr_out(data, offset, dw, "Direct MV read base address for reference frame %d\n", dw - 6);
    instr_out(data, offset, 38, "direct mv wr0 top\n");
    instr_out(data, offset, 39, "direct mv wr0 bottom\n");
    for (dw = 40; dw <= 73; dw++)
        instr_out(data, offset, dw, "POC List %d\n", dw - 40);
}
/* Detail decoder for BSD_IND_OBJ_BASE_ADDR: the indirect-object base and its
 * access upper bound for the AVC bitstream data. */
static void
dump_bsd_ind_obj_base_addr(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "AVC indirect object base address\n");
    instr_out(data, offset, 2, "AVC Indirect Object Access Upper Bound\n");
}
static void
dump_ironlake_avc_bsd_object(unsigned int *data, unsigned int offset, int *failures)
{
int slice_type = data[3] & 0xf;
int i, is_phantom = ((data[1] & 0x3fffff) == 0);
if (!is_phantom) {
instr_out(data, offset, 1, "Encrypted: %d, bitsteam length: %d\n", data[1] >> 31, data[1] & 0x3fffff);
instr_out(data, offset, 2, "Indirect Data Start Address: %d\n", data[2] & 0x1fffffff);
instr_out(data, offset, 3, "%s Slice\n", slice_type == 0 ? "P" : slice_type == 1 ? "B" : "I");
instr_out(data, offset, 4,
"Num_Ref_Idx_L1: %d,"
"Num_Ref_Idx_L0: %d,"
"Log2WeightDenomChroma: %d,"
"Log2WeightDenomLuma: %d"
"\n",
(data[4] >> 24) & 0x3f,
(data[4] >> 16) & 0x3f,
(data[4] >> 8) & 0x3,
(data[4] >> 0) & 0x3);
instr_out(data, offset, 5,
"WeightedPredIdc: %d,"
"DirectPredType: %d,"
"DisableDeblockingFilter: %d,"
"CabacInitIdc: %d,"
"SliceQp: %d,"
"SliceBetaOffsetDiv2: %d,"
"SliceAlphaC0OffsetDiv2: %d"
"\n",
(data[5] >> 30) & 0x3,
(data[5] >> 29) & 0x1,
(data[5] >> 27) & 0x3,
(data[5] >> 24) & 0x3,
(data[5] >> 16) & 0x3f,
(data[5] >> 8) & 0xf,
(data[5] >> 0) & 0xf);
instr_out(data, offset, 6,
"Slice_MB_Start_Vert_Pos: %d,"
"Slice_MB_Start_Hor_Pos: %d,"
"Slice_Start_Mb_Num: %d"
"\n",
(data[6] >> 24) & 0xff,
(data[6] >> 16) & 0xff,
(data[6] >> 0) & 0x7fff);
instr_out(data, offset, 7,
"Fix_Prev_Mb_Skipped: %d,"
"First_MB_Bit_Offset: %d"
"\n",
(data[7] >> 7) & 0x1,
(data[7] >> 0) & 0x7);
for (i = 8; i < 16; i++)
instr_out(data, offset, i, "dword %d\n", i);
} else {
instr_out(data, offset, 1, "phantom slice\n");
for (i = 2; i < 6; i++)
instr_out(data, offset, i, "dword %d\n", i);
instr_out(data, offset, 6,
"Slice_Start_Mb_Num: %d"
"\n",
(data[6] >> 0) & 0x7fff);
for (i = 7; i < 16; i++)
instr_out(data, offset, i, "dword %d\n", i);
}
}
/* G4x variant of the AVC_BSD_OBJECT decoder: not implemented — dumps nothing,
 * so G4x slices are silently skipped (only the packet header is printed by
 * the caller). */
static void
dump_g4x_avc_bsd_object(unsigned int *data, unsigned int offset, int *failures)
{
}
/* Dispatch AVC_BSD_OBJECT decoding by generation.
 * NOTE(review): IS_IRONLAKE() in intel_driver.h takes a device_info pointer,
 * while this file passes a raw device id — presumably this dump code predates
 * that change or uses a local macro; confirm which IS_IRONLAKE is in scope. */
static void
dump_avc_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    if (!IS_IRONLAKE(device)) {
        dump_g4x_avc_bsd_object(data, offset, failures);
        return;
    }
    dump_ironlake_avc_bsd_object(data, offset, failures);
}
/*
 * Decode one AVC packet on the pre-Gen6 BSD ring.  The sub-opcode is looked
 * up in a descriptor table; the packet length is validated against the
 * table's [min, max] range, then either a detail decoder is invoked or the
 * payload is dumped as raw dwords.  Returns the number of dwords consumed
 * (1 for unrecognized packets, which also bump *failures).
 */
static int
dump_bsd_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int subopcode;
    int length, n;
    struct {
        unsigned int subopcode;
        int min_len;
        int max_len;
        char *name;
        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures);
    } avc_commands[] = {
        { 0x00, 0x06, 0x06, "AVC_BSD_IMG_STATE", dump_avc_bsd_img_state },
        { 0x01, 0x02, 0x3a, "AVC_BSD_QM_STATE", dump_avc_bsd_qm_state },
        { 0x02, 0x02, 0xd2, "AVC_BSD_SLICE_STATE", NULL },
        { 0x03, 0x4a, 0x4a, "AVC_BSD_BUF_BASE_STATE", dump_avc_bsd_buf_base_state },
        { 0x04, 0x03, 0x03, "BSD_IND_OBJ_BASE_ADDR", dump_bsd_ind_obj_base_addr },
        { 0x08, 0x08, 0x10, "AVC_BSD_OBJECT", dump_avc_bsd_object },
    };

    subopcode = (data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE;

    for (n = 0; n < sizeof(avc_commands) / sizeof(avc_commands[0]); n++) {
        unsigned int dw;

        if (subopcode != avc_commands[n].subopcode)
            continue;

        /* The header's length field encodes "total dwords - 2". */
        length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
        instr_out(data, offset, 0, "%s\n", avc_commands[n].name);

        if (length < avc_commands[n].min_len ||
            length > avc_commands[n].max_len) {
            fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
                    length, avc_commands[n].name,
                    avc_commands[n].min_len,
                    avc_commands[n].max_len);
        }

        if (length - 1 >= count)
            BUFFER_FAIL(count, length, avc_commands[n].name);

        if (avc_commands[n].detail)
            avc_commands[n].detail(data, offset, device, failures);
        else {
            for (dw = 1; dw < length; dw++)
                instr_out(data, offset, dw, "dword %d\n", dw);
        }
        return length;
    }

    instr_out(data, offset, 0, "UNKNOWN AVC COMMAND\n");
    (*failures)++;
    return 1;
}
/* Dispatch a pre-Gen6 BSD-pipe packet by opcode.  Only the AVC opcode is
 * decoded; anything else is reported as unknown and consumes one dword. */
static int
dump_gfxpipe_bsd(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int opcode = (data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE;

    if (opcode == OPCODE_BSD_AVC)
        return dump_bsd_avc(data, offset, count, device, failures);

    (*failures)++;
    instr_out(data, offset, 0, "UNKNOWN BSD OPCODE\n");
    return 1;
}
/* Detail decoder for MFX_PIPE_MODE_SELECT (Gen6+): DW1 carries the pipe
 * configuration; DW2/DW3 are not decoded. */
static void
dump_mfx_mode_select(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1,
              "decoder mode: %d(%s),"
              "post deblocking output enable %d,"
              "pre deblocking output enable %d,"
              "codec select: %d(%s),"
              "standard select: %d(%s)"
              "\n",
              /* bit 16: 1 = IT (post-transform) mode, 0 = VLD mode */
              (data[1] >> 16) & 0x1, ((data[1] >> 16) & 0x1) ? "IT" : "VLD",
              (data[1] >> 9) & 0x1,
              (data[1] >> 8) & 0x1,
              (data[1] >> 4) & 0x1, ((data[1] >> 4) & 0x1) ? "Encode" : "Decode",
              (data[1] >> 0) & 0x3, ((data[1] >> 0) & 0x3) == 0 ? "MPEG2" :
              ((data[1] >> 0) & 0x3) == 1 ? "VC1" :
              ((data[1] >> 0) & 0x3) == 2 ? "AVC" : "Reserved");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
}
/* Detail decoder for MFX_SURFACE_STATE: field decoding not implemented,
 * payload dwords 1-5 are dumped raw. */
static void
dump_mfx_surface_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
}
/* Detail decoder for MFX_PIPE_BUF_ADDR_STATE: field decoding not implemented,
 * the payload is dumped raw.  The command is 0x18 (24) dwords long per the
 * dispatch table, i.e. indices 0 (header, printed by the caller) through 23. */
static void
dump_mfx_pipe_buf_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
    instr_out(data, offset, 7, "dword 07\n");
    instr_out(data, offset, 8, "dword 08\n");
    instr_out(data, offset, 9, "dword 09\n");
    instr_out(data, offset, 10, "dword 10\n");
    instr_out(data, offset, 11, "dword 11\n");
    instr_out(data, offset, 12, "dword 12\n");
    instr_out(data, offset, 13, "dword 13\n");
    instr_out(data, offset, 14, "dword 14\n");
    instr_out(data, offset, 15, "dword 15\n");
    instr_out(data, offset, 16, "dword 16\n");
    instr_out(data, offset, 17, "dword 17\n");
    instr_out(data, offset, 18, "dword 18\n");
    instr_out(data, offset, 19, "dword 19\n");
    instr_out(data, offset, 20, "dword 20\n");
    instr_out(data, offset, 21, "dword 21\n");
    instr_out(data, offset, 22, "dword 22\n");
    /* BUG FIX: previously printed data[24] labeled "dword 23" — an off-by-one
     * that read one dword past the 24-dword packet and skipped index 23. */
    instr_out(data, offset, 23, "dword 23\n");
}
/* Detail decoder for MFX_IND_OBJ_BASE_ADDR_STATE: field decoding not
 * implemented, payload dwords 1-10 are dumped raw (0x0b dwords total). */
static void
dump_mfx_ind_obj_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
    instr_out(data, offset, 7, "dword 07\n");
    instr_out(data, offset, 8, "dword 08\n");
    instr_out(data, offset, 9, "dword 09\n");
    instr_out(data, offset, 10, "dword 10\n");
}
/* Detail decoder for MFX_BSP_BUF_BASE_ADDR_STATE: field decoding not
 * implemented, payload dwords 1-3 are dumped raw. */
static void
dump_mfx_bsp_buf_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
}
/* Detail decoder for MFX_AES_STATE: field decoding not implemented, payload
 * dwords 1-6 are dumped raw. */
static void
dump_mfx_aes_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
}
/* Detail decoder for MFX_STATE_POINTER: dumps the single payload dword raw.
 * NOTE(review): the dispatch table registers this entry with min/max length
 * 0x00 while decoded lengths are always >= 2, so it always warns "Bad
 * length" before reaching here — confirm the intended bounds. */
static void
dump_mfx_state_pointer(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
}
/*
 * Decode one MFX "common" (pipe-configuration) packet on Gen6+.  The
 * sub-opcode is looked up in a descriptor table; the packet length is
 * validated against the table's [min, max] range, then a detail decoder is
 * invoked or the payload is dumped as raw dwords.  Returns the number of
 * dwords consumed (1 for unrecognized packets, which also bump *failures).
 */
static int
dump_mfx_common(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int subopcode;
    int length, n;
    struct {
        unsigned int subopcode;
        int min_len;
        int max_len;
        char *name;
        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures);
    } mfx_common_commands[] = {
        { SUBOPCODE_MFX(0, 0), 0x04, 0x04, "MFX_PIPE_MODE_SELECT", dump_mfx_mode_select },
        { SUBOPCODE_MFX(0, 1), 0x06, 0x06, "MFX_SURFACE_STATE", dump_mfx_surface_state },
        { SUBOPCODE_MFX(0, 2), 0x18, 0x18, "MFX_PIPE_BUF_ADDR_STATE", dump_mfx_pipe_buf_addr_state },
        { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_IND_OBJ_BASE_ADDR_STATE", dump_mfx_ind_obj_base_addr_state },
        { SUBOPCODE_MFX(0, 4), 0x04, 0x04, "MFX_BSP_BUF_BASE_ADDR_STATE", dump_mfx_bsp_buf_base_addr_state },
        { SUBOPCODE_MFX(0, 5), 0x07, 0x07, "MFX_AES_STATE", dump_mfx_aes_state },
        { SUBOPCODE_MFX(0, 6), 0x00, 0x00, "MFX_STATE_POINTER", dump_mfx_state_pointer },
    };

    subopcode = (data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE;

    for (n = 0; n < ARRAY_ELEMS(mfx_common_commands); n++) {
        unsigned int dw;

        if (subopcode != mfx_common_commands[n].subopcode)
            continue;

        /* The header's length field encodes "total dwords - 2". */
        length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
        instr_out(data, offset, 0, "%s\n", mfx_common_commands[n].name);

        if (length < mfx_common_commands[n].min_len ||
            length > mfx_common_commands[n].max_len) {
            fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
                    length, mfx_common_commands[n].name,
                    mfx_common_commands[n].min_len,
                    mfx_common_commands[n].max_len);
        }

        if (length - 1 >= count)
            BUFFER_FAIL(count, length, mfx_common_commands[n].name);

        if (mfx_common_commands[n].detail)
            mfx_common_commands[n].detail(data, offset, device, failures);
        else {
            for (dw = 1; dw < length; dw++)
                instr_out(data, offset, dw, "dword %d\n", dw);
        }
        return length;
    }

    instr_out(data, offset, 0, "UNKNOWN MFX COMMON COMMAND\n");
    (*failures)++;
    return 1;
}
/* Detail decoder for MFX_AVC_IMG_STATE: field decoding not implemented,
 * payload dwords 1-12 are dumped raw (0x0d dwords total). */
static void
dump_mfx_avc_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
    instr_out(data, offset, 7, "dword 07\n");
    instr_out(data, offset, 8, "dword 08\n");
    instr_out(data, offset, 9, "dword 09\n");
    instr_out(data, offset, 10, "dword 10\n");
    instr_out(data, offset, 11, "dword 11\n");
    instr_out(data, offset, 12, "dword 12\n");
}
/* Detail decoder for MFX_AVC_QM_STATE: dword 1 carries the user-default and
 * QM-list-present bitmasks; remaining dwords are raw quant-matrix data. */
static void
dump_mfx_avc_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    /* The header length field encodes "total dwords - 2". */
    unsigned int total_len = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
    int dw;

    instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n",
              (data[1] >> 8) & 0xff, data[1] & 0xff);
    for (dw = 2; dw < total_len; dw++)
        instr_out(data, offset, dw, "dword %d\n", dw);
}
/* Detail decoder for MFX_AVC_DIRECTMODE_STATE (0x45 dwords): 32 per-picture
 * direct-MV buffer bases, two bases for the current frame/field, then the
 * POC list. */
static void
dump_mfx_avc_directmode_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    int dw;

    for (dw = 1; dw <= 32; dw++)
        instr_out(data, offset, dw, "Direct MV Buffer Base Address for Picture %d\n", dw - 1);
    for (dw = 33; dw <= 34; dw++)
        instr_out(data, offset, dw, "Direct MV Buffer Base Address for Current Decoding Frame/Field\n");
    for (dw = 35; dw <= 68; dw++)
        instr_out(data, offset, dw, "POC List\n");
}
/* Detail decoder for MFX_AVC_SLICE_STATE: field decoding not implemented,
 * payload dwords 1-9 are dumped raw. */
static void
dump_mfx_avc_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
    instr_out(data, offset, 7, "dword 07\n");
    instr_out(data, offset, 8, "dword 08\n");
    instr_out(data, offset, 9, "dword 09\n");
}
/* Detail decoder for MFX_AVC_REF_IDX_STATE: field decoding not implemented,
 * payload dwords 1-9 are dumped raw. */
static void
dump_mfx_avc_ref_idx_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    instr_out(data, offset, 1, "dword 01\n");
    instr_out(data, offset, 2, "dword 02\n");
    instr_out(data, offset, 3, "dword 03\n");
    instr_out(data, offset, 4, "dword 04\n");
    instr_out(data, offset, 5, "dword 05\n");
    instr_out(data, offset, 6, "dword 06\n");
    instr_out(data, offset, 7, "dword 07\n");
    instr_out(data, offset, 8, "dword 08\n");
    instr_out(data, offset, 9, "dword 09\n");
}
/* Detail decoder for MFX_AVC_WEIGHTOFFSET_STATE: DW1 bit 0 selects which
 * list (L0/L1) the table applies to; the table entries follow as raw dwords. */
static void
dump_mfx_avc_weightoffset_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    int dw;

    instr_out(data, offset, 1,
              "Weight and Offset L%d table\n",
              (data[1] >> 0) & 0x1);
    for (dw = 2; dw <= 30; dw++)
        instr_out(data, offset, dw, "dword %d\n", dw);
}
/* Detail decoder for MFD_AVC_BSD_OBJECT (Gen6+ per-slice packet).  A zero
 * indirect-data length in DW1[21:0] marks a phantom (dummy) slice. */
static void
dump_mfd_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
{
    if ((data[1] & 0x3fffff) != 0) {
        /* Real slice: bitstream location plus the slice-header skip fields. */
        instr_out(data, offset, 1, "Indirect BSD Data Length: %d\n", data[1] & 0x3fffff);
        instr_out(data, offset, 2, "Indirect BSD Data Start Address: 0x%08x\n", data[2] & 0x1fffffff);
        instr_out(data, offset, 3, "dword 03\n");
        instr_out(data, offset, 4,
                  "First_MB_Byte_Offset of Slice Data from Slice Header: 0x%08x,"
                  "slice header skip mode: %d"
                  "\n",
                  (data[4] >> 16),
                  (data[4] >> 6) & 0x1);
        instr_out(data, offset, 5, "dword 05\n");
    } else {
        instr_out(data, offset, 1, "phantom slice\n");
        instr_out(data, offset, 2, "dword 02\n");
        instr_out(data, offset, 3, "dword 03\n");
        instr_out(data, offset, 4, "dword 04\n");
        instr_out(data, offset, 5, "dword 05\n");
    }
}
/*
 * Decode one MFX AVC-class packet on Gen6+.  The sub-opcode is looked up in
 * a descriptor table; the packet length is validated against the table's
 * [min, max] range, then a detail decoder is invoked or the payload is
 * dumped as raw dwords.  Returns the number of dwords consumed (1 for
 * unrecognized packets, which also bump *failures).
 */
static int
dump_mfx_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int subopcode;
    int length, n;
    struct {
        unsigned int subopcode;
        int min_len;
        int max_len;
        char *name;
        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int *failures);
    } mfx_avc_commands[] = {
        { SUBOPCODE_MFX(0, 0), 0x0d, 0x0d, "MFX_AVC_IMG_STATE", dump_mfx_avc_img_state },
        { SUBOPCODE_MFX(0, 1), 0x02, 0x3a, "MFX_AVC_QM_STATE", dump_mfx_avc_qm_state },
        { SUBOPCODE_MFX(0, 2), 0x45, 0x45, "MFX_AVC_DIRECTMODE_STATE", dump_mfx_avc_directmode_state },
        { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_AVC_SLICE_STATE", dump_mfx_avc_slice_state },
        { SUBOPCODE_MFX(0, 4), 0x0a, 0x0a, "MFX_AVC_REF_IDX_STATE", dump_mfx_avc_ref_idx_state },
        { SUBOPCODE_MFX(0, 5), 0x32, 0x32, "MFX_AVC_WEIGHTOFFSET_STATE", dump_mfx_avc_weightoffset_state },
        { SUBOPCODE_MFX(1, 8), 0x06, 0x06, "MFD_AVC_BSD_OBJECT", dump_mfd_bsd_object },
    };

    subopcode = (data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE;

    for (n = 0; n < ARRAY_ELEMS(mfx_avc_commands); n++) {
        unsigned int dw;

        if (subopcode != mfx_avc_commands[n].subopcode)
            continue;

        /* The header's length field encodes "total dwords - 2". */
        length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
        instr_out(data, offset, 0, "%s\n", mfx_avc_commands[n].name);

        if (length < mfx_avc_commands[n].min_len ||
            length > mfx_avc_commands[n].max_len) {
            fprintf(gout, "Bad length(%d) in %s [%d, %d]\n",
                    length, mfx_avc_commands[n].name,
                    mfx_avc_commands[n].min_len,
                    mfx_avc_commands[n].max_len);
        }

        if (length - 1 >= count)
            BUFFER_FAIL(count, length, mfx_avc_commands[n].name);

        if (mfx_avc_commands[n].detail)
            mfx_avc_commands[n].detail(data, offset, device, failures);
        else {
            for (dw = 1; dw < length; dw++)
                instr_out(data, offset, dw, "dword %d\n", dw);
        }
        return length;
    }

    instr_out(data, offset, 0, "UNKNOWN MFX AVC COMMAND\n");
    (*failures)++;
    return 1;
}
/* Dispatch a Gen6+ MFX-pipe packet by opcode (common state vs. AVC class).
 * Unknown opcodes are reported and consume a single dword. */
static int
dump_gfxpipe_mfx(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int opcode = (data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE;

    if (opcode == OPCODE_MFX_COMMON)
        return dump_mfx_common(data, offset, count, device, failures);
    if (opcode == OPCODE_MFX_AVC)
        return dump_mfx_avc(data, offset, count, device, failures);

    (*failures)++;
    instr_out(data, offset, 0, "UNKNOWN MFX OPCODE\n");
    return 1;
}
/* Dispatch a GFXPIPE-type packet by sub-type.  On Gen6 the BSD sub-type is
 * decoded through the MFX tables; earlier parts use the legacy BSD tables.
 * Returns the number of dwords consumed. */
static int
dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
{
    unsigned int subtype = (data[0] & MASK_GFXPIPE_SUBTYPE) >> SHIFT_GFXPIPE_SUBTYPE;

    if (subtype == GFXPIPE_3D)
        return dump_gfxpipe_3d(data, offset, count, device, failures);

    if (subtype == GFXPIPE_BSD) {
        return IS_GEN6(device)
            ? dump_gfxpipe_mfx(data, offset, count, device, failures)
            : dump_gfxpipe_bsd(data, offset, count, device, failures);
    }

    (*failures)++;
    instr_out(data, offset, 0, "UNKNOWN GFXPIPE COMMAND\n");
    return 1;
}
/*
 * Decode a batch buffer into /tmp/bsd_command_dump.txt.
 *
 * data/count: the batch contents (count dwords); offset: graphics address of
 * data[0], used only for annotating the dump; device: chipset id handed down
 * to the per-generation decoders.
 *
 * Returns the number of packets that could not be decoded, or -1 if the dump
 * file could not be opened.
 */
int intel_batchbuffer_dump(unsigned int *data, unsigned int offset, int count, unsigned int device)
{
    int index = 0;
    int failures = 0;

    gout = fopen("/tmp/bsd_command_dump.txt", "w+");

    /* BUG FIX: fopen() was previously unchecked — on failure every
     * instr_out/fprintf/fflush below would dereference a NULL FILE*. */
    if (gout == NULL)
        return -1;

    while (index < count) {
        switch ((data[index] & MASK_CMD_TYPE) >> SHIFT_CMD_TYPE) {
        case CMD_TYPE_MI:
            index += dump_mi(data + index, offset + index * 4,
                             count - index, device, &failures);
            break;
        case CMD_TYPE_GFXPIPE:
            index += dump_gfxpipe(data + index, offset + index * 4,
                                  count - index, device, &failures);
            break;
        default:
            instr_out(data, offset, index, "UNKNOWN COMMAND\n");
            failures++;
            index++;
            break;
        }
        /* Flush after every packet so a GPU hang still leaves a usable dump. */
        fflush(gout);
    }
    fclose(gout);
    return failures;
}
#endif

View File

@@ -0,0 +1,59 @@
#ifndef _INTEL_BATCHBUFFER_DUMP_H_
#define _INTEL_BATCHBUFFER_DUMP_H_
/* Command-type field of every batch dword 0 (bits 31:29). */
#define MASK_CMD_TYPE 0xE0000000
#define SHIFT_CMD_TYPE 29
#define CMD_TYPE_GFXPIPE 3
#define CMD_TYPE_BLT 2
#define CMD_TYPE_MI 0
/* GFXPIPE */
/* Sub-type / opcode / sub-opcode / length fields of a GFXPIPE packet header. */
#define MASK_GFXPIPE_SUBTYPE 0x18000000
#define MASK_GFXPIPE_OPCODE 0x07000000
#define MASK_GFXPIPE_SUBOPCODE 0x00FF0000
#define MASK_GFXPIPE_LENGTH 0x0000FFFF
#define SHIFT_GFXPIPE_SUBTYPE 27
#define SHIFT_GFXPIPE_OPCODE 24
#define SHIFT_GFXPIPE_SUBOPCODE 16
#define SHIFT_GFXPIPE_LENGTH 0
/* 3D */
#define GFXPIPE_3D 3
/* BSD */
/* Pre-Gen6 bitstream-decoder pipe packets (AVC opcode and its sub-opcodes). */
#define GFXPIPE_BSD 2
#define OPCODE_BSD_AVC 4
#define SUBOPCODE_BSD_IMG 0
#define SUBOPCODE_BSD_QM 1
#define SUBOPCODE_BSD_SLICE 2
#define SUBOPCODE_BSD_BUF_BASE 3
#define SUBOPCODE_BSD_IND_OBJ 4
#define SUBOPCODE_BSD_OBJECT 8
/* MFX */
/* Gen6+ MFX pipe: opcode classes, and the combined (opcode-ext, sub-opcode)
 * key used by the dispatch tables. */
#define OPCODE_MFX_COMMON 0
#define OPCODE_MFX_AVC 1
#define SUBOPCODE_MFX(A, B) ((A) << 5 | (B))
/* MI */
#define MASK_MI_OPCODE 0x1F800000
#define SHIFT_MI_OPCODE 23
#define OPCODE_MI_FLUSH 0x04
#define OPCODE_MI_BATCH_BUFFER_END 0x0A
#ifdef I965_DEBUG
/* Decode `count` dwords of a batch to /tmp/bsd_command_dump.txt; returns the
 * number of packets that failed to decode. */
int intel_batchbuffer_dump(unsigned int *data, unsigned int offset, int count, unsigned int device);
#endif
#endif /* _INTEL_BATCHBUFFER_DUMP_H_ */

View File

@@ -0,0 +1,26 @@
#ifndef _INTEL_COMPILER_H_
#define _INTEL_COMPILER_H_
/**
 * Function inlining
 */
/* Prefer the GNU spelling, fall back to C99 `inline`, else define away. */
#if defined(__GNUC__)
# define INLINE __inline__
#elif (__STDC_VERSION__ >= 199901L) /* C99 */
# define INLINE inline
#else
# define INLINE
#endif
/**
 * Function visibility
 */
/* Symbol-visibility attributes: the driver builds with -fvisibility=hidden
 * and exports only DLL_EXPORT-marked entry points.  No-ops elsewhere. */
#if defined(__GNUC__)
# define DLL_HIDDEN __attribute__((visibility("hidden")))
# define DLL_EXPORT __attribute__((visibility("default")))
#else
# define DLL_HIDDEN
# define DLL_EXPORT
#endif
#endif /* _INTEL_COMPILER_H_ */

View File

@@ -0,0 +1,136 @@
/*
 * Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include "sysdeps.h"
#include <va/va_drmcommon.h>
#include "intel_batchbuffer.h"
#include "intel_memman.h"
#include "intel_driver.h"
uint32_t g_intel_debug_option_flags = 0;
/*
 * Query a single i915 kernel-driver parameter.
 *
 * param: an I915_PARAM_* id; *value receives the result on success.
 * Returns True (non-zero) iff the ioctl succeeded.
 */
static Bool
intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
{
    struct drm_i915_getparam gp;

    gp.param = param;
    gp.value = value;

    /* NOTE(review): upstream uses drmCommandWriteRead() (see the retained
     * line below) — this port passes the command index straight to drmIoctl,
     * presumably matching the KolibriOS libdrm shim; confirm. */
    return drmIoctl(intel->fd, DRM_I915_GETPARAM, &gp) == 0;
    /* FIX: removed unused local `int ret;`. */
//    return drmCommandWriteRead(intel->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)) == 0;
}
/* Report the PCI revision id of the GPU.  The sysfs-based probe is compiled
 * out in this port (no /sys on KolibriOS), so *value is hard-wired to 2,
 * i.e. "at least B-stepping", matching the disabled code's fallback. */
static void intel_driver_get_revid(struct intel_driver_data *intel, int *value)
{
#if 0
#define PCI_REVID 8
    FILE *fp;
    char config_data[16];
    fp = fopen("/sys/devices/pci0000:00/0000:00:02.0/config", "r");
    if (fp) {
        if (fread(config_data, 1, 16, fp))
            *value = config_data[PCI_REVID];
        else
            *value = 2; /* assume it is at least B-steping */
        fclose(fp);
    } else {
        *value = 2; /* assume it is at least B-steping */
    }
#endif
    *value = 2;
    return;
}
/* Defined elsewhere in the project: maps a PCI device id to its static
 * device-description entry, or NULL for unsupported hardware. */
extern const struct intel_device_info *i965_get_device_info(int devid);
/*
 * Per-context driver bring-up: takes the DRM fd from the VA context, probes
 * the chipset id / execbuf2 / BSD / BLT / VEBOX capabilities and the
 * revision, then initializes the buffer manager.
 * Returns false if the device id is not recognized.
 */
bool
intel_driver_init(VADriverContextP ctx)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state;
    int has_exec2 = 0, has_bsd = 0, has_blt = 0, has_vebox = 0;
    char *env_str = NULL;  /* unused while the VA_INTEL_DEBUG probe below is disabled */
    g_intel_debug_option_flags = 0;
    /* Environment-driven debug flags are compiled out in this port: */
//    if ((env_str = getenv("VA_INTEL_DEBUG")))
//        g_intel_debug_option_flags = atoi(env_str);
//    if (g_intel_debug_option_flags)
//        fprintf(stderr, "g_intel_debug_option_flags:%x\n", g_intel_debug_option_flags);
    assert(drm_state);
    assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) ||
           VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2) ||
           VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_CUSTOM));
    intel->fd = drm_state->fd;
    /* DRI2 is assumed unconditionally in this port, so the check below can
     * never fail; kept for parity with the upstream code. */
    intel->dri2Enabled = 1;
    if (!intel->dri2Enabled) {
        return false;
    }
    intel->locked = 0;
    /* Context mutex disabled (single-threaded port): */
//    pthread_mutex_init(&intel->ctxmutex, NULL);
    intel_driver_get_param(intel, I915_PARAM_CHIPSET_ID, &intel->device_id);
    intel->device_info = i965_get_device_info(intel->device_id);
    if (!intel->device_info)
        return false;
    if (intel_driver_get_param(intel, I915_PARAM_HAS_EXECBUF2, &has_exec2))
        intel->has_exec2 = has_exec2;
    if (intel_driver_get_param(intel, I915_PARAM_HAS_BSD, &has_bsd))
        intel->has_bsd = has_bsd;
    if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt))
        intel->has_blt = has_blt;
    if (intel_driver_get_param(intel, I915_PARAM_HAS_VEBOX, &has_vebox))
        intel->has_vebox = !!has_vebox;
    intel_driver_get_revid(intel, &intel->revision);
    intel_memman_init(intel);
    return true;
}
/* Tear down the per-context driver state: releases the buffer manager.
 * The context-mutex destruction is disabled in this single-threaded port. */
void
intel_driver_terminate(VADriverContextP ctx)
{
    struct intel_driver_data * const intel = intel_driver_data(ctx);

    intel_memman_terminate(intel);
//    pthread_mutex_destroy(&intel->ctxmutex);
}

View File

@@ -0,0 +1,195 @@
#ifndef _INTEL_DRIVER_H_
#define _INTEL_DRIVER_H_
#include <stddef.h>
/* pthread/signal support is disabled in this KolibriOS port: */
//#include <pthread.h>
//#include <signal.h>
#include <stdbool.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include <va/va_backend.h>
#include "va_backend_compat.h"
#include "intel_compiler.h"
/* Batch buffer allocation size and the tail space reserved for the
 * end-of-batch commands. */
#define BATCH_SIZE 0x80000
#define BATCH_RESERVED 0x10
/* Command-type prefixes (bits 31:29 of dword 0). */
#define CMD_MI (0x0 << 29)
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
/* MI (memory-interface) commands. */
#define MI_NOOP (CMD_MI | 0)
#define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23))
#define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23))
#define MI_FLUSH (CMD_MI | (0x4 << 23))
#define MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE (0x1 << 0)
#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 0x2)
#define MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE (0x1 << 7)
/* 2D blitter commands and flags. */
#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04)
#define XY_COLOR_BLT_WRITE_ALPHA (1 << 21)
#define XY_COLOR_BLT_WRITE_RGB (1 << 20)
#define XY_COLOR_BLT_DST_TILED (1 << 11)
#define GEN8_XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x05)
/* BR13 */
/* Blitter BR13 color-depth field values. */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_1555 (0x2 << 24)
#define BR13_8888 (0x3 << 24)
/* PIPE_CONTROL command and its flag bits. */
#define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
#define CMD_PIPE_CONTROL_CS_STALL (1 << 20)
#define CMD_PIPE_CONTROL_NOWRITE (0 << 14)
#define CMD_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define CMD_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define CMD_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define CMD_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define CMD_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define CMD_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define CMD_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define CMD_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5)
#define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
struct intel_batchbuffer;
/* Generic helpers. */
#define ALIGN(i, n) (((i) + (n) - 1) & ~((n) - 1))
#define IS_ALIGNED(i, n) (((i) & ((n)-1)) == 0)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
/* X11-style boolean aliases used throughout the driver. */
#define Bool int
#define True 1
#define False 0
/* Global debug flag bitmask (normally set from VA_INTEL_DEBUG; forced to 0
 * in this port — see intel_driver_init). */
extern uint32_t g_intel_debug_option_flags;
#define VA_INTEL_DEBUG_OPTION_ASSERT (1 << 0)
#define VA_INTEL_DEBUG_OPTION_BENCH (1 << 1)
/* Return fail_ret on a failed check; additionally assert when the debug
 * assert option is enabled. */
#define ASSERT_RET(value, fail_ret) do { \
if (!(value)) { \
if (g_intel_debug_option_flags & VA_INTEL_DEBUG_OPTION_ASSERT) \
assert(value); \
return fail_ret; \
} \
} while (0)
/* Signal-masking + context-mutex helpers.  NOTE(review): these expand to
 * pthread/sigmask calls that are commented out of this port's struct — they
 * would not compile if instantiated; presumably nothing uses them here. */
#define SET_BLOCKED_SIGSET() do { \
sigset_t bl_mask; \
sigfillset(&bl_mask); \
sigdelset(&bl_mask, SIGFPE); \
sigdelset(&bl_mask, SIGILL); \
sigdelset(&bl_mask, SIGSEGV); \
sigdelset(&bl_mask, SIGBUS); \
sigdelset(&bl_mask, SIGKILL); \
pthread_sigmask(SIG_SETMASK, &bl_mask, &intel->sa_mask); \
} while (0)
#define RESTORE_BLOCKED_SIGSET() do { \
pthread_sigmask(SIG_SETMASK, &intel->sa_mask, NULL); \
} while (0)
#define PPTHREAD_MUTEX_LOCK() do { \
SET_BLOCKED_SIGSET(); \
pthread_mutex_lock(&intel->ctxmutex); \
} while (0)
#define PPTHREAD_MUTEX_UNLOCK() do { \
pthread_mutex_unlock(&intel->ctxmutex); \
RESTORE_BLOCKED_SIGSET(); \
} while (0)
/* Print a warning the first time this expansion site is hit. */
#define WARN_ONCE(...) do { \
static int g_once = 1; \
if (g_once) { \
g_once = 0; \
printf("WARNING: " __VA_ARGS__); \
} \
} while (0)
/* Static per-generation hardware description, selected by PCI device id. */
struct intel_device_info
{
    int gen;                          /* GPU generation number */
    int gt;                           /* GT level within the generation */
    unsigned int urb_size;
    unsigned int max_wm_threads;
    unsigned int is_g4x : 1; /* gen4 */
    unsigned int is_ivybridge : 1; /* gen7 */
    unsigned int is_baytrail : 1; /* gen7 */
    unsigned int is_haswell : 1; /* gen7 */
    unsigned int is_cherryview : 1; /* gen8 */
};
/* Per-VA-context driver state, filled in by intel_driver_init(). */
struct intel_driver_data
{
    int fd;                           /* DRM file descriptor from the VA context */
    int device_id;                    /* PCI device id (I915_PARAM_CHIPSET_ID) */
    int revision;                     /* PCI revision (hard-wired in this port) */
    int dri2Enabled;
    /* Threading support disabled in this port: */
//    sigset_t sa_mask;
//    pthread_mutex_t ctxmutex;
    int locked;
    dri_bufmgr *bufmgr;               /* libdrm buffer manager */
    unsigned int has_exec2  : 1; /* Flag: has execbuffer2? */
    unsigned int has_bsd    : 1; /* Flag: has bitstream decoder for H.264? */
    unsigned int has_blt    : 1; /* Flag: has BLT unit? */
    unsigned int has_vebox  : 1; /* Flag: has VEBOX unit */
    const struct intel_device_info *device_info;
};
bool intel_driver_init(VADriverContextP ctx);
void intel_driver_terminate(VADriverContextP ctx);
/* Recover the driver-private data from a VA context. */
static INLINE struct intel_driver_data *
intel_driver_data(VADriverContextP ctx)
{
    return (struct intel_driver_data *)ctx->pDriverData;
}
/* A rectangular region backed by a GEM buffer object. */
struct intel_region
{
    int x;
    int y;
    unsigned int width;
    unsigned int height;
    unsigned int cpp;                 /* bytes per pixel */
    unsigned int pitch;               /* row stride in bytes */
    unsigned int tiling;
    unsigned int swizzle;
    dri_bo *bo;
};
/* Generation predicates over a `struct intel_device_info *`. */
#define IS_G4X(device_info)             (device_info->is_g4x)
#define IS_IRONLAKE(device_info)        (device_info->gen == 5)
#define IS_GEN6(device_info)            (device_info->gen == 6)
#define IS_HASWELL(device_info)         (device_info->is_haswell)
#define IS_GEN7(device_info)            (device_info->gen == 7)
#define IS_CHERRYVIEW(device_info)      (device_info->is_cherryview)
#define IS_GEN8(device_info)            (device_info->gen == 8)
#endif /* _INTEL_DRIVER_H_ */

View File

@@ -0,0 +1,50 @@
/*
* Copyright (C) 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INTEL_MEDIA_H
#define INTEL_MEDIA_H
#include <stdint.h>
#include <stdlib.h>
#include <va/va.h>
#include <intel_bufmgr.h>
typedef struct gen_avc_surface GenAvcSurface;
/* Per-surface AVC decode state: direct-MV buffers for the top and (for
 * field coding) bottom field. */
struct gen_avc_surface
{
    dri_bo *dmv_top;
    dri_bo *dmv_bottom;
    int dmv_bottom_flag;          /* non-zero when dmv_bottom is allocated */
    int frame_store_id; /* only used for H.264 on earlier generations (<HSW) */
};
/* Release both direct-MV buffers and the struct itself; NULLs *data.
 * Safe on an already-NULL pointer. */
extern void gen_free_avc_surface(void **data);
/* Convert a float to a fixed-point integer of the given format (see the
 * implementation for details). */
extern int intel_format_convert(float src, int out_int_bits, int out_frac_bits,int out_sign_flag);
#endif /* INTEL_MEDIA_H */

View File

@@ -0,0 +1,84 @@
/*
* Copyright (C) 2006-2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include "intel_driver.h"
#include "intel_media.h"
//static pthread_mutex_t free_avc_surface_lock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Destructor for a GenAvcSurface stored behind a generic void ** slot.
 * Drops the references on both direct-MV buffer objects, frees the
 * struct itself, and clears the caller's pointer.  A NULL *data is
 * tolerated and treated as a no-op.
 */
void
gen_free_avc_surface(void **data)
{
    GenAvcSurface *surface = *data;

    if (surface == NULL)
        return;

    dri_bo_unreference(surface->dmv_top);
    surface->dmv_top = NULL;
    dri_bo_unreference(surface->dmv_bottom);
    surface->dmv_bottom = NULL;

    free(surface);
    *data = NULL;
}
/* This is to convert one float to the given format interger.
* For example: 1.25 to S1.6 or U2.6 and so on
*/
int intel_format_convert(float src, int out_int_bits, int out_frac_bits,int out_sign_flag)
{
unsigned char negative_flag = (src < 0.0) ? 1 : 0;
float src_1 = (!negative_flag)? src: -src ;
unsigned int factor = 1 << out_frac_bits;
int output_value = 0;
unsigned int integer_part = floorf(src_1);
unsigned int fraction_part = ((int)((src_1 - integer_part) * factor)) & (factor - 1) ;
output_value = (integer_part << out_frac_bits) | fraction_part;
if(negative_flag)
output_value = (~output_value + 1) & ((1 <<(out_int_bits + out_frac_bits)) -1);
if(out_sign_flag == 1 && negative_flag)
{
output_value |= negative_flag <<(out_int_bits + out_frac_bits);
}
return output_value;
}

View File

@@ -0,0 +1,49 @@
/*
 * Copyright (c) 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include <assert.h>
#include "intel_driver.h"
Bool
intel_memman_init(struct intel_driver_data *intel)
{
intel->bufmgr = intel_bufmgr_gem_init(intel->fd, BATCH_SIZE);
assert(intel->bufmgr);
intel_bufmgr_gem_enable_reuse(intel->bufmgr);
return True;
}
Bool
intel_memman_terminate(struct intel_driver_data *intel)
{
drm_intel_bufmgr_destroy(intel->bufmgr);
return True;
}

View File

@@ -0,0 +1,7 @@
#ifndef _INTEL_MEMMAN_H_
#define _INTEL_MEMMAN_H_
/* Create the libdrm GEM buffer manager for this driver instance. */
Bool intel_memman_init(struct intel_driver_data *intel);
/* Destroy the buffer manager created by intel_memman_init(). */
Bool intel_memman_terminate(struct intel_driver_data *intel);
#endif /* _INTEL_MEMMAN_H_ */

View File

@@ -0,0 +1,261 @@
/*
* Copyright (c) 2007 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "object_heap.h"
#include "assert.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ASSERT assert
/* Sentinel values stored in object_base.next_free: */
#define LAST_FREE -1 /* end-of-free-list marker */
#define ALLOCATED -2 /* slot is currently allocated (not on the free list) */
/*
 * Expands the heap
 * Return 0 on success, -1 on error
 */
static int object_heap_expand( object_heap_p heap )
{
    int i;
    void *new_heap_index;
    int next_free;
    int new_heap_size = heap->heap_size + heap->heap_increment;
    /* The heap always grows by exactly one bucket of heap_increment
     * objects, so this is the index of the bucket being added. */
    int bucket_index = new_heap_size / heap->heap_increment - 1;

    if (bucket_index >= heap->num_buckets) {
        /* Grow the bucket pointer table in steps of 8 entries. */
        int new_num_buckets = heap->num_buckets + 8;
        void **new_bucket;

        new_bucket = realloc(heap->bucket, new_num_buckets * sizeof(void *));
        if (NULL == new_bucket) {
            return -1;
        }

        heap->num_buckets = new_num_buckets;
        heap->bucket = new_bucket;
    }

    new_heap_index = (void *) malloc( heap->heap_increment * heap->object_size );
    if ( NULL == new_heap_index )
    {
        return -1; /* Out of memory */
    }
    heap->bucket[bucket_index] = new_heap_index;

    /* Thread the new slots onto the head of the free list in ascending
     * id order: walk the new range backwards so each slot links to its
     * successor, with the previous list head chained after the last new
     * slot.  NOTE: arithmetic on a void * relies on the GCC extension
     * that treats sizeof(void) as 1. */
    next_free = heap->next_free;
    for(i = new_heap_size; i-- > heap->heap_size; )
    {
        object_base_p obj = (object_base_p) (new_heap_index + (i - heap->heap_size) * heap->object_size);
        obj->id = i + heap->id_offset;
        obj->next_free = next_free;
        next_free = i;
    }
    heap->next_free = next_free;
    heap->heap_size = new_heap_size;
    return 0; /* Success */
}
/*
 * Initialize an object heap: record the layout parameters, start from an
 * empty bucket table, then grow once so the first allocation succeeds
 * without expanding.  The mutex is created only on the success path.
 *
 * Return 0 on success, -1 on error (allocation failure during the
 * initial expansion; any partially grown bucket table is released).
 */
int object_heap_init( object_heap_p heap, int object_size, int id_offset)
{
    heap->object_size = object_size;
    heap->id_offset = id_offset & OBJECT_HEAP_OFFSET_MASK;
    heap->heap_size = 0;
    heap->heap_increment = 16;
    heap->next_free = LAST_FREE;
    heap->num_buckets = 0;
    heap->bucket = NULL;

    if (object_heap_expand(heap) != 0) {
        /* Expansion failed before any object storage was committed. */
        ASSERT(!heap->heap_size);
        ASSERT(!heap->bucket || !heap->bucket[0]);
        free(heap->bucket);
        return -1;
    }

    ASSERT(heap->heap_size);
    _i965InitMutex(&heap->mutex);
    return 0;
}
/*
 * Allocates an object
 * Returns the object ID on success, returns -1 on error
 */
int object_heap_allocate( object_heap_p heap )
{
    object_base_p obj;
    int bucket_index, obj_index;

    _i965LockMutex(&heap->mutex);
    /* Grow the heap when the free list is exhausted. */
    if ( LAST_FREE == heap->next_free )
    {
        if( -1 == object_heap_expand( heap ) )
        {
            _i965UnlockMutex(&heap->mutex);
            return -1; /* Out of memory */
        }
    }
    ASSERT( heap->next_free >= 0 );

    /* Pop the head of the free list.  Slots are stored bucket-wise:
     * heap_increment objects of object_size bytes per bucket. */
    bucket_index = heap->next_free / heap->heap_increment;
    obj_index = heap->next_free % heap->heap_increment;

    obj = (object_base_p) (heap->bucket[bucket_index] + obj_index * heap->object_size);
    heap->next_free = obj->next_free;

    _i965UnlockMutex(&heap->mutex);
    /* NOTE(review): the ALLOCATED marker is written after the mutex is
     * released.  The slot is already off the free list, so it cannot be
     * handed out twice, but a concurrent object_heap_lookup() of this id
     * could briefly see it as unallocated -- confirm callers do not
     * publish the id before this function returns. */
    obj->next_free = ALLOCATED;

    return obj->id;
}
/*
 * Lookup an object by object ID
 * Returns a pointer to the object on success, returns NULL on error
 * (id out of range, or the slot is not currently allocated).
 */
object_base_p object_heap_lookup( object_heap_p heap, int id )
{
    object_base_p obj;
    int bucket_index, obj_index;

    _i965LockMutex(&heap->mutex);
    /* Valid ids span id_offset .. id_offset + heap_size - 1.  The upper
     * bound must be rejected with >=: the previous "id > ..." check let
     * id == heap_size + id_offset through, which computed a bucket_index
     * one past the populated bucket table and dereferenced garbage. */
    if ( (id < heap->id_offset) || (id >= (heap->heap_size+heap->id_offset)) )
    {
        _i965UnlockMutex(&heap->mutex);
        return NULL;
    }
    id &= OBJECT_HEAP_ID_MASK;
    bucket_index = id / heap->heap_increment;
    obj_index = id % heap->heap_increment;
    obj = (object_base_p) (heap->bucket[bucket_index] + obj_index * heap->object_size);
    _i965UnlockMutex(&heap->mutex);

    /* Reject slots that are sitting on the free list. */
    if ( obj->next_free != ALLOCATED )
    {
        return NULL;
    }
    return obj;
}
/*
 * Begin an iteration over the heap's allocated objects.
 * Returns the first allocated object, or NULL when the heap holds none.
 */
object_base_p object_heap_first( object_heap_p heap, object_heap_iterator *iter )
{
    /* Park the cursor just before slot 0, then reuse the step logic. */
    *iter = -1;
    return object_heap_next( heap, iter );
}
/*
 * Advance an iteration started with object_heap_first().
 * Scans forward from *iter + 1 for the next slot marked ALLOCATED.
 * Returns that object (with *iter updated to its slot), or NULL when
 * the scan reaches the end of the heap (with *iter left at heap_size).
 */
object_base_p object_heap_next( object_heap_p heap, object_heap_iterator *iter )
{
    int slot;

    _i965LockMutex(&heap->mutex);
    for (slot = *iter + 1; slot < heap->heap_size; slot++)
    {
        int bucket_index = slot / heap->heap_increment;
        int obj_index = slot % heap->heap_increment;
        object_base_p obj =
            (object_base_p) (heap->bucket[bucket_index] + obj_index * heap->object_size);

        if (obj->next_free == ALLOCATED)
        {
            _i965UnlockMutex(&heap->mutex);
            *iter = slot;
            return obj;
        }
    }
    _i965UnlockMutex(&heap->mutex);
    *iter = slot;
    return NULL;
}
/*
 * Return an object to the heap's free list.  A NULL object is accepted
 * and ignored; freeing an object that is not currently allocated trips
 * the ASSERT.
 */
void object_heap_free( object_heap_p heap, object_base_p obj )
{
    if (NULL == obj)
        return;

    /* Must have been handed out by object_heap_allocate(). */
    ASSERT( obj->next_free == ALLOCATED );

    _i965LockMutex(&heap->mutex);
    /* Push the slot onto the head of the free list. */
    obj->next_free = heap->next_free;
    heap->next_free = obj->id & OBJECT_HEAP_ID_MASK;
    _i965UnlockMutex(&heap->mutex);
}
/*
 * Destroys a heap, the heap must be empty.
 * Releases every bucket and the bucket table, then resets the heap to
 * its pre-init state so a subsequent object_heap_init() would work.
 */
void object_heap_destroy( object_heap_p heap )
{
    object_base_p obj;
    int i;
    int bucket_index, obj_index;

    if (heap->heap_size) {
        _i965DestroyMutex(&heap->mutex);
        /* Check if heap is empty */
        for (i = 0; i < heap->heap_size; i++)
        {
            /* Check if object is not still allocated */
            bucket_index = i / heap->heap_increment;
            obj_index = i % heap->heap_increment;
            obj = (object_base_p) (heap->bucket[bucket_index] + obj_index * heap->object_size);
            ASSERT( obj->next_free != ALLOCATED );
        }
        /* heap_size is always a multiple of heap_increment, so this
         * frees exactly the buckets populated by object_heap_expand(). */
        for (i = 0; i < heap->heap_size / heap->heap_increment; i++) {
            free(heap->bucket[i]);
        }
        free(heap->bucket);
    }
    heap->bucket = NULL;
    heap->heap_size = 0;
    heap->next_free = LAST_FREE;
}

View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2007 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _OBJECT_HEAP_H_
#define _OBJECT_HEAP_H_
#include "i965_mutext.h"
/* An object id packs a constant per-heap offset (high bits) with the
 * slot index inside the heap (low bits). */
#define OBJECT_HEAP_OFFSET_MASK 0x7F000000
#define OBJECT_HEAP_ID_MASK 0x00FFFFFF
typedef struct object_base *object_base_p;
typedef struct object_heap *object_heap_p;
/* Header common to every object stored in an object_heap; concrete
 * object structs embed it as their first member. */
struct object_base {
    int id;        /* slot index OR'ed with the heap's id_offset */
    int next_free; /* next free slot, or a sentinel from object_heap.c */
};
/* A growable, mutex-protected pool of fixed-size objects, stored as a
 * table of buckets each holding heap_increment objects. */
struct object_heap {
    int object_size;    /* size in bytes of one stored object */
    int id_offset;      /* constant added to slot indices to form ids */
    int next_free;      /* head of the free list (slot index) */
    int heap_size;      /* total slots created so far */
    int heap_increment; /* slots added per expansion (= bucket capacity) */
    _I965Mutex mutex;   /* guards free-list and bucket-table updates */
    void **bucket;      /* bucket table; entries hold heap_increment objects */
    int num_buckets;    /* allocated length of the bucket table */
};
typedef int object_heap_iterator;
/*
 * Return 0 on success, -1 on error
 */
int object_heap_init( object_heap_p heap, int object_size, int id_offset);
/*
 * Allocates an object
 * Returns the object ID on success, returns -1 on error
 */
int object_heap_allocate( object_heap_p heap );
/*
 * Lookup an allocated object by object ID
 * Returns a pointer to the object on success, returns NULL on error
 */
object_base_p object_heap_lookup( object_heap_p heap, int id );
/*
 * Iterate over all objects in the heap.
 * Returns a pointer to the first object on the heap, returns NULL if heap is empty.
 */
object_base_p object_heap_first( object_heap_p heap, object_heap_iterator *iter );
/*
 * Iterate over all objects in the heap.
 * Returns a pointer to the next object on the heap, returns NULL if heap is empty.
 */
object_base_p object_heap_next( object_heap_p heap, object_heap_iterator *iter );
/*
 * Frees an object
 */
void object_heap_free( object_heap_p heap, object_base_p obj );
/*
 * Destroys a heap, the heap must be empty.
 */
void object_heap_destroy( object_heap_p heap );
#endif /* _OBJECT_HEAP_H_ */

View File

@@ -0,0 +1,6 @@
# Recurse into each shader family's sub-directory.
SUBDIRS = h264 mpeg2 render post_processing vme utils
# gpp.py is the shader pre-processor used by the sub-directories; ship it
# in release tarballs even though it is not installed.
EXTRA_DIST = gpp.py
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in

View File

@@ -0,0 +1,632 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = src/shaders
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DRM_CFLAGS = @DRM_CFLAGS@
DRM_LIBS = @DRM_LIBS@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_LIBS = @EGL_LIBS@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GEN4ASM = @GEN4ASM@
GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
GEN4ASM_LIBS = @GEN4ASM_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBDRM_VERSION = @LIBDRM_VERSION@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
wayland_protocoldir = @wayland_protocoldir@
wayland_scanner = @wayland_scanner@
SUBDIRS = h264 mpeg2 render post_processing vme utils
EXTRA_DIST = gpp.py
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
all: all-recursive
.SUFFIXES:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/shaders/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/shaders/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
.MAKE: $(am__recursive_targets) install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
ps ps-am tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
#coding=UTF-8
# Copyright © 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Authors:
# Chen, Yangyang <yangyang.chen@intel.com>
# Han, Haofu <haofu.han@intel.com>
#
import sys
class Block:
    """One node of the template tree: either a literal text line or a
    ``$for`` loop node whose sub-blocks are expanded repeatedly.

    Loop nodes get these attributes from checkfor():
        param_init  -- list of initial counter values
        param_num   -- number of loop counters
        param_op    -- comparison operator: '<', '>', '<=' or '>='
        param_limit -- bound compared against the *first* counter
        param_step  -- per-iteration increment for each counter
    """

    def __init__(self, ln=0, s=None):
        # ln: 1-based source line number (0 for the synthetic root node).
        # s:  literal line text, or None for a loop/root node.
        assert isinstance(ln, int)
        assert s is None or isinstance(s, str)
        self.lineno = ln
        self.text = s
        self.subblocks = []

    def append(self, block):
        """Attach a child node (literal line or nested loop)."""
        self.subblocks.append(block)

    def checkfor(self, line):
        """Parse a '$for(init; cond; step) {' header into loop parameters.

        Raises Exception (message prefixed with the line number) on any
        syntax error.
        """
        import re
        if re.match(r'\$\s*for\s*', line) is None:
            raise Exception(self.__errmsg('syntax error'))
        tail = line.split('(', 1)[1].rsplit(')', 1)
        conds = tail[0].split(';')
        lb = tail[1]
        if lb.strip() != '{':
            raise Exception(self.__errmsg('missing "{"'))
        if len(conds) != 3:
            raise Exception(self.__errmsg('syntax error(miss ";"?)'))
        self.__parse_init(conds[0])
        self.__parse_cond(conds[1])
        self.__parse_step(conds[2])

    def __parse_init(self, init):
        # Comma-separated initial counter values, e.g. "0,8" -> [0, 8].
        # NOTE: eval() executes arbitrary template text -- templates must
        # come from a trusted source (here: the driver's own shader files).
        inits = init.split(',')
        self.param_init = []
        for ini in inits:
            try:
                val = eval(ini)
            except Exception:  # was a bare except: don't mask SystemExit etc.
                raise Exception(self.__errmsg('not an expression: %s' % ini))
            self.param_init.append(val)
        self.param_num = len(inits)

    def __parse_cond(self, cond):
        # Condition tests the first counter only, e.g. "<16" or ">=0".
        cond = cond.strip()
        if not cond or cond[0] not in ('<', '>'):
            raise Exception(self.__errmsg('syntax error'))
        # cond[1:2] (slice, not index) avoids IndexError on a bare "<"/">",
        # which the original code would raise instead of a diagnostic.
        if cond[1:2] == '=':
            self.param_op = cond[:2]
            limit = cond[2:]
        else:
            self.param_op = cond[0]
            limit = cond[1:]
        try:
            self.param_limit = eval(limit)
        except Exception:
            raise Exception(self.__errmsg('not an expression: %s' % limit))

    def __parse_step(self, step):
        # One step value per counter; the count must match the init list.
        steps = step.split(',')
        if len(steps) != self.param_num:
            raise Exception(self.__errmsg('params number no match'))
        self.param_step = []
        for st in steps:
            try:
                val = eval(st)
            except Exception:
                raise Exception(self.__errmsg('not an expression: %s' % st))
            self.param_step.append(val)

    def __errmsg(self, msg=''):
        # Prefix diagnostics with the 1-based template line number.
        return '%d: %s' % (self.lineno, msg)
def readlines(f):
    """Read all lines from f, additionally splitting any line that
    contains a literal backslash-n escape into separate entries.

    Returns the resulting list of line fragments.
    """
    expanded = []
    for raw in f.readlines():
        # str.split returns [raw] unchanged when the marker is absent,
        # so a single call covers both cases.
        expanded.extend(raw.split('\\n'))
    return expanded
def parselines(lines):
    """Build the Block tree for a template.

    '$...' lines open a loop Block (validated via checkfor), '}' closes
    the innermost open loop, '#' lines and blank lines are dropped, and
    anything else becomes a literal-text Block.  Returns the root Block.
    """
    root = Block(0)
    stack = [root]
    for lineno, raw in enumerate(lines, 1):
        text = raw.strip()
        if text.startswith('$'):
            loop = Block(lineno)
            loop.checkfor(text)
            stack[-1].append(loop)
            stack.append(loop)
        elif text.startswith('}'):
            stack.pop()
        elif text and not text.startswith('#'):
            stack[-1].append(Block(lineno, text))
    return root
def writeblocks(outfile, blocks):
    """Expand the Block tree rooted at `blocks` and write the generated
    assembly text to `outfile`, one line per emitted statement.

    Literal lines get '%N' placeholders replaced by the Nth loop counter
    and gain a trailing ';' unless they start with '.' or already end in
    ':' or ';'.  Loop nodes repeat their children, advancing every
    counter by its step until the condition on the first counter fails.
    """
    out_lines = []

    def cond_holds(op, cur, lim):
        # Evaluate "cur <op> lim" for the whitelisted comparison ops.
        assert op in ['<', '>', '<=', '>=']
        assert type(cur) == int
        assert type(lim) == int
        return eval('%d %s %d' % (cur, op, lim))

    def emit(node, curs):
        if node.text is None:
            # Loop node: run children, stepping the counters in place.
            cursor = node.param_init
            while cond_holds(node.param_op, cursor[0], node.param_limit):
                for child in node.subblocks:
                    emit(child, cursor)
                for i in range(node.param_num):
                    cursor[i] += node.param_step[i]
        else:
            import re
            line = node.text
            for token in set(re.findall(r'\%(\d+)', node.text)):
                idx = int(token) - 1
                if idx >= len(curs):
                    raise Exception('%d: too many param(%%%d)' % (node.lineno, idx + 1))
                line = line.replace('%%%d' % (idx + 1), str(curs[idx]))
            # Directives ('.'-prefixed) and labels keep their form; plain
            # statements are terminated with a semicolon.
            if line and \
               not line.startswith('.') and \
               not line.endswith(':') and \
               not line.endswith(';'):
                line += ';'
            out_lines.append(line)

    for top in blocks.subblocks:
        emit(top, [])
    outfile.write('\n'.join(out_lines))
    outfile.write('\n')
if __name__ == '__main__':
    # Command line: gpp <input> [output]; with one argument the expanded
    # template goes to stdout.
    argc = len(sys.argv)
    if argc == 1:
        # sys.stderr.write() instead of the Python-2-only `print >>` form,
        # so the script runs under both Python 2 and Python 3.
        sys.stderr.write('no input file\n')
        # Was sys.exit(0): a missing input file is an error and must not
        # report success to the calling Makefile.
        sys.exit(1)
    try:
        infile = open(sys.argv[1], 'r')
    except IOError:
        sys.stderr.write('can not open %s\n' % sys.argv[1])
        sys.exit(1)
    if argc == 2:
        outfile = sys.stdout
    else:
        try:
            outfile = open(sys.argv[2], 'w')
        except IOError:
            sys.stderr.write('can not write to %s\n' % sys.argv[2])
            sys.exit(1)
    lines = readlines(infile)
    try:
        infile.close()
    except IOError:
        pass
    blocks = parselines(lines)
    writeblocks(outfile, blocks)
    # Close (and thereby flush) the output file; the original relied on
    # interpreter shutdown for this.  stdout is left open for the runtime.
    if outfile is not sys.stdout:
        outfile.close()

View File

@@ -0,0 +1,4 @@
# Automake input for the H.264 shader directory (src/shaders/h264):
# recurse into the ildb (in-loop deblocking) and mc (motion compensation)
# kernel subdirectories.
SUBDIRS = ildb mc
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in

View File

@@ -0,0 +1,631 @@
# Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = src/shaders/h264
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/src/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DRM_CFLAGS = @DRM_CFLAGS@
DRM_LIBS = @DRM_LIBS@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_LIBS = @EGL_LIBS@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GEN4ASM = @GEN4ASM@
GEN4ASM_CFLAGS = @GEN4ASM_CFLAGS@
GEN4ASM_LIBS = @GEN4ASM_LIBS@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_DRIVER_LT_LDFLAGS = @INTEL_DRIVER_LT_LDFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBDRM_VERSION = @LIBDRM_VERSION@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBVA_DEPS_CFLAGS = @LIBVA_DEPS_CFLAGS@
LIBVA_DEPS_LIBS = @LIBVA_DEPS_LIBS@
LIBVA_DRIVERS_PATH = @LIBVA_DRIVERS_PATH@
LIBVA_DRM_DEPS_CFLAGS = @LIBVA_DRM_DEPS_CFLAGS@
LIBVA_DRM_DEPS_LIBS = @LIBVA_DRM_DEPS_LIBS@
LIBVA_PACKAGE_VERSION = @LIBVA_PACKAGE_VERSION@
LIBVA_WAYLAND_DEPS_CFLAGS = @LIBVA_WAYLAND_DEPS_CFLAGS@
LIBVA_WAYLAND_DEPS_LIBS = @LIBVA_WAYLAND_DEPS_LIBS@
LIBVA_X11_DEPS_CFLAGS = @LIBVA_X11_DEPS_CFLAGS@
LIBVA_X11_DEPS_LIBS = @LIBVA_X11_DEPS_LIBS@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
wayland_protocoldir = @wayland_protocoldir@
wayland_scanner = @wayland_scanner@
SUBDIRS = ildb mc
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
all: all-recursive
.SUFFIXES:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/shaders/h264/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/shaders/h264/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile
installdirs: installdirs-recursive
installdirs-am:
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am:
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am:
.MAKE: $(am__recursive_targets) install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
ps ps-am tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@@ -0,0 +1,718 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#if !defined(__AVC_ILDB_HEADER__) // Make sure this file is only included once
#define __AVC_ILDB_HEADER__
// Module name: AVC_ILDB.inc
#undef ORIX
#undef ORIY
//========== Root thread input parameters ==================================================
#define RootParam r1 // :w
#define MBsCntX r1.0 // :w, MB count per row
#define MBsCntY r1.1 // :w, MB count per col
//#define PicType r1.2 // :w, Picture type
#define MaxThreads r1.3 // :w, Max Thread limit
#define EntrySignature r1.4 // :w, Debug flag
#define BitFields r1.5 // :uw
#define MbaffFlag BIT0 // :w, mbaff flag, bit 0 in BitFields
#define BotFieldFlag BIT1 // :w, bottom field flag, bit 1 in BitFields
#define CntlDataExpFlag BIT2 // :w, Control Data Expansion Flag, bit 2 in BitFields
#define RampConst r1.12 // 8 :ub, Ramp constant, r1.12 - r1.19:ub
#define StepToNextMB r1.20 // :b, 2 bytes
#define Minus2Minus1 r1.22 // :b, 2 bytes
// next one starts at r1.11:w
#define TopFieldFlag 0xFFFD // :w, top field flag, used to set bit1 to 0.
//========== Root Locals =============================================================
// Variables in the root kernel for launching child threads
#define ChildParam r2.0 // :w
//Not used #define URBOffset r2.3 // :w, Each row occupies 4 URB entries. All children in the same row use the same set of URB entries
#define CurCol r2.10 // :w, current col
#define CurColB r2.20 // :b, current col
#define CurRow r2.11 // :w, current row
#define CurRowB r2.22 // :b, current row
#define LastCol r2.12 // :w, last col
#define LastRow r2.13 // :w, last row
// Root local constants during spawning process
#define Col_Boundary r3.0 // :w,
#define Row_Boundary r3.1 // :w,
//#define TotalBlocks r3.2 // :w, Total blocks in the frame
#define URB_EntriesPerMB_2 r3.3 // :w, = URB entries per MB, but in differnt form
#define URBOffsetUVBase r3.4 // :w, UV Base offset in URB
#define Temp1_D r3.6 // :d:
#define Temp1_W r3.12 // :w, Temp1
#define Temp1_B r3.24 // :b, = Temp1_W
#define Temp2_W r3.13 // :w, Temp2
#define Temp2_B r3.26 // :b, = Temp2_W
// Root local variables
#define JumpTable r4 // :d, jump table
#define JUMPTABLE_BASE 4*32
#define JumpAddr a0.7
#define TopRowForScan r5.0 // :w, track the top row for scan. All rows above this row is deblocked already.
// Child Thread R0 Header Field
#define MRF0 m0
#define CT_R0Hdr m1
/*
.declare GatewayAperture Base=r50.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureB 1600 // r50 byte offset from r0.0
// Chroma root thread updates luma root's ThreadLimit at r10.0:w via gateway
#define ThreadLimit r62.0 // :w, thread limit //r56.0
#define THREAD_LIMIT_OFFSET 0x01800000 // Offset from r50 to r56 = 12*32 = 384 = 0x0180. 0x180 << 16 = 0x01800000
//#define THREAD_LIMIT_OFFSET 0x00C00000 // Offset from r50 to r56 = 6*32 = 192 = 0x00C0. 0xC0 << 16 = 0x00C00000
*/
// Gateway size is 16 GRF. 68 rows of MBs takes 9 GRFs (r6 - r14)
// For CTG: Expended to support 1280 rows of pixel (80 rows of MBs). It requires 10 GRFs (r6 - r15)
.declare GatewayAperture Base=r6.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureB 192 // r0.0 byte offset from r0.0
// Chroma root thread updates luma root's ThreadLimit at r10.0:w via gateway
#define ThreadLimit r18.0 // :w, thread limit
#define THREAD_LIMIT_OFFSET 0x01800000 // Offset from r50 to r56 = 12*32 = 384 = 0x0180. 0x180 << 16 = 0x01800000
#define TotalBlocks r18.1 // :w, Total blocks in the frame
// Root local variables
#define ChildThreadsID r19.0 // :w, Child thread ID, unique to each child
#define OutstandingThreads r20.0 // :w, Outstanding threads
#define ProcessedMBs r20.1 // :w, # of MBs processed
#define URBOffset r21.0 // :w, Each row occupies 4 URB entries. All children in the same row use the same set of URB entries
//=================================================================================
#define ScoreBd_Size 128 //96 // size of Status[] or ProcCol[]
#define ScoreBd_Idx 2
//#define Saved_Col 0
#define StatusAddr a0.4 // :w, point to r50
//=================================================================================
// Gateway payload
#define GatewayPayload r48.0 // :ud
#define GatewayPayloadKey r48.8 // :uw
#define DispatchID r48.20 // :ub
#define RegBase_GatewaySize r48.5 // :ud, used in open a gateway
#define Offset_Length r48.5 // :ud, used in forwardmsg back to root
#define EUID_TID r48.9 // :uw, used in forwardmsg back to root
// Gateway response
#define GatewayResponse r49.0 // :ud, one GRF
#define URBWriteMsgDesc a0.0 // Used in URB write, :ud
#define URBWriteMsgDescLow a0.0 // Used in URB write, :uw
#define URBWriteMsgDescHigh a0.1 // Used in URB write, :uw
.declare WritebackResponse Base=r50 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // 1 GRF for write backs
/////////////////////////////////////////////////////////////////////////////////////////////
// IDesc Order Offset
//
// 0) luma root 0 from luma root
// 1) luma child 16 from luma root
// 2) chroma root 32 from luma root
// 3) chroma child 16 from chroma root
// 4) luma field root 0 from luma field root
// 5) luma field child 16 from luma field root
// 6) chroma field root 32 from luma field root
// 7) chroma field child 16 from chroma field root
// 8) luma Mbaff root 0 from luma Mbaff root
// 9) luma Mbaff child 16 from luma Mbaff root
// 10) chroma Mbaff root 32 from luma Mbaff root
// 11) chroma Mbaff child 16 from chroma Mbaff root
// IDesc offset within non-mbaff or mbaff mode
#define CHROMA_ROOT_OFFSET 32 // Offset from luma root to chroma root
#define CHILD_OFFSET 16 // Offset from luma root to luma child,
// and from chroma root to chroma child
/////////////////////////////////////////////////////////////////////////////////////////////
//========== End of Root Variables ======================================================
//========== Child thread input parameters ==============================================
//#define MBsCntX r1.0 // :w, MB count per row (same as root)
//#define MBsCntY r1.1 // :w, MB count per col (same as root)
//#define PicTypeC r1.2 // :w, Picture type same as root thread (same as root)
#define URBOffsetC r1.3 // :w,
#define EntrySignatureC r1.4 // :w, Debug field (same as root)
//#define BitFields r1.5 // :w (same as root)
//#define MbaffFlag BIT0 // :w, mbaff flag, bit 0 in BitFields
//#define BotFieldFlag BIT1 // :w, bottom field flag, bit 1 in BitFields
//#define CntlDataExpFlag BIT2 // :w, Control Data Expansion Flag, bit 2 in BitFields
#define RampConstC r1.12 // 8 :ub, Ramp constant, r1.12 - r1.19:ub.
#define ORIX r1.10 // :w, carry over from root r1 in MB count
#define ORIY r1.11 // :w, carry over from root r1 in MB count
#define LastColC r1.12 // :w, last col
#define LastRowC r1.13 // :w, last row
.declare GatewayApertureC Base=r1.0 ElementSize=4 SrcRegion=REGION(8,1) Type=ud
#define GatewayApertureCBase 32 // r1 byte offset from r0.0
//========== Child Variables ============================================================
// Mbaff Alpha, Beta, Tc0 vectors for an edge
.declare Mbaff_ALPHA Base=r14.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r14
.declare Mbaff_BETA Base=r15.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r15
.declare Mbaff_TC0 Base=r16.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // r16
.declare RRampW Base=r17.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w // r17
.declare Mbaff_ALPHA2 Base=r45.0 ElementSize=2 SrcRegion=REGION(8,1) Type=uw // alpha2 = (alpha >> 2) + 2
#define ORIX_CUR r46.0 // :w, current block origin X in bytes
#define ORIY_CUR r46.1 // :w, current block origin Y in bytes
#define ORIX_LEFT r46.2 // :w, left block origin X in bytes
#define ORIY_LEFT r46.3 // :w, left block origin Y in bytes
#define ORIX_TOP r46.4 // :w, top block origin X in bytes
#define ORIY_TOP r46.5 // :w, top block origin Y in bytes
//#define FilterSampleFlag r46.6 // :uw,
#define CTemp0_W r46.7 // :w, child Temp0
#define alpha r46.8 // :w, Scaler version for non Mbaff
#define beta r46.9 // :w, Scaler version for non Mbaff
#define tc0 r46.20 // 4 :ub, r46.20 ~ r46.23, Scaler version for non Mbaff
#define MaskA r46.12 // :uw
#define MaskB r46.13 // :uw
// Child control flags
#define DualFieldMode r47.0 // Cur MB is frame based, above MB is field based in mbaff mode
// :uw, 0 = not in dual field mode, 1 = in dual field mode, filter both top and bot fields
#define GateWayOffsetC r47.1 // :w, Gateway offset for child writing into root space
#define CntrlDataOffsetY r47.1 // :ud, MB control data data offset
#define alpha2 r47.4 // :uw, alpha2 = (alpha >> 2) + 2
#define VertEdgePattern r47.5 // :uw,
#define CTemp1_W r47.6 // :w, child Temp1
#define CTemp1_B r47.12 // :b, = child Temp1_W
#define CTemp2_W r47.7 // :w, child Temp2
#define CTemp2_B r47.14 // :b, = child Temp2_W
// Used in child
#define ECM_AddrReg a0.4 // Edge Control Map register
#define P_AddrReg a0.6 // point to P samples in left or top MB
#define Q_AddrReg a0.7 // point to Q samples in cur MB
.declare RTempD Base=r26.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d // r26-27
.declare RTempB Base=r26.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub // r26-27
.declare RTempW Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w // r26-27
#define LEFT_TEMP_D RTempD
#define LEFT_TEMP_B RTempB
#define LEFT_TEMP_W RTempW
.declare TempRow0 Base=r26.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow0B Base=r26.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare TempRow1 Base=r27.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow1B Base=r27.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare CUR_TEMP_D Base=r28.0 ElementSize=4 SrcRegion=REGION(8,1) Type=d // 8 GRFs
.declare CUR_TEMP_B Base=r28.0 ElementSize=1 SrcRegion=REGION(8,4) Type=ub
.declare CUR_TEMP_W Base=r28.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
#define FilterSampleFlag r28.0 // :uw,
.declare A Base=r28.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare BB Base=r29.0 ElementSize=2 SrcRegion=REGION(16,1) Type=w
.declare TempRow3 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare TempRow3B Base=r30.0 ElementSize=1 SrcRegion=REGION(8,2) Type=ub
.declare tc0_exp Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tc8 Base=r30.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tc_exp Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare tx_exp_8 Base=r31.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare q0_p0 Base=r32.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare ABS_q0_p0 Base=r33.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare ap Base=r34.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare aq Base=r35.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
// These buffers have the src data for each edge to be deblocked.
// They have modified pixels from previous edges.
//
// Y:
// +----+----+----+----+----+----+----+----+
// | p3 | p2 | P1 | p0 | q0 | q1 | q2 | q3 |
// +----+----+----+----+----+----+----+----+
//
// p3 = r[P_AddrReg, 0]<16;16,1>
// p2 = r[P_AddrReg, 16]<16;16,1>
// p1 = r[P_AddrReg, 32]<16;16,1>
// p0 = r[P_AddrReg, 48]<16;16,1>
// q0 = r[Q_AddrReg, 0]<16;16,1>
// q1 = r[Q_AddrReg, 16]<16;16,1>
// q2 = r[Q_AddrReg, 32]<16;16,1>
// q3 = r[Q_AddrReg, 48]<16;16,1>
.declare p0123_W Base=r36.0 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // r36, r37
.declare q0123_W Base=r38.0 ElementSize=2 SrcRegion=REGION(16,1) Type=uw // r38, r39
.declare p3 Base=r36.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p2 Base=r36.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p1 Base=r37.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare p0 Base=r37.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q0 Base=r38.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q1 Base=r38.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q2 Base=r39.0 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare q3 Base=r39.16 ElementSize=1 SrcRegion=REGION(8,1) Type=ub
.declare TempRow2 Base=r38.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
// Temp space for mbaff dual field mode
#define ABOVE_CUR_MB_BASE 40*GRFWIB // Byte offset to r40
.declare ABOVE_CUR_MB_YW Base=r40 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare ABOVE_CUR_MB_UW Base=r40 ElementSize=2 SrcRegion=REGION(8,1) Type=uw
.declare P0_plus_P1 Base=r41.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare Q0_plus_Q1 Base=r42.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare P2_plus_P3 Base=r43.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
.declare Q2_plus_Q3 Base=r44.0 ElementSize=2 SrcRegion=REGION(8,1) Type=w
//////////////////////////////////////////////////////////////////////////////////////////
// MB control data reference
// Expanded control data is in r18 - r25
.declare CNTRL_DATA_D Base=r18 ElementSize=4 SrcRegion=REGION(8,1) Type=ud // For read, 8 GRFs
#define CNTRL_DATA_BASE 18*GRFWIB // Base offset to r18
// Bit mask for extracting bits
#define MbaffFrameFlag 0x01
#define FieldModeCurrentMbFlag 0x02
#define FieldModeLeftMbFlag 0x04
#define FieldModeAboveMbFlag 0x08
#define FilterInternal8x8EdgesFlag 0x10
#define FilterInternal4x4EdgesFlag 0x20
#define FilterLeftMbEdgeFlag 0x40
#define FilterTopMbEdgeFlag 0x80
#define DISABLE_ILDB_FLAG 0x01
// Exact bit pattern for left and cur MB coding mode (frame vs. field)
#define LEFT_FRAME_CUR_FRAME 0x00
#define LEFT_FRAME_CUR_FIELD 0x02
#define LEFT_FIELD_CUR_FRAME 0x04
#define LEFT_FIELD_CUR_FIELD 0x06
// Exact bit pattern for above and cur MB coding mode (frame vs. field)
#define ABOVE_FRAME_CUR_FRAME 0x00
#define ABOVE_FRAME_CUR_FIELD 0x02
#define ABOVE_FIELD_CUR_FRAME 0x08
#define ABOVE_FIELD_CUR_FIELD 0x0A
//========== MB control data field offset in byte ==========
#if !defined(_APPLE)
// GRF0 - GRF1 holds original control data
// GRF0
#define HorizOrigin 0
#define VertOrigin 1
#define BitFlags 2 // Bit flags
#define bbSinternalLeftVert 4 // Internal left vertical bS, 2 bits per bS for 4 Y pixels and 2 U/V pixels
#define bbSinternalMidVert 5 // Internal mid vertical bS
#define bbSinternalRightVert 6 // Internal right vertical bS
#define bbSinternalTopHorz 7 // Internal top horizontal bS
#define bbSinternalMidHorz 8 // Internal mid horizontal bS
#define bbSinternalBotHorz 9 // Internal bottom horizontal bS
#define wbSLeft0 10 // External left vertical bS (0), 4 bits per bS for 4 Y pixels and 2 U/V pixels, and byte 11
#define wbSLeft1 12 // External left vertical bS (1), and byte 13
#define wbSTop0 14 // External top horizontal bS (0), and byte 15
#define wbSTop1 16 // Externaltop horizontal bS (1), and byte 17
#define bIndexAinternal_Y 18 // Internal index A for Y
#define bIndexBinternal_Y 19 // Internal index B for Y
#define bIndexAleft0_Y 20 // Left index A for Y (0)
#define bIndexBleft0_Y 21 // Left index B for Y (0)
#define bIndexAleft1_Y 22 // Left index A for Y (1)
#define bIndexBleft1_Y 23 // Left index B for Y (1)
#define bIndexAtop0_Y 24 // Top index A for Y (0)
#define bIndexBtop0_Y 25 // Top index B for Y (0)
#define bIndexAtop1_Y 26 // Top index A for Y (1)
#define bIndexBtop1_Y 27 // Top index B for Y (1)
#define bIndexAinternal_Cb 28 // Internal index A for Cb
#define bIndexBinternal_Cb 29 // Internal index B for Cb
#define bIndexAleft0_Cb 30 // Left index A for Cb (0)
#define bIndexBleft0_Cb 31 // Left index B for Cb (0)
// GRF1
#define bIndexAleft1_Cb 32 // Left index A for Cb (1)
#define bIndexBleft1_Cb 33 // Left index B for Cb (1)
#define bIndexAtop0_Cb 34 // Top index A for Cb (0)
#define bIndexBtop0_Cb 35 // Top index B for Cb (0)
#define bIndexAtop1_Cb 36 // Top index A for Cb (1)
#define bIndexBtop1_Cb 37 // Top index B for Cb (1)
#define bIndexAinternal_Cr 38 // Internal index A for Cr
#define bIndexBinternal_Cr 39 // Internal index B for Cr
#define bIndexAleft0_Cr 40 // Left index A for Cr (0)
#define bIndexBleft0_Cr 41 // Left index B for Cr (0)
#define bIndexAleft1_Cr 42 // Left index A for Cr (1)
#define bIndexBleft1_Cr 43 // Left index B for Cr (1)
#define bIndexAtop0_Cr 44 // Top index A for Cr (0)
#define bIndexBtop0_Cr 45 // Top index B for Cr (0)
#define bIndexAtop1_Cr 46 // Top index A for Cr (1)
#define bIndexBtop1_Cr 47 // Top index B for Cr (1)
#define ExtBitFlags 48 // Extended bit flags, such as disable ILDB bits
// Offset 49 - 63 not used
//===== GRF2 - GRF7 hold expanded control data =====
// GRF2
#define wEdgeCntlMap_IntLeftVert 64 // Derived from bbSinternalLeftVert, 1 bit per pixel
#define wEdgeCntlMap_IntMidVert 66 // Derived from bbSinternalLeftVert
#define wEdgeCntlMap_IntRightVert 68 // Derived from bbSinternalRightVert
#define wEdgeCntlMap_IntTopHorz 70 // Derived from bbSinternalTopHorz, 1bit per pixel
#define wEdgeCntlMap_IntMidHorz 72 // Derived from bbSinternalMidHorz
#define wEdgeCntlMap_IntBotHorz 74 // Derived from bbSinternalBotHorz
// Offset 76 - 79 not used
#define wEdgeCntlMapA_ExtLeftVert0 80 // Derived from wbSLeft0, 1bit per pixel
#define wEdgeCntlMapB_ExtLeftVert0 82 // Derived from wbSLeft0
#define wEdgeCntlMapA_ExtTopHorz0 84 // Derived from wbSTop0, 1bit per pixel
#define wEdgeCntlMapB_ExtTopHorz0 86 // Derived from wbSTop0
#define wEdgeCntlMapA_ExtLeftVert1 88 // Derived from wbSLeft1, 1bit per pixel
#define wEdgeCntlMapB_ExtLeftVert1 90 // Derived from wbSLeft1
#define wEdgeCntlMapA_ExtTopHorz1 92 // Derived from wbSTop1, 1bit per pixel
#define wEdgeCntlMapB_ExtTopHorz1 94 // Derived from wbSTop1
// GRF3
#define bTc0_v00_0_Y 96 // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
#define bTc0_v10_0_Y 97 // Derived from bSv10_0 and bIndexAleft0_Y
#define bTc0_v20_0_Y 98 // Derived from bSv20_0 and bIndexAleft0_Y
#define bTc0_v30_0_Y 99 // Derived from bSv30_0 and bIndexAleft0_Y
#define bTc0_v01_Y 100 // Derived from bSv01 and bIndexAinternal_Y
#define bTc0_v11_Y 101 // Derived from bSv11 and bIndexAinternal_Y
#define bTc0_v21_Y 102 // Derived from bSv21 and bIndexAinternal_Y
#define bTc0_v31_Y 103 // Derived from bSv31 and bIndexAinternal_Y
#define bTc0_v02_Y 104 // Derived from bSv02 and bIndexAinternal_Y
#define bTc0_v12_Y 105 // Derived from bSv12 and bIndexAinternal_Y
#define bTc0_v22_Y 106 // Derived from bSv22 and bIndexAinternal_Y
#define bTc0_v32_Y 107 // Derived from bSv32 and bIndexAinternal_Y
#define bTc0_v03_Y 108 // Derived from bSv03 and bIndexAinternal_Y
#define bTc0_v13_Y 109 // Derived from bSv13 and bIndexAinternal_Y
#define bTc0_v23_Y 110 // Derived from bSv23 and bIndexAinternal_Y
#define bTc0_v33_Y 111 // Derived from bSv33 and bIndexAinternal_Y
#define bTc0_h00_0_Y 112 // Derived from bSh00_0 and bIndexAleft0_Y
#define bTc0_h01_0_Y 113 // Derived from bSh01_0 and bIndexAleft0_Y
#define bTc0_h02_0_Y 114 // Derived from bSh02_0 and bIndexAleft0_Y
#define bTc0_h03_0_Y 115 // Derived from bSh03_0 and bIndexAleft0_Y
#define bTc0_h10_Y 116 // Derived from bSh10 and bIndexAinternal_Y
#define bTc0_h11_Y 117 // Derived from bSh11 and bIndexAinternal_Y
#define bTc0_h12_Y 118 // Derived from bSh12 and bIndexAinternal_Y
#define bTc0_h13_Y 119 // Derived from bSh13 and bIndexAinternal_Y
#define bTc0_h20_Y 120 // Derived from bSh20 and bIndexAinternal_Y
#define bTc0_h21_Y 121 // Derived from bSh21 and bIndexAinternal_Y
#define bTc0_h22_Y 122 // Derived from bSh22 and bIndexAinternal_Y
#define bTc0_h23_Y 123 // Derived from bSh23 and bIndexAinternal_Y
#define bTc0_h30_Y 124 // Derived from bSh30 and bIndexAinternal_Y
#define bTc0_h31_Y 125 // Derived from bSh31 and bIndexAinternal_Y
#define bTc0_h32_Y 126 // Derived from bSh32 and bIndexAinternal_Y
#define bTc0_h33_Y 127 // Derived from bSh33 and bIndexAinternal_Y
// GRF4
#define bAlphaLeft0_Y 128 // Derived from bIndexAleft0_Y
#define bBetaLeft0_Y 129 // Derived from bIndexBleft0_Y
#define bAlphaTop0_Y 130 // Derived from bIndexAtop0_Y
#define bBetaTop0_Y 131 // Derived from bIndexBtop0_Y
#define bAlphaInternal_Y 132 // Derived from bIndexAinternal_Y
#define bBetaInternal_Y 133 // Derived from bIndexBinternal_Y
// Offset 134 - 135 not used
// Offset 136 - 143 not used
#define bAlphaLeft1_Y 144 // Derived from bIndexAleft1_Y Used in Mbaff mode only
#define bBetaLeft1_Y 145 // Derived from bIndexBleft1_Y Used in Mbaff mode only
#define bAlphaTop1_Y 146 // Derived from bIndexAtop1_Y Used in Mbaff mode only
#define bBetaTop1_Y 147 // Derived from bIndexBtop1_Y Used in Mbaff mode only
// Offset 148 - 151 not used
#define bTc0_v00_1_Y 152 // Derived from bSv00_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v10_1_Y 153 // Derived from bSv10_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v20_1_Y 154 // Derived from bSv20_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_v30_1_Y 155 // Derived from bSv30_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h00_1_Y 156 // Derived from bSh00_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h01_1_Y 157 // Derived from bSh01_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h02_1_Y 158 // Derived from bSh02_1 and bIndexAleft1_Y Used in Mbaff mode only
#define bTc0_h03_1_Y 159 // Derived from bSh03_1 and bIndexAleft1_Y Used in Mbaff mode only
// GRF5
#define bTc0_v00_0_Cb 160 // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cb 161 // Derived from bSv10_0 and bIndexAleft0_Cb
#define bTc0_v20_0_Cb 162 // Derived from bSv20_0 and bIndexAleft0_Cb
#define bTc0_v30_0_Cb 163 // Derived from bSv30_0 and bIndexAleft0_Cb
#define bTc0_v02_Cb 164 // Derived from bSv02 and bIndexAinternal_Cb MidVert
#define bTc0_v12_Cb 165 // Derived from bSv12 and bIndexAinternal_Cb
#define bTc0_v22_Cb 166 // Derived from bSv22 and bIndexAinternal_Cb
#define bTc0_v32_Cb 167 // Derived from bSv32 and bIndexAinternal_Cb
#define bTc0_h00_0_Cb 168 // Derived from bSh00_0 and bIndexAleft0_Cb Top0
#define bTc0_h01_0_Cb 169 // Derived from bSh01_0 and bIndexAleft0_Cb
#define bTc0_h02_0_Cb 170 // Derived from bSh02_0 and bIndexAleft0_Cb
#define bTc0_h03_0_Cb 171 // Derived from bSh03_0 and bIndexAleft0_Cb
#define bTc0_h20_Cb 172 // Derived from bSh20 and bIndexAinternal_Cb MidHorz
#define bTc0_h21_Cb 173 // Derived from bSh21 and bIndexAinternal_Cb
#define bTc0_h22_Cb 174 // Derived from bSh22 and bIndexAinternal_Cb
#define bTc0_h23_Cb 175 // Derived from bSh23 and bIndexAinternal_Cb
#define bTc0_v00_0_Cr 176 // Derived from bSv00_0 and bIndexAleft0_Cr, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cr 177 // Derived from bSv10_0 and bIndexAleft0_Cr
#define bTc0_v20_0_Cr 178 // Derived from bSv20_0 and bIndexAleft0_Cr
#define bTc0_v30_0_Cr 179 // Derived from bSv30_0 and bIndexAleft0_Cr
#define bTc0_v02_Cr 180 // Derived from bSv02 and bIndexAinternal_Cr Mid Vert
#define bTc0_v12_Cr 181 // Derived from bSv12 and bIndexAinternal_Cr
#define bTc0_v22_Cr 182 // Derived from bSv22 and bIndexAinternal_Cr
#define bTc0_v32_Cr 183 // Derived from bSv32 and bIndexAinternal_Cr
#define bTc0_h00_0_Cr 184 // Derived from bSh00_0 and bIndexAleft0_Cr, 2 pixels per tc0 Top0
#define bTc0_h01_0_Cr 185 // Derived from bSh01_0 and bIndexAleft0_Cr
#define bTc0_h02_0_Cr 186 // Derived from bSh02_0 and bIndexAleft0_Cr
#define bTc0_h03_0_Cr 187 // Derived from bSh03_0 and bIndexAleft0_Cr
#define bTc0_h20_Cr 188 // Derived from bSh20 and bIndexAinternal_Cr Mid Horz
#define bTc0_h21_Cr 189 // Derived from bSh21 and bIndexAinternal_Cr
#define bTc0_h22_Cr 190 // Derived from bSh22 and bIndexAinternal_Cr
#define bTc0_h23_Cr 191 // Derived from bSh23 and bIndexAinternal_Cr
// GRF6
#define bAlphaLeft0_Cb 192 // Derived from bIndexAleft0_Cb
#define bBetaLeft0_Cb 193 // Derived from bIndexBleft0_Cb
#define bAlphaTop0_Cb 194 // Derived from bIndexAtop0_Cb
#define bBetaTop0_Cb 195 // Derived from bIndexBtop0_Cb
#define bAlphaInternal_Cb 196 // Derived from bIndexAinternal_Cb
#define bBetaInternal_Cb 197 // Derived from bIndexBinternal_Cb
// Offset 198 - 199 not used
#define bAlphaLeft0_Cr 200 // Derived from bIndexAleft0_Cr
#define bBetaLeft0_Cr 201 // Derived from bIndexBleft0_Cr
#define bAlphaTop0_Cr 202 // Derived from bIndexAtop0_Cr
#define bBetaTop0_Cr 203 // Derived from bIndexBtop0_Cr
#define bAlphaInternal_Cr 204 // Derived from bIndexAinternal_Cr
#define bBetaInternal_Cr 205 // Derived from bIndexBinternal_Cr
// Offset 206 - 223 not used
// GRF7
#define bAlphaLeft1_Cb 224 // Derived from bIndexAleft1_Cb Used in Mbaff mode only
#define bBetaLeft1_Cb 225 // Derived from bIndexBleft1_Cb Used in Mbaff mode only
#define bAlphaTop1_Cb 226 // Derived from bIndexAtop1_Cb Used in Mbaff mode only
#define bBetaTop1_Cb 227 // Derived from bIndexBtop1_Cb Used in Mbaff mode only
// Offset 228 - 231 not used
#define bTc0_v00_1_Cb 232 // Derived from bSv00_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v10_1_Cb 233 // Derived from bSv10_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v20_1_Cb 234 // Derived from bSv20_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_v30_1_Cb 235 // Derived from bSv30_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h00_1_Cb 236 // Derived from bSh00_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h01_1_Cb 237 // Derived from bSh01_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h02_1_Cb 238 // Derived from bSh02_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bTc0_h03_1_Cb 239 // Derived from bSh03_1 and bIndexAleft1_Cb Used in Mbaff mode only
#define bAlphaLeft1_Cr 240 // Derived from bIndexAleft1_Cr Used in Mbaff mode only
#define bBetaLeft1_Cr 241 // Derived from bIndexBleft1_Cr Used in Mbaff mode only
#define bAlphaTop1_Cr 242 // Derived from bIndexAtop1_Cr Used in Mbaff mode only
#define bBetaTop1_Cr 243 // Derived from bIndexBtop1_Cr Used in Mbaff mode only
// Offset 244 - 247 not used
#define bTc0_v00_1_Cr 248 // Derived from bSv00_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v10_1_Cr 249 // Derived from bSv10_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v20_1_Cr 250 // Derived from bSv20_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_v30_1_Cr 251 // Derived from bSv30_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h00_1_Cr 252 // Derived from bSh00_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h01_1_Cr 253 // Derived from bSh01_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h02_1_Cr 254 // Derived from bSh02_1 and bIndexAleft1_Cr Used in Mbaff mode only
#define bTc0_h03_1_Cr 255 // Derived from bSh03_1 and bIndexAleft1_Cr Used in Mbaff mode only
#else // _APPLE is defined
//******** Crestline for Apple, progressive only, 88 bytes **********
// GRF0
#define HorizOrigin 0
#define VertOrigin 1
#define BitFlags 2 // Bit flags
#define wEdgeCntlMap_IntLeftVert 4 // Derived from bbSinternalLeftVert, 1 bit per pixel
#define wEdgeCntlMap_IntMidVert 6 // Derived from bbSinternalLeftVert
#define wEdgeCntlMap_IntRightVert 8 // Derived from bbSinternalRightVert
#define wEdgeCntlMap_IntTopHorz 10 // Derived from bbSinternalTopHorz, 1bit per pixel
#define wEdgeCntlMap_IntMidHorz 12 // Derived from bbSinternalMidHorz
#define wEdgeCntlMap_IntBotHorz 14 // Derived from bbSinternalBotHorz
#define wEdgeCntlMapA_ExtLeftVert0 16 // Derived from wbSLeft0, 1bit per pixel
#define wEdgeCntlMapB_ExtLeftVert0 18 // Derived from wbSLeft0
#define wEdgeCntlMapA_ExtTopHorz0 20 // Derived from wbSTop0, 1bit per pixel
#define wEdgeCntlMapB_ExtTopHorz0 22 // Derived from wbSTop0
#define bAlphaLeft0_Y 24 // Derived from bIndexAleft0_Y
#define bBetaLeft0_Y 25 // Derived from bIndexBleft0_Y
#define bAlphaTop0_Y 26 // Derived from bIndexAtop0_Y
#define bBetaTop0_Y 27 // Derived from bIndexBtop0_Y
#define bAlphaInternal_Y 28 // Derived from bIndexAinternal_Y
#define bBetaInternal_Y 29 // Derived from bIndexBinternal_Y
// GRF1
#define bTc0_v00_0_Y 32 // Derived from bSv00_0 and bIndexAleft0_Y, 4 pixels per tc0
#define bTc0_v10_0_Y 33 // Derived from bSv10_0 and bIndexAleft0_Y
#define bTc0_v20_0_Y 34 // Derived from bSv20_0 and bIndexAleft0_Y
#define bTc0_v30_0_Y 35 // Derived from bSv30_0 and bIndexAleft0_Y
#define bTc0_v01_Y 36 // Derived from bSv01 and bIndexAinternal_Y
#define bTc0_v11_Y 37 // Derived from bSv11 and bIndexAinternal_Y
#define bTc0_v21_Y 38 // Derived from bSv21 and bIndexAinternal_Y
#define bTc0_v31_Y 39 // Derived from bSv31 and bIndexAinternal_Y
#define bTc0_v02_Y 40 // Derived from bSv02 and bIndexAinternal_Y
#define bTc0_v12_Y 41 // Derived from bSv12 and bIndexAinternal_Y
#define bTc0_v22_Y 42 // Derived from bSv22 and bIndexAinternal_Y
#define bTc0_v32_Y 43 // Derived from bSv32 and bIndexAinternal_Y
#define bTc0_v03_Y 44 // Derived from bSv03 and bIndexAinternal_Y
#define bTc0_v13_Y 45 // Derived from bSv13 and bIndexAinternal_Y
#define bTc0_v23_Y 46 // Derived from bSv23 and bIndexAinternal_Y
#define bTc0_v33_Y 47 // Derived from bSv33 and bIndexAinternal_Y
#define bTc0_h00_0_Y 48 // Derived from bSh00_0 and bIndexAleft0_Y
#define bTc0_h01_0_Y 49 // Derived from bSh01_0 and bIndexAleft0_Y
#define bTc0_h02_0_Y 50 // Derived from bSh02_0 and bIndexAleft0_Y
#define bTc0_h03_0_Y 51 // Derived from bSh03_0 and bIndexAleft0_Y
#define bTc0_h10_Y 52 // Derived from bSh10 and bIndexAinternal_Y
#define bTc0_h11_Y 53 // Derived from bSh11 and bIndexAinternal_Y
#define bTc0_h12_Y 54 // Derived from bSh12 and bIndexAinternal_Y
#define bTc0_h13_Y 55 // Derived from bSh13 and bIndexAinternal_Y
#define bTc0_h20_Y 56 // Derived from bSh20 and bIndexAinternal_Y
#define bTc0_h21_Y 57 // Derived from bSh21 and bIndexAinternal_Y
#define bTc0_h22_Y 58 // Derived from bSh22 and bIndexAinternal_Y
#define bTc0_h23_Y 59 // Derived from bSh23 and bIndexAinternal_Y
#define bTc0_h30_Y 60 // Derived from bSh30 and bIndexAinternal_Y
#define bTc0_h31_Y 61 // Derived from bSh31 and bIndexAinternal_Y
#define bTc0_h32_Y 62 // Derived from bSh32 and bIndexAinternal_Y
#define bTc0_h33_Y 63 // Derived from bSh33 and bIndexAinternal_Y
// GRF2,
#define bTc0_v00_0_Cb 64 // Derived from bSv00_0 and bIndexAleft0_Cb, 2 pixels per tc0 Left0
#define bTc0_v10_0_Cb 65 // Derived from bSv10_0 and bIndexAleft0_Cb
#define bTc0_v20_0_Cb 66 // Derived from bSv20_0 and bIndexAleft0_Cb
#define bTc0_v30_0_Cb 67 // Derived from bSv30_0 and bIndexAleft0_Cb
#define bTc0_v02_Cb 68 // Derived from bSv02 and bIndexAinternal_Cb MidVert
#define bTc0_v12_Cb 69 // Derived from bSv12 and bIndexAinternal_Cb
#define bTc0_v22_Cb 70 // Derived from bSv22 and bIndexAinternal_Cb
#define bTc0_v32_Cb 71 // Derived from bSv32 and bIndexAinternal_Cb
#define bTc0_h00_0_Cb 72 // Derived from bSh00_0 and bIndexAleft0_Cb Top0
#define bTc0_h01_0_Cb 73 // Derived from bSh01_0 and bIndexAleft0_Cb
#define bTc0_h02_0_Cb 74 // Derived from bSh02_0 and bIndexAleft0_Cb
#define bTc0_h03_0_Cb 75 // Derived from bSh03_0 and bIndexAleft0_Cb
#define bTc0_h20_Cb 76 // Derived from bSh20 and bIndexAinternal_Cb MidHorz
#define bTc0_h21_Cb 77 // Derived from bSh21 and bIndexAinternal_Cb
#define bTc0_h22_Cb 78 // Derived from bSh22 and bIndexAinternal_Cb
#define bTc0_h23_Cb 79 // Derived from bSh23 and bIndexAinternal_Cb
#define bAlphaLeft0_Cb 80 // Derived from bIndexAleft0_Cb
#define bBetaLeft0_Cb 81 // Derived from bIndexBleft0_Cb
#define bAlphaTop0_Cb 82 // Derived from bIndexAtop0_Cb
#define bBetaTop0_Cb 83 // Derived from bIndexBtop0_Cb
#define bAlphaInternal_Cb 84 // Derived from bIndexAinternal_Cb
#define bBetaInternal_Cb 85 // Derived from bIndexBinternal_Cb
#define ExtBitFlags 86 // Extended bit flags, such as disable ILDB bits
// Shared between Cb and Cr
#define bTc0_v00_0_Cr bTc0_v00_0_Cb
#define bTc0_v10_0_Cr bTc0_v10_0_Cb
#define bTc0_v20_0_Cr bTc0_v20_0_Cb
#define bTc0_v30_0_Cr bTc0_v30_0_Cb
#define bTc0_v02_Cr bTc0_v02_Cb
#define bTc0_v12_Cr bTc0_v12_Cb
#define bTc0_v22_Cr bTc0_v22_Cb
#define bTc0_v32_Cr bTc0_v32_Cb
#define bTc0_h00_0_Cr bTc0_h00_0_Cb
#define bTc0_h01_0_Cr bTc0_h01_0_Cb
#define bTc0_h02_0_Cr bTc0_h02_0_Cb
#define bTc0_h03_0_Cr bTc0_h03_0_Cb
#define bTc0_h20_Cr bTc0_h20_Cb
#define bTc0_h21_Cr bTc0_h21_Cb
#define bTc0_h22_Cr bTc0_h22_Cb
#define bTc0_h23_Cr bTc0_h23_Cb
#define bAlphaLeft0_Cr bAlphaLeft0_Cb
#define bBetaLeft0_Cr bBetaLeft0_Cb
#define bAlphaTop0_Cr bAlphaTop0_Cb
#define bBetaTop0_Cr bBetaTop0_Cb
#define bAlphaInternal_Cr bAlphaInternal_Cb
#define bBetaInternal_Cr bBetaInternal_Cb
#endif
//========== End of Child Variables ===============================================================
#if !defined(COMBINED_KERNEL)
#define ILDB_LABEL(x) x // No symbol extension for standalone kernels
#endif
#endif // !defined(__AVC_ILDB_HEADER__)

View File

@@ -0,0 +1,9 @@
/*
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */
// Thin wrapper: pulls in the AVC ILDB chroma (UV) child-kernel source so it
// can be assembled as its own compilation unit.
#include "AVC_ILDB_Child_UV.asm"

View File

@@ -0,0 +1,9 @@
/*
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 *
 */
// Thin wrapper: pulls in the AVC ILDB luma (Y) child-kernel source so it
// can be assembled as its own compilation unit.
#include "AVC_ILDB_Child_Y.asm"

View File

@@ -0,0 +1,173 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
//
// First de-block vertical edges from left to right.
// Second de-block horizontal edge from top to bottom.
//
// For 4:2:0, chroma is always de-blocked at 8x8.
// NV12 format allows to filter U and V together.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_UV
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_UV):			// Entry label when linked into the combined kernel image
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE997:w		// Debug-only: write this kernel's entry signature (0xE997)
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_W RTempW
#define BUF_D RTempD
// Init local variables
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
mov (2) f0.0<1>:w 0:w					// Clear two flag words starting at f0.0 (f0.0 and f0.1)
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_UV)
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds the global byte offset.
#if !defined(DEV_CL)
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w	// Bias left-MB origin 4 bytes left of the current MB
add (1) ORIY_TOP:w ORIY_TOP:w -4:w		// Bias top-MB origin 4 bytes above the current MB
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
// Build a ramp from 0 to 15
mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub
add (8) RRampW(0,8)<1> RRampW(0,8) 8:w // RRampW = ramp 15-0
ILDB_LABEL(RE_ENTRY_UV): // Re-entry point for the bottom field / bottom MB pass
// Load current MB control data
#if defined(DEV_CL)
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init ECM_AddrReg
// Use free cycles here
// Check loaded control data
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB (all edge-control-map bits are zero)
(f0.0) jmpi ILDB_LABEL(SKIP_ILDB_UV) // Skip ILDB (DISABLE_ILDB_FLAG set)
// Set DualFieldMode for all data read, write and deblocking
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w // Above MB field, current MB frame?
and (1) DualFieldMode:w f0.0:w 0x0001:w // DualFieldMode = 1 only for that above-field/cur-frame case
#include "load_Cur_UV_8x8T_Mbaff.asm" // Load transposed data 8x8
#include "load_Left_UV_2x8T_Mbaff.asm" // Load left MB (2x8) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm"
#include "Transpose_Left_UV_2x8.asm"
//---------- Perform vertical ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_v.asm"
//---------------------------------------------------------
#include "save_Left_UV_8x2T_Mbaff.asm" // Write left MB (2x8) UV data to memory if exists
#include "load_Top_UV_8x2_Mbaff.asm" // Load top MB (8x2) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_Mbaff_UV_h.asm"
//-----------------------------------------------------------
#include "save_Cur_UV_8x8_Mbaff.asm" // Write 8x8
#include "save_Top_UV_8x2_Mbaff.asm" // Write top MB (8x2) if not the top row
//-----------------------------------------------------------
ILDB_LABEL(SKIP_ILDB_UV):
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w // f0.0 set when the bottom MB has not been processed yet
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi ILDB_LABEL(RE_ENTRY_UV) // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
ILDB_LABEL(POST_ILDB_UV):
// Send notification thru Gateway to root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Chroma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,188 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edge from top to bottom.
//
// ***** MBAFF Mode *****
// This version deblocks top MB first, followed by bottom MB.
//
// Need variable CurMB to indicate top MB or bottom MB (CurMB = 0 or 1).
// We can use BotFieldFlag in BitFields to represent it.
//
// Usage:
// 1) Access control data for top
// CntrlDataOffsetY + CurMB * Control data block size (64 DWs for CL, 16 DWs for BLC)
//
// 2) Load frame/field video data based on flags: FieldModeCurrentMbFlag, FieldModeLeftMbFlag, FieldModeAboveMbFlag.
//
// E.g.
// if (pCntlData->BitField & FieldModeCurrentMbFlag)
// cur_y = ORIX_CUR.y + CurMB * 1; // Add field vertical offset for bot field MB .
// else
// cur_y = ORIX_CUR.y + CurMB * MB_Rows_Y; // Add bottom MB vertical offset for bot MB
//
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_MBAFF_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xE998:w
#endif
// Setup temp buf used by load and save code
#define BUF_B RTempB
#define BUF_D RTempD
// Init local variables
// These coordinates are in progressive fashion
mul (4) ORIX_CUR<2>:w ORIX<0;1,0>:w 16:w { NoDDClr } // Expand X addr to bytes, repeat 4 times
mul (4) ORIY_CUR<2>:w ORIY<0;1,0>:w 32:w { NoDDChk } // Expand Y addr to bytes, repeat 4 times
mov (2) f0.0<1>:w 0:w
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
//=== Null Kernel ===============================================================
// jmpi POST_ILDB
//===============================================================================
//====================================================================================
// Assuming the MB control data is laid out in scan line order in a rectangle with width = 16 bytes.
// Control data has dimension of X x Y = 16 x N bytes, where N = W x H / 16
// Each MB has 256 bytes of control data
// For CRESTLINE, 256 bytes are stored in memory and fetched into GRF.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (256 << Mbaff_flag), Mbaff_flag = 0 or 1.
// Base address of a control data block = (x, y) = (0, y'=y/x), region width is 16 bytes
// where y' = Byte_offset / 16 = MB_offset * (16 << Mbaff_flag)
// MBCntrlDataOffsetY holds y'.
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// Byte_offset = MB_offset * (64 << Mbaff_flag), Mbaff_flag = 0 or 1.
// MBCntrlDataOffsetY holds globel byte offset.
#if !defined(DEV_CL)
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 128:uw
#endif
//====================================================================================
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w
add (1) ORIY_TOP:w ORIY_TOP:w -4:w
//=========== Process Top MB ============
and (1) BitFields:w BitFields:w TopFieldFlag:w // Reset BotFieldFlag
RE_ENTRY: // for bootom field
// Load current MB control data
#if defined(DEV_CL)
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init edge control map AddrReg
// Check loaded control data
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Use free cycles here
// Set DualFieldMode for all data read, write and deblocking
and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
// Get Vert Edge Pattern (frame vs. field MBs)
and (1) VertEdgePattern:uw r[ECM_AddrReg, BitFlags]:ub FieldModeLeftMbFlag+FieldModeCurrentMbFlag:uw
(f0.1.all16h) jmpi SKIP_ILDB // Skip ILDB
(f0.0) jmpi SKIP_ILDB // Skip ILDB
// Set DualFieldMode for all data read, write and deblocking
// and (1) CTemp1_W:uw r[ECM_AddrReg, BitFlags]:ub FieldModeAboveMbFlag+FieldModeCurrentMbFlag:uw
cmp.z.f0.0 (1) null:w CTemp1_W:uw ABOVE_FIELD_CUR_FRAME:w
and (1) DualFieldMode:w f0.0:w 0x0001:w
// Load current MB // DDD1
#include "load_Cur_Y_16x16T_Mbaff.asm" // Load cur Y, 16x16, transpose
#include "load_Left_Y_4x16T_Mbaff.asm" // Load left MB (4x16) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm"
#include "Transpose_Left_Y_4x16.asm"
//---------- Perform vertical ILDB filting on Y----------
#include "AVC_ILDB_Filter_Mbaff_Y_v.asm"
//-------------------------------------------------------
#include "save_Left_Y_16x4T_Mbaff.asm" // Write left MB (4x16) Y data to memory if exists
#include "load_Top_Y_16x4_Mbaff.asm" // Load top MB (16x4) Y data from memory if exists
#include "Transpose_Cur_Y_16x16.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filting on Y ----------
#include "AVC_ILDB_Filter_Mbaff_Y_h.asm"
//----------------------------------------------------------
#include "save_Cur_Y_16x16_Mbaff.asm" // Write cur MB (16x16)
#include "save_Top_Y_16x4_Mbaff.asm" // Write top MB (16x4) if not the top row
SKIP_ILDB:
//----------------------------------------------------------
and.z.f0.0 (1) null:w BitFields:w BotFieldFlag:w
//=========== Process Bottom MB ============
or (1) BitFields:w BitFields:w BotFieldFlag:w // Set BotFieldFlag to 1
(f0.0) jmpi RE_ENTRY // Loop back for bottom deblocking
// Fall through to finish
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0)
POST_ILDB:
//---------------------------------------------------------------------------
// Send notification thru Gateway to root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(DEV_ILK)
// The thread finishs here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subrutines being called
#include "AVC_ILDB_Luma_Core_Mbaff.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,186 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB UV comp)
//
// First de-block vertical edges from left to right.
// Second de-block horizontal edge from top to bottom.
//
// For 4:2:0, chroma is always de-blocked at 8x8.
// NV12 format allows to filter U and V together.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_UV
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_UV):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x9997:w
#endif
// Init local variables
shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init ECM_AddrReg
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_UV_UV)
//===============================================================================
#if defined(DEV_CL)
mov (1) acc0.0:w 240:w
#else
//====================================================================================
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
// Assign to MSGSRC.2:ud for memory access
// mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 64:uw
mul (1) MSGSRC.2:ud CntrlDataOffsetY:ud 64:uw
mov (1) acc0.0:w 320:w
#endif
mac (1) URBOffsetC:w ORIY:w 4:w // UV URB entries are right after Y entries
// Init local variables
// shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w // Left neighbor region starts 4 bytes to the left
add (1) ORIY_TOP:w ORIY_TOP:w -4:w // Top neighbor region starts 4 rows up
// Build a ramp from 0 to 15
mov (16) RRampW(0)<1> RampConstC<0;8,1>:ub
add (8) RRampW(0,8)<1> RRampW(0,8) 8:w // RRampW = ramp 15-0
// Load current MB control data
#if defined(DEV_CL)
#if defined(_APPLE)
#include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Check loaded control data
#if defined(_APPLE)
and.z.f0.1 (8) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw 0xFFFF:uw // Skip ILDB?
(f0.1) and.z.f0.1 (2) null<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw 0xFFFF:uw // Skip ILDB?
#else
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
#endif
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
#if defined(_APPLE)
(f0.1.all8h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#else
(f0.1.all16h) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#endif
(f0.0) jmpi ILDB_LABEL(READ_FOR_URB_UV) // Skip ILDB
#include "load_Cur_UV_8x8T.asm" // Load transposed data 8x8
// #include "load_Left_UV_2x8T.asm"
#include "load_Top_UV_8x2.asm" // Load top MB (8x2) UV data from memory if exists
#include "Transpose_Cur_UV_8x8.asm"
// #include "Transpose_Left_UV_2x8.asm"
//---------- Perform vertical ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_UV_v.asm"
//---------------------------------------------------------
#include "save_Left_UV_8x2T.asm" // Write left MB (2x8) UV data to memory if exists
#include "Transpose_Cur_UV_8x8.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on UV ----------
#include "AVC_ILDB_Filter_UV_h.asm"
//-----------------------------------------------------------
#include "save_Cur_UV_8x8.asm" // Write 8x8
#include "save_Top_UV_8x2.asm" // Write top MB (8x2) if not the top row
//---------- Write right most 4 columns of cur MB to URB ----------
// Transpose the right most 2 cols 2x8 (word) in GRF to 8x2 in BUF_D. It is 2 left most cols in cur MB.
#include "Transpose_Cur_UV_2x8.asm"
ILDB_LABEL(WRITE_URB_UV):
mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF to 1 URB entry (U+V)
#include "writeURB_UV_Child.asm"
//-----------------------------------------------------------------
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0) // Touch writeback reg to stall until the last write commits
ILDB_LABEL(POST_ILDB_UV):
//---------------------------------
// Send notification thru Gateway to root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
ILDB_LABEL(READ_FOR_URB_UV):
// Still need to prepare URB data for the right neighbor MB
#include "load_Cur_UV_Right_Most_2x8.asm" // Load cur MB (right most 2x8) UV data from memory
#include "Transpose_Cur_UV_Right_Most_2x8.asm"
// jmpi ILDB_LABEL(WRITE_URB_UV)
mov (8) m1<1>:ud LEFT_TEMP_D(1)<8;8,1> // Copy 1 GRF to 1 URB entry (U+V)
#include "writeURB_UV_Child.asm"
//-----------------------------------------------------------------
// Send notification thru Gateway to root thread, update chroma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
// #include "AVC_ILDB_Luma_Core.asm"
#include "AVC_ILDB_Chroma_Core.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,176 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// AVC Child Kernel (Vertical and horizontal de-block a 4:2:0 MB Y comp)
//
// First, de-block vertical edges from left to right.
// Second, de-block horizontal edge from top to bottom.
//
// If transform_size_8x8_flag = 1, luma is de-blocked at 8x8. Otherwise, luma is de-blocked at 4x4.
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define AVC_ILDB
.kernel AVC_ILDB_CHILD_Y
#if defined(COMBINED_KERNEL)
ILDB_LABEL(AVC_ILDB_CHILD_Y):
#endif
#include "SetupVPKernel.asm"
#include "AVC_ILDB.inc"
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x9998:w
#endif
// Init local variables
shl (8) ORIX_CUR<1>:w ORIX<0;2,1>:w 4:w // Expand addr to bytes, repeat (x,y) 4 times
// Init addr register for vertical control data
mov (1) ECM_AddrReg<1>:w CNTRL_DATA_BASE:w // Init edge control map AddrReg
//=== Null Kernel ===============================================================
// jmpi ILDB_LABEL(POST_ILDB_Y)
//===============================================================================
mul (1) URBOffsetC:uw ORIY:uw 4:w // 4 URB entries per row
#if !defined(DEV_CL)
//====================================================================================
// For BearLake-C, 64 bytes are stored in memory and dataport expands to 256 bytes. Need to use a special read command on BL-C.
// MB_offset = MBsCntX * CurRow + CurCol
// MBCntrlDataOffsetY = global_byte_offset = MB_offset * 64
mul (1) CntrlDataOffsetY:ud MBsCntX:w ORIY:w
add (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud ORIX:w
// Assign to MSGSRC.2:ud for memory access
// mul (1) CntrlDataOffsetY:ud CntrlDataOffsetY:ud 64:uw
mul (1) MSGSRC.2:ud CntrlDataOffsetY:ud 64:uw
#endif
// Load current MB control data
#if defined(DEV_CL)
#if defined(_APPLE)
#include "Load_ILDB_Cntrl_Data_22DW.asm" // Crestline for Apple, progressive only
#else
#include "Load_ILDB_Cntrl_Data_64DW.asm" // Crestline
#endif
#else
#include "Load_ILDB_Cntrl_Data_16DW.asm" // Cantiga and beyond
#endif
// Check loaded control data
#if defined(_APPLE)
and.z.f0.1 (8) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<8;8,1>:uw 0xFFFF:uw // Skip ILDB?
(f0.1) and.z.f0.1 (2) null<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw 0xFFFF:uw // Skip ILDB?
#else
and.z.f0.1 (16) null<1>:uw r[ECM_AddrReg, wEdgeCntlMap_IntLeftVert]<16;16,1>:uw 0xFFFF:uw // Skip ILDB?
#endif
and.nz.f0.0 (1) null:w r[ECM_AddrReg, ExtBitFlags]:ub DISABLE_ILDB_FLAG:w // Skip ILDB?
// Use free cycles here
add (1) ORIX_LEFT:w ORIX_LEFT:w -4:w // Left neighbor region starts 4 bytes to the left
// add (1) ORIY_TOP:w ORIY_TOP:w -4:w
mov (1) GateWayOffsetC:uw ORIY:uw // Use row # as Gateway offset
#if defined(_APPLE)
(f0.1.all8h) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
#else
(f0.1.all16h) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
#endif
(f0.0) jmpi ILDB_LABEL(READ_FOR_URB_Y) // Skip ILDB
add (1) ORIY_TOP:w ORIY_TOP:w -4:w // Top neighbor region starts 4 rows up
// Better performance is observed if boundary MBs are not checked and skipped.
#include "load_Cur_Y_16x16T.asm" // Load cur MB Y, 16x16, transpose
// #include "load_Left_Y_4x16T.asm" // Load left MB (4x16) Y data from memory
#include "load_Top_Y_16x4.asm" // Load top MB (16x4) Y data from memory
#include "Transpose_Cur_Y_16x16.asm"
// #include "Transpose_Left_Y_4x16.asm"
//---------- Perform vertical ILDB filtering on Y ---------
#include "AVC_ILDB_Filter_Y_v.asm"
//-------------------------------------------------------
#include "save_Left_Y_16x4T.asm" // Write left MB (4x16) Y data to memory
#include "Transpose_Cur_Y_16x16.asm" // Transpose a MB for horizontal edge de-blocking
//---------- Perform horizontal ILDB filtering on Y -------
#include "AVC_ILDB_Filter_Y_h.asm"
//-------------------------------------------------------
#include "save_Cur_Y_16x16.asm" // Write cur MB (16x16)
#include "save_Top_Y_16x4.asm" // Write top MB (16x4)
//---------- Write right most 4 columns of cur MB to URB ----------
// Transpose the right most 4 cols 4x16 in GRF to 16x4 in LEFT_TEMP_B. It is 4 left most cols in cur MB.
#include "Transpose_Cur_Y_4x16.asm"
ILDB_LABEL(WRITE_URB_Y):
// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail
mov (16) m1<1>:ud LEFT_TEMP_D(2)<8;8,1> // Copy 2 GRFs to 2 URB entries (Y)
#include "writeURB_Y_Child.asm"
//-----------------------------------------------------------------
//=========== Check write commit of the last write ============
mov (8) WritebackResponse(0)<1> WritebackResponse(0) // Touch writeback reg to stall until the last write commits
ILDB_LABEL(POST_ILDB_Y):
// Send notification thru Gateway to root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
ILDB_LABEL(READ_FOR_URB_Y):
// Still need to prepare URB data for the right neighbor MB
#include "load_Cur_Y_Right_Most_4x16.asm" // Load cur MB ( right most 4x16) Y data from memory
#include "Transpose_Cur_Y_Right_Most_4x16.asm"
// jmpi ILDB_LABEL(WRITE_URB_Y)
// Note: LEFT_TEMP_B(2) = TOP_TEMP_B(0), TOP_TEMP_B must be avail
mov (16) m1<1>:ud LEFT_TEMP_D(2)<8;8,1> // Copy 2 GRFs to 2 URB entries (Y)
#include "writeURB_Y_Child.asm"
//-----------------------------------------------------------------
// Send notification thru Gateway to root thread, update luma Status[CurRow]
#include "AVC_ILDB_ForwardMsg.asm"
#if !defined(GW_DCN) // For non-ILK chipsets
//child send EOT : Request type = 1
END_CHILD_THREAD
#endif // !defined(GW_DCN)
// The thread finishes here
//------------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Include other subroutines being called
#include "AVC_ILDB_Luma_Core.asm"
// #include "AVC_ILDB_Chroma_Core.asm"
#if !defined(COMBINED_KERNEL) // For standalone kernel only
.end_code
.end_kernel
#endif

View File

@@ -0,0 +1,165 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
#if !defined(__AVC_ILDB_CHROMA_CORE__) // Make sure this file is only included once
#define __AVC_ILDB_CHROMA_CORE__
////////// AVC ILDB Chroma Core /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - alpha // alpha corresponding to the edge to be filtered
// - beta // beta corresponding to the edge to be filtered
// - tc0 // tc0 corresponding to the edge to be filtered
//
// U or V:
// +----+----+----+----+
// | P1 | p0 | q0 | q1 |
// +----+----+----+----+
//
// p1 = r[P_AddrReg, 0]<16;8,2>
// p0 = r[P_AddrReg, 16]<16;8,2>
// q0 = r[Q_AddrReg, 0]<16;8,2>
// q1 = r[Q_AddrReg, 16]<16;8,2>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The region is both src and dest
// P0-P1 and Q0-Q1 should be only used if they have not been modified to new values
#undef P1
#undef P0
#undef Q0
#undef Q1
#define P1 r[P_AddrReg, 0]<16;8,2>:ub
#define P0 r[P_AddrReg, 16]<16;8,2>:ub
#define Q0 r[Q_AddrReg, 0]<16;8,2>:ub
#define Q1 r[Q_AddrReg, 16]<16;8,2>:ub
// New region as dest
#undef NewP0
#undef NewQ0
#define NewP0 r[P_AddrReg, 16]<2>:ub
#define NewQ0 r[Q_AddrReg, 0]<2>:ub
// Filter one chroma edge
FILTER_UV:
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x1112:w
#endif
//---------- Derive filterSampleFlag in AVC spec, equation (8-469) ----------
// bS is in MaskA
// Src copy of the p1, p0, q0, q1
// mov (8) p1(0)<1> r[P_AddrReg, 0]<16;8,2>:ub
// mov (8) p0(0)<1> r[P_AddrReg, 16]<16;8,2>:ub
// mov (8) q0(0)<1> r[Q_AddrReg, 0]<16;8,2>:ub
// mov (8) q1(0)<1> r[Q_AddrReg, 16]<16;8,2>:ub
// mov (1) f0.0:uw MaskA:uw
add (8) q0_p0(0)<1> Q0 -P0 // q0-p0
add (8) TempRow0(0)<1> P1 -P0 // p1-p0
add (8) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Build FilterSampleFlag
// Each predicated cmp narrows f0.0 further, so f0.0 ends up as the AND of all three threshold tests
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) alpha:w
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) beta:w
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) beta:w
//-----------------------------------------------------------------------------------------
// if
(f0.0) if (8) UV_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:w MaskB:w {NoMask} // Now check for which algorithm to apply
(f0.1) if (8) UV_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// p0' = (2*p1 + p0 + q1 + 2) >> 2
// q0' = (2*q1 + q0 + p1 + 2) >> 2
// Optimized version:
// A = (p1 + q1 + 2)
// p0' = (p0 + p1 + A) >> 2
// q0' = (q0 + q1 + A) >> 2
//------------------------------------------------------------------------------------
// p0' = (2*p1 + p0 + q1 + 2) >> 2
add (8) acc0<1>:w Q1 2:w
mac (8) acc0<1>:w P1 2:w
add (8) acc0<1>:w acc0<8;8,1>:w P0
shr.sat (8) TempRow0B(0)<2> acc0<8;8,1>:w 2:w
// q0' = (2*q1 + q0 + p1 + 2) >> 2
add (8) acc0<1>:w P1 2:w
mac (8) acc0<1>:w Q1 2:w
add (8) acc0<1>:w acc0<8;8,1>:w Q0
shr.sat (8) TempRow1B(0)<2> acc0<8;8,1>:w 2:w
mov (8) NewP0 TempRow0B(0) // p0'
mov (8) NewQ0 TempRow1B(0) // q0'
UV_ELSE2:
else (8) UV_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
// Expand tc0 (tc0 has 4 bytes)
// mov (8) tc0_exp(0)<1> tc0<1;2,0>:ub {NoMask} // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels
mov (8) acc0<1>:w tc0<1;2,0>:ub {NoMask} // tc0_exp = tc0, each tc0 is duplicated 2 times for 2 adjacent pixels
// tc_exp = tc0_exp + 1
// add (8) tc_exp(0)<1> tc0_exp(0) 1:w
add (8) tc_exp(0)<1> acc0<8;8,1>:w 1:w
// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (8) acc0<1>:w P1 4:w
mac (8) acc0<1>:w q0_p0(0) 4:w
add (8) acc0<1>:w acc0<8;8,1>:w -Q1
shr (8) TempRow0(0)<1> acc0<8;8,1>:w 3:w
// tc clip
cmp.g.f0.0 (8) null:w TempRow0(0) tc_exp(0) // Clip if > tc0
cmp.l.f0.1 (8) null:w TempRow0(0) -tc_exp(0) // Clip if < -tc0
(f0.0) mov (8) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (8) TempRow0(0)<1> -tc_exp(0)
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
add.sat (8) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (8) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (8) NewP0 TempRow1B(0) // p0'
mov (8) NewQ0 TempRow0B(0) // q0'
endif
UV_ENDIF2:
UV_ENDIF1:
endif
RETURN
#endif // !defined(__AVC_ILDB_CHROMA_CORE__)

View File

@@ -0,0 +1,146 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
////////// AVC ILDB Chroma Core Mbaff /////////////////////////////////////////////////////////////////////////////////
//
// This core performs AVC U or V ILDB filtering on one horizontal edge (8 pixels) of a MB.
// If data is transposed, it can also de-block a vertical edge.
//
// Before calling this subroutine, caller needs to set the following parameters.
//
// - EdgeCntlMap1 // Edge control map A
// - EdgeCntlMap2 // Edge control map B
// - P_AddrReg // Src and dest address register for P pixels
// - Q_AddrReg // Src and dest address register for Q pixels
// - alpha // alpha corresponding to the edge to be filtered
// - beta // beta corresponding to the edge to be filtered
// - tc0 // tc0 corresponding to the edge to be filtered
//
// U or V:
// +----+----+----+----+
// | P1 | p0 | q0 | q1 |
// +----+----+----+----+
//
// p1 = r[P_AddrReg, 0]<16;8,2>
// p0 = r[P_AddrReg, 16]<16;8,2>
// q0 = r[Q_AddrReg, 0]<16;8,2>
// q1 = r[Q_AddrReg, 16]<16;8,2>
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// The region is both src and dest
// P0-P1 and Q0-Q1 should be only used if they have not been modified to new values
#undef P1
#undef P0
#undef Q0
#undef Q1
#define P1 r[P_AddrReg, 0]<16;8,2>:ub
#define P0 r[P_AddrReg, 16]<16;8,2>:ub
#define Q0 r[Q_AddrReg, 0]<16;8,2>:ub
#define Q1 r[Q_AddrReg, 16]<16;8,2>:ub
// New region as dest
#undef NewP0
#undef NewQ0
#define NewP0 r[P_AddrReg, 16]<2>:ub
#define NewQ0 r[Q_AddrReg, 0]<2>:ub
// Filter one chroma edge - mbaff
FILTER_UV_MBAFF:
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0x1112:w // NOTE(review): same debug signature as FILTER_UV — confirm whether a distinct value was intended
#endif
//---------- Derive filterSampleFlag in AVC spec, equation (8-469) ----------
//===== Assume f0.0 contains MaskA when entering this routine
// mov (1) f0.0:uw MaskA:uw
add (8) q0_p0(0)<1> Q0 -P0 // q0-p0
add (8) TempRow0(0)<1> P1 -P0 // p1-p0
add (8) TempRow1(0)<1> Q1 -Q0 // q1-q0
// Build FilterSampleFlag
// Each predicated cmp narrows f0.0 further, so f0.0 ends up as the AND of all three threshold tests
// abs(q0-p0) < alpha
(f0.0) cmp.l.f0.0 (16) null:w (abs)q0_p0(0) Mbaff_ALPHA(0)
// abs(p1-p0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow0(0) Mbaff_BETA(0)
// abs(q1-q0) < Beta
(f0.0) cmp.l.f0.0 (16) null:w (abs)TempRow1(0) Mbaff_BETA(0)
//-----------------------------------------------------------------------------------------
// if
(f0.0) if (8) MBAFF_UV_ENDIF1
// For channels whose edge control map1 = 1 ---> perform de-blocking
// mov (1) f0.1:w MaskB:w {NoMask} // Now check for which algorithm to apply
(f0.1) if (8) MBAFF_UV_ELSE2
// For channels whose edge control map2 = 1 ---> bS = 4 algorithm
// p0' = (2*p1 + P0 + q1 + 2) >> 2
// q0' = (2*q1 + q0 + p1 + 2) >> 2
//------------------------------------------------------------------------------------
// p0' = (2*p1 + p0 + q1 + 2) >> 2
add (8) acc0<1>:w Q1 2:w
mac (8) acc0<1>:w P1 2:w
add (8) acc0<1>:w acc0<8;8,1>:w P0
shr.sat (8) TempRow0B(0)<2> acc0<8;8,1>:w 2:w
// q0' = (2*q1 + q0 + p1 + 2) >> 2
add (8) acc0<1>:w P1 2:w
mac (8) acc0<1>:w Q1 2:w
add (8) acc0<1>:w acc0<8;8,1>:w Q0
shr.sat (8) TempRow1B(0)<2> acc0<8;8,1>:w 2:w
mov (8) NewP0 TempRow0B(0) // p0'
mov (8) NewQ0 TempRow1B(0) // q0'
MBAFF_UV_ELSE2:
else (8) MBAFF_UV_ENDIF2
// For channels whose edge control map2 = 0 ---> bS < 4 algorithm
// tc_exp = tc0_exp + 1
add (8) tc_exp(0)<1> Mbaff_TC0(0) 1:w
// delta = Clip3(-tc, tc, ((((q0 - p0)<<2) + (p1-q1) + 4) >> 3))
// 4 * (q0-p0) + p1 - q1 + 4
add (8) acc0<1>:w P1 4:w
mac (8) acc0<1>:w q0_p0(0) 4:w
add (8) acc0<1>:w acc0<8;8,1>:w -Q1
shr (8) TempRow0(0)<1> acc0<8;8,1>:w 3:w
// tc clip
cmp.g.f0.0 (8) null:w TempRow0(0) tc_exp(0) // Clip if > tc0
cmp.l.f0.1 (8) null:w TempRow0(0) -tc_exp(0) // Clip if < -tc0
(f0.0) mov (8) TempRow0(0)<1> tc_exp(0)
(f0.1) mov (8) TempRow0(0)<1> -tc_exp(0)
// p0' = Clip1(p0 + delta) = Clip3(0, 0xFF, p0 + delta)
add.sat (8) TempRow1B(0)<2> P0 TempRow0(0) // p0+delta
// q0' = Clip1(q0 - delta) = Clip3(0, 0xFF, q0 - delta)
add.sat (8) TempRow0B(0)<2> Q0 -TempRow0(0) // q0-delta
mov (8) NewP0 TempRow1B(0) // p0'
mov (8) NewQ0 TempRow0B(0) // q0'
endif
MBAFF_UV_ENDIF2:
MBAFF_UV_ENDIF1:
endif
RETURN

View File

@@ -0,0 +1,22 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//----- Close a Message Gateway -----
// Sends a CloseGateway request (descriptor CGWMSGDSC) to the message gateway
// shared function; the message payload is in m7.
#if defined(_DEBUG)
mov (1) EntrySignature:b 0x4444:w
#endif
// Message descriptor
// bit 31 EOD
// 27:24 FFID = 0x0011 for msg gateway
// 23:20 msg length = 1 MRF
// 19:16 Response length = 0
// 1:0 SubFuncID = 01 for CloseGateway
// Message descriptor: 0 000 0011 0001 0000 + 0 0 000000000000 01 ==> 0000 0011 0001 0000 0000 0000 0000 0001
send (8) null:ud m7 r0.0<0;1,0>:ud MSG_GW CGWMSGDSC

View File

@@ -0,0 +1,186 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
//---------- Check dependency and spawn all MBs ----------
// Launch the 1st round of child threads for Vertical ILDB
#if defined(_DEBUG)
mov (1) EntrySignature:w 0x3333:w
#endif
//=====================================================================
// Jump Table 1
// 0 0 ---> Goto ALL_SPAWNED
// 0 1 ---> Goto ALL_SPAWNED
// 1 0 ---> Goto SLEEP_ENTRY
// 1 1 ---> Goto POST_SLEEP
mov (2) JumpTable.0<1>:d 0:d { NoDDClr }
#if defined(CHROMA_ROOT)
mov (1) JumpTable.2:d SLEEP_ENTRY_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d { NoDDClr, NoDDChk }
mov (1) JumpTable.3:d POST_SLEEP_UV_ILDB_FRAME_IP-ALL_SPAWNED_UV_ILDB_FRAME_IP:d { NoDDChk }
#else
mov (1) JumpTable.2:d SLEEP_ENTRY_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d { NoDDClr, NoDDChk }
mov (1) JumpTable.3:d POST_SLEEP_Y_ILDB_FRAME_IP-ALL_SPAWNED_Y_ILDB_FRAME_IP:d { NoDDChk }
#endif
//=====================================================================
mov (2) f0.0<1>:w 0:w // Clear flag registers f0.0 and f0.1
// Get m0 most of fields ready for URB write
mov (8) MRF0<1>:ud MSGSRC.0<8;8,1>:ud
// Add child kernel offset
add (1) CT_R0Hdr.2:ud r0.2:ud CHILD_OFFSET:w
// Init
mov (1) Col_Boundary:w 2:w
mov (1) Row_Boundary:w LastRow:w
mov (1) TopRowForScan:w 0:w
mov (2) OutstandingThreads<1>:w 0:w
// Init Scoreboard (idle = 0x00FF, busy = 0x0000)
// Low word is saved col. High word is busy/idle status
mov (16) GatewayAperture(0)<1> 0x00FF00FF:ud // Init r6-r7
mov (16) GatewayAperture(2)<1> 0x00FF00FF:ud // Init r8-r9
mov (16) GatewayAperture(4)<1> 0x00FF00FF:ud // Init r10-r11
mov (16) GatewayAperture(6)<1> 0x00FF00FF:ud // Init r12-r13
mov (16) GatewayAperture(8)<1> 0x00FF00FF:ud // Init r14-r15
mul (1) StatusAddr:w CurRow:w 4:w // dword to bytes offset conversion
//=====================================================================
//SPAWN_LOOP:
//===== OutstandingThreads < ThreadLimit ? ============================
cmp.l.f0.1 (1) null:w OutstandingThreads:w ThreadLimit:w // Check the thread limit
#if defined(CHROMA_ROOT)
(f0.1) jmpi ILDB_LABEL(POST_SLEEP_UV)
#else // LUMA_ROOT
(f0.1) jmpi ILDB_LABEL(POST_SLEEP_Y)
#endif
#if defined(CHROMA_ROOT)
ILDB_LABEL(SLEEP_ENTRY_UV):
#else // LUMA_ROOT
ILDB_LABEL(SLEEP_ENTRY_Y):
#endif
//===== Goto Sleep ====================================================
// Either reached max thread limit or no child thread can be spawned due to dependency.
add (1) OutstandingThreads:w OutstandingThreads:w -1:w // Decrementing before the wait is faster
wait n0.0:d
#if defined(CHROMA_ROOT)
ILDB_LABEL(POST_SLEEP_UV):
#else // LUMA_ROOT
ILDB_LABEL(POST_SLEEP_Y):
#endif
//===== Luma Status[CurRow] == busy ? =====
cmp.z.f0.0 (1) null:uw r[StatusAddr, GatewayApertureB+ScoreBd_Idx]:uw 0:uw // Check west neighbor
cmp.g.f0.1 (1) null:w CurCol:w LastCol:w // Check if the curCol > LastCol
#if defined(CHROMA_ROOT)
mov (16) acc0.0<1>:w URBOffsetUVBase<0;1,0>:w // Add offset to UV base (MBsCntY * URB_ENTRIES_PER_MB)
mac (1) URBOffset:w CurRow:w 4:w // 4 entries per row
#else
mul (1) URBOffset:w CurRow:w 4:w // 4 entries per row
#endif
#if defined(CHROMA_ROOT)
(f0.0) jmpi ILDB_LABEL(SLEEP_ENTRY_UV) // Current row has a child thread running, can not spawn a new child thread, go back to sleep
(f0.1) jmpi ILDB_LABEL(NEXT_MB_UV) // skip MB if the curCol > LastCol
#else // LUMA_ROOT
(f0.0) jmpi ILDB_LABEL(SLEEP_ENTRY_Y) // Current row has a child thread running, can not spawn a new child thread, go back to sleep
(f0.1) jmpi ILDB_LABEL(NEXT_MB_Y) // skip MB if the curCol > LastCol
#endif
//========== Spawn a child thread ========================================
// Save cur col and set Status[CurRow] to busy
mov (2) r[StatusAddr, GatewayApertureB]<1>:uw CurColB<2;2,1>:ub // Store the new col
// Increase OutstandingThreads and ProcessedMBs by 1
add (2) OutstandingThreads<1>:w OutstandingThreads<2;2,1>:w 1:w
#include "AVC_ILDB_SpawnChild.asm"
//===== Find next MB ===================================================
#if defined(CHROMA_ROOT)
ILDB_LABEL(NEXT_MB_UV):
#else // LUMA_ROOT
ILDB_LABEL(NEXT_MB_Y):
#endif
// Check pic boundary, results are in f0.0 bit0 and bit1
cmp.ge.f0.0 (2) null<1>:w CurCol<2;2,1>:w Col_Boundary<2;2,1>:w
// Update TopRowForScan if the curCol = LastCol
(f0.1) add (1) TopRowForScan:w CurRow:w 1:w
// cmp.l.f0.1 (1) null<1>:w ProcessedMBs:w TotalBlocks:w // Processed all blocks ?
// 2 sets compare
// ProcessedMBs:w < TotalBlocks:w, OutstandingThreads:w < ThreadLimit:w
// 0 0 ---> Goto ALL_SPAWNED
// 0 1 ---> Goto ALL_SPAWNED
// 1 0 ---> Goto SLEEP_ENTRY
// 1 1 ---> Goto POST_SLEEP
cmp.l.f0.1 (2) null<1>:w OutstandingThreads<2;2,1>:w ThreadLimit<2;2,1>:w
// Just do it in stalled cycles
mov (1) acc0.0:w 4:w
mac (1) StatusAddr:w CurRow:w 4:w // dword to bytes offset conversion
add (2) CurCol<1>:w CurCol<2;2,1>:w StepToNextMB<2;2,1>:b // CurCol -= 2 and CurRow += 1
// Set f0.0 if turning around is needed, assuming bit 15 - 2 are zeros for correct comparison.
cmp.nz.f0.0 (1) null<1>:w f0.0:w 0x01:w
mul (1) JumpAddr:w f0.1:w 4:w // byte offset in dword count
// The next MB is at the row TopRowForScan
(f0.0) mul (1) StatusAddr:w TopRowForScan:w 4:w // dword to bytes offset conversion
(f0.0) mov (1) CurRow:w TopRowForScan:w { NoDDClr } // Restart from the top row that has MBs not deblocked yet.
(f0.0) add (1) CurCol:w r[StatusAddr, GatewayApertureB]:uw 1:w { NoDDChk }
//===== Processed all blocks ? =========================================
// (f0.1) jmpi SPAWN_LOOP
jmpi r[JumpAddr, JUMPTABLE_BASE]:d
//JUMP_BASE:
//======================================================================
// All MB are spawned at this point, check for outstanding thread count
#if defined(CHROMA_ROOT)
ILDB_LABEL(ALL_SPAWNED_UV):
#else // LUMA_ROOT
ILDB_LABEL(ALL_SPAWNED_Y):
#endif
cmp.e.f0.1 (1) null:w OutstandingThreads:w 0:w // Check before goto sleep
#if defined(CHROMA_ROOT)
(f0.1) jmpi ILDB_LABEL(ALL_DONE_UV)
#else // LUMA_ROOT
(f0.1) jmpi ILDB_LABEL(ALL_DONE_Y)
#endif
wait n0.0:d // Wake up by a finished child thread
add (1) OutstandingThreads:w OutstandingThreads:w -1:w
#if defined(CHROMA_ROOT)
// One thread is free and give it to luma thread limit --- Increase luma thread limit by one.
#include "AVC_ILDB_LumaThrdLimit.asm"
#endif
#if defined(CHROMA_ROOT)
jmpi ILDB_LABEL(ALL_SPAWNED_UV) // Woken up; go back and re-check the outstanding count
#else // LUMA_ROOT
jmpi ILDB_LABEL(ALL_SPAWNED_Y) // Woken up; go back and re-check the outstanding count
#endif
// All child threads are finished at this point
#if defined(CHROMA_ROOT)
ILDB_LABEL(ALL_DONE_UV):
#else // LUMA_ROOT
ILDB_LABEL(ALL_DONE_Y):
#endif

View File

@@ -0,0 +1,223 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
////////// AVC ILDB filter horizontal Mbaff UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of UV.
//
// It sssumes the data for horizontal de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+ H0 Edge
// | | |
// | | |
// | | |
// +-------+-------+ H1 Edge
// | | |
// | | |
// | | |
// +-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
//---------- Deblock UV external top edge ----------
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
mov (1) f0.1:w DualFieldMode:w // Check for dual field mode
// Get Luma maskA and maskB
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi H0_UV_DONE // Skip H0 UV edge
(f0.1) jmpi DUAL_FIELD_UV
// Non dual field mode
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// Ext U
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
// Ext V
mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
jmpi H0_UV_DONE
DUAL_FIELD_UV:
// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
//===== Ext U, Top field
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+32:w { NoDDChk }
mov (16) ABOVE_CUR_MB_UW(0)<1> PREV_MB_UW(0, 0)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_UW(1)<1> SRC_UW(0, 0)<16;8,1> // Copy q1, q0
//===== Ext U, top field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext U, top field
//===== Ext V, top field
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE+1:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+33:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext U, top field
// Prefetch for bottom field
// Get bot field Luma maskA and maskB
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtTopHorz1]<0;1,0>:uw RRampW(0)
// Save deblocked top field rows
mov (8) PREV_MB_UW(1, 0)<1> ABOVE_CUR_MB_UW(0, 8) // Copy p0
mov (8) SRC_UW(0, 0)<1> ABOVE_CUR_MB_UW(1, 0) // Copy q0
//==========================================================================
//===== Ext U, Bot field
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+32:w { NoDDChk }
mov (16) ABOVE_CUR_MB_UW(0)<1> PREV_MB_UW(0, 8)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_UW(1)<1> SRC_UW(0, 8)<16;8,1> // Copy q1, q0
//===== Ext U, bottom field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Cb]<1;2,0>:ub
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext U, bottom field
//===== Ext V, bot field
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE+1:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+33:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1) // Ext V, bottom field
// Save deblocked bot field rows
mov (8) PREV_MB_UW(1, 8)<1> ABOVE_CUR_MB_UW(0, 8) // Copy p0
mov (8) SRC_UW(0, 8)<1> ABOVE_CUR_MB_UW(1, 0) // Copy q0
//========================================
H0_UV_DONE:
//---------- Deblock U internal horz middle edge ----------
//***** Need to take every other bit to form U maskA in core
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]<0;1,0>:uw RRampW(0)
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Cb]<1;2,0>:ub
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// Store UV MaskA and MaskB
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V internal horz middle edge ----------
// p1 = Cur MB V row 2
// p0 = Cur MB V row 3
// q0 = Cur MB V row 4
// q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Cr]<1;2,0>:ub
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------

View File

@@ -0,0 +1,209 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
////////// AVC LDB filter vertical Mbaff UV ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all vertical edges of UV.
//
// It sssumes the data for vertical de-blocking is already transposed.
//
// Chroma:
//
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
// | | |
// | | |
// | | |
// +-------+-------+
//
// V0 V1
// Edge Edge
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBC:w
#endif
//=============== Chroma deblocking ================
//---------- Deblock U external left edge ----------
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterLeftMbEdgeFlag:w // Check for FilterLeftMbEdgeFlag
cmp.z.f0.1 (1) null:w VertEdgePattern:uw LEFT_FIELD_CUR_FRAME:w
// Get Luma maskA and maskB
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMapA_ExtLeftVert0]<0;1,0>:uw RRampW(0)
shr (16) TempRow1(0)<1> r[ECM_AddrReg, wEdgeCntlMapB_ExtLeftVert0]<0;1,0>:uw RRampW(0)
(f0.0) jmpi BYPASS_V0_UV // Do not deblock Left ext edge
cmp.z.f0.0 (1) null:w VertEdgePattern:uw LEFT_FRAME_CUR_FIELD:w
(-f0.1) jmpi V0_U_NEXT1 // Jump if not LEFT_FIELD_CUR_FRAME
//----- For LEFT_FIELD_CUR_FRAME
// Extract UV MaskA and MaskB from every other 2 bits of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<4;2,1> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<4;2,1> 1:w
// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0
mov (4) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub { NoDDClr }
mov (4) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub { NoDDChk }
jmpi V0_U_NEXT3
V0_U_NEXT1:
(-f0.0) jmpi V0_U_NEXT2 // Jump if not LEFT_FRAME_CUR_FIELD
//----- For LEFT_FRAME_CUR_FIELD
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1
mov (4) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_ALPHA(0,4)<1> r[ECM_AddrReg, bAlphaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub { NoDDClr }
mov (4) Mbaff_BETA(0,4)<1> r[ECM_AddrReg, bBetaLeft1_Cb]<0;1,0>:ub { NoDDChk }
mov (4) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cb]<4;4,1>:ub { NoDDClr }
mov (4) Mbaff_TC0(0,4)<1> r[ECM_AddrReg, bTc0_v00_1_Cb]<4;4,1>:ub { NoDDChk }
jmpi V0_U_NEXT3
V0_U_NEXT2:
// Extract UV MaskA and MaskB from every other bit of Y masks
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
and.nz.f0.1 (8) null:w TempRow1(0)<16;8,2> 1:w
// Both are frames or fields
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cb]<1;2,0>:ub
V0_U_NEXT3:
// p1 = Prev MB U row 0
// p0 = Prev MB U row 1
// q0 = Cur MB U row 0
// q1 = Cur MB U row 1
mov (1) P_AddrReg:w PREV_MB_U_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_U_BASE:w { NoDDChk }
// Store UV MaskA and MaskB
mov (2) MaskA<1>:uw f0.0<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V external left edge ----------
// No change to MaskA and MaskB
cmp.z.f0.0 (4) null:w VertEdgePattern:uw LEFT_FIELD_CUR_FRAME:w
cmp.z.f0.1 (4) null:w VertEdgePattern:uw LEFT_FRAME_CUR_FIELD:w
// both are frame or field
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cr]<1;2,0>:ub
// p1 = Prev MB V row 0
// p0 = Prev MB V row 1
// q0 = Cur MB V row 0
// q1 = Cur MB V row 1
mov (1) P_AddrReg:w PREV_MB_V_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_V_BASE:w { NoDDChk }
// For FieldModeLeftMbFlag=1 && FieldModeCurrentMbFlag=0
(f0.0) mov (4) Mbaff_ALPHA(0,0)<2> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_ALPHA(0,1)<2> r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.0) mov (4) Mbaff_BETA(0,0)<2> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_BETA(0,1)<2> r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.0) mov (4) Mbaff_TC0(0,0)<2> r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub { NoDDClr }
(f0.0) mov (4) Mbaff_TC0(0,1)<2> r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub { NoDDChk }
// For FieldModeLeftMbFlag=0 && FieldModeCurrentMbFlag=1
(f0.1) mov (4) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_ALPHA(0,4)<1> r[ECM_AddrReg, bAlphaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.1) mov (4) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaLeft0_Cr]<0;1,0>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_BETA(0,4)<1> r[ECM_AddrReg, bBetaLeft1_Cr]<0;1,0>:ub { NoDDChk }
(f0.1) mov (4) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v00_0_Cr]<4;4,1>:ub { NoDDClr }
(f0.1) mov (4) Mbaff_TC0(0,4)<1> r[ECM_AddrReg, bTc0_v00_1_Cr]<4;4,1>:ub { NoDDChk }
// Set UV MaskA and MaskB
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
BYPASS_V0_UV:
// Set EdgeCntlMap2 = 0, so it always uses bS < 4 algorithm.
// Same alpha and beta for all internal vert and horiz edges
//---------- Deblock U internal vert middle edge ----------
//***** Need to take every other bit to form U or V maskA
shr (16) TempRow0(0)<1> r[ECM_AddrReg, wEdgeCntlMap_IntMidVert]<0;1,0>:uw RRampW(0)
// p1 = Cur MB U row 2
// p0 = Cur MB U row 3
// q0 = Cur MB U row 4
// q1 = Cur MB U row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_U_BASE:w { NoDDChk }
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cb]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v02_Cb]<1;2,0>:ub
and.nz.f0.0 (8) null:w TempRow0(0)<16;8,2> 1:w
// Store MaskA and MaskB
mov (1) f0.1:uw 0:w
mov (1) MaskB:uw 0:w { NoDDClr }
mov (1) MaskA:uw f0.0:uw { NoDDChk }
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------
//---------- Deblock V internal vert middle edge ----------
// P1 = Cur MB V row 2
// P0 = Cur MB V row 3
// Q0 = Cur MB V row 4
// Q1 = Cur MB V row 5
mov (1) P_AddrReg:w 4*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDClr } // Skip 2 U rows and 2 V rows
mov (1) Q_AddrReg:w 8*UV_ROW_WIDTH+SRC_MB_V_BASE:w { NoDDChk }
// Put MaskA into f0.0
// Put MaskB into f0.1
mov (2) f0.0<1>:uw MaskA<2;2,1>:uw
mov (8) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Cr]<0;1,0>:ub
mov (8) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_v02_Cr]<1;2,0>:ub
CALL(FILTER_UV_MBAFF, 1)
//-----------------------------------------------

View File

@@ -0,0 +1,234 @@
/*
* Copyright © <2010>, Intel Corporation.
*
* This program is licensed under the terms and conditions of the
* Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
* http://www.opensource.org/licenses/eclipse-1.0.php.
*
*/
////////// AVC ILDB filter horizontal Mbaff Y ///////////////////////////////////////////////////////
//
// This filter code prepares the src data and control data for ILDB filtering on all horizontal edges of Y.
//
// It sssumes the data for horizontal de-blocking is already transposed.
//
// Luma:
//
// +-------+-------+-------+-------+ H0 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H1 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H2 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+ H3 Edge
// | | | | |
// | | | | |
// | | | | |
// +-------+-------+-------+-------+
//
/////////////////////////////////////////////////////////////////////////////
#if defined(_DEBUG)
mov (1) EntrySignatureC:w 0xBBBB:w
#endif
//========== Luma deblocking ==========
//---------- Deblock Y external top edge (H0) ----------
// Bypass deblocking if it is the top edge of the picture.
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterTopMbEdgeFlag:w // Check for FilterTopMbEdgeFlag
mov (1) f0.1:w DualFieldMode:w // Check for dual field mode
// Non dual field mode
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub 2:w // alpha >> 2
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz0]<2;2,1>:uw
// Ext Y
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop0_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop0_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_0_Y]<1;4,0>:ub
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
(f0.0) jmpi H0_Y_DONE // Skip Ext Y deblocking
(f0.1) jmpi DUAL_FIELD_Y
mov (1) P_AddrReg:w PREV_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w SRC_MB_Y_BASE:w { NoDDChk }
CALL(FILTER_Y_MBAFF, 1) // Non dual field deblocking
jmpi H0_Y_DONE
DUAL_FIELD_Y:
// Dual field mode, FieldModeCurrentMbFlag=0 && FieldModeAboveMbFlag=1
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+64:w { NoDDChk }
// Must use PREV_MB_YW. TOP_MB_YW is not big enough.
// Get top field rows
mov (16) ABOVE_CUR_MB_YW(0)<1> PREV_MB_YW(0, 0)<16;8,1> // Copy p3, p2
mov (16) ABOVE_CUR_MB_YW(1)<1> PREV_MB_YW(2, 0)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_YW(2)<1> SRC_YW(0, 0)<16;8,1> // Copy q0, q1
mov (16) ABOVE_CUR_MB_YW(3)<1> SRC_YW(2, 0)<16;8,1> // Copy q2, q3
CALL(FILTER_Y_MBAFF, 1) // Ext Y, top field
// Save deblocked top field rows
mov (8) PREV_MB_YW(1, 0)<1> ABOVE_CUR_MB_YW(0, 8) // Copy p2
mov (8) PREV_MB_YW(2, 0)<1> ABOVE_CUR_MB_YW(1, 0) // Copy p1
mov (8) PREV_MB_YW(3, 0)<1> ABOVE_CUR_MB_YW(1, 8) // Copy p0
mov (8) SRC_YW(0, 0)<1> ABOVE_CUR_MB_YW(2, 0) // Copy q0
mov (8) SRC_YW(1, 0)<1> ABOVE_CUR_MB_YW(2, 8) // Copy q1
mov (8) SRC_YW(2, 0)<1> ABOVE_CUR_MB_YW(3, 0) // Copy q2
//==================================================================================
// Bottom field
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub 2:w // alpha >> 2
mov (1) P_AddrReg:w ABOVE_CUR_MB_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w ABOVE_CUR_MB_BASE+64:w { NoDDChk }
// Get bot field rows
mov (16) ABOVE_CUR_MB_YW(0)<1> PREV_MB_YW(0, 8)<16;8,1> // Copy p3, p2
mov (16) ABOVE_CUR_MB_YW(1)<1> PREV_MB_YW(2, 8)<16;8,1> // Copy p1, p0
mov (16) ABOVE_CUR_MB_YW(2)<1> SRC_YW(0, 8)<16;8,1> // Copy q0, q1
mov (16) ABOVE_CUR_MB_YW(3)<1> SRC_YW(2, 8)<16;8,1> // Copy q2, q3
mov (2) MaskA<1>:uw r[ECM_AddrReg, wEdgeCntlMapA_ExtTopHorz1]<2;2,1>:uw
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaTop1_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaTop1_Y]<0;1,0>:ub
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h00_1_Y]<1;4,0>:ub
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
CALL(FILTER_Y_MBAFF, 1) // Ext Y, bot field
// Save deblocked top field rows
mov (8) PREV_MB_YW(1, 8)<1> ABOVE_CUR_MB_YW(0, 8) // Copy p2
mov (8) PREV_MB_YW(2, 8)<1> ABOVE_CUR_MB_YW(1, 0) // Copy p1
mov (8) PREV_MB_YW(3, 8)<1> ABOVE_CUR_MB_YW(1, 8) // Copy p0
mov (8) SRC_YW(0, 8)<1> ABOVE_CUR_MB_YW(2, 0) // Copy q0
mov (8) SRC_YW(1, 8)<1> ABOVE_CUR_MB_YW(2, 8) // Copy q1
mov (8) SRC_YW(2, 8)<1> ABOVE_CUR_MB_YW(3, 0) // Copy q2
//==================================================================================
H0_Y_DONE:
//BYPASS_H0_Y:
//------------------------------------------------------------------
// Same alpha, alpha2, beta and MaskB for all internal edges
// Get (alpha >> 2) + 2
shr (16) Mbaff_ALPHA2(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub 2:w // alpha >> 2
// alpha = bAlphaInternal_Y
// beta = bBetaInternal_Y
mov (16) Mbaff_ALPHA(0,0)<1> r[ECM_AddrReg, bAlphaInternal_Y]<0;1,0>:ub
mov (16) Mbaff_BETA(0,0)<1> r[ECM_AddrReg, bBetaInternal_Y]<0;1,0>:ub
mov (1) MaskB:uw 0:w // Set MaskB = 0 for all 3 edges, so it always uses bS < 4 algorithm.
add (16) Mbaff_ALPHA2(0,0)<1> Mbaff_ALPHA2(0,0)<16;16,1> 2:w // alpha2 = (alpha >> 2) + 2
//---------- Deblock Y internal top edge (H1) ----------
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_H1_Y
// p3 = Cur MB Y row 0 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 1 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 2 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 3 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 4 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 5 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 6 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 7 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntTopHorz]:uw
// tc0 has bTc0_h13_Y + bTc0_h12_Y + bTc0_h11_Y + bTc0_h10_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h10_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H1_Y:
//------------------------------------------------------------------
//---------- Deblock Y internal mid horizontal edge (H2) ----------
// Bypass deblocking if FilterInternal8x8EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal8x8EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_H2_Y
// p3 = Cur MB Y row 4 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 5 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 6 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 7 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 8 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 9 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 10 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 11 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 4*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntMidHorz]:uw
// tc0 has bTc0_h23_Y + bTc0_h22_Y + bTc0_h21_Y + bTc0_h20_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h20_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H2_Y:
//-----------------------------------------------
//---------- Deblock Y internal bottom edge (H3) ----------
// Bypass deblocking if FilterInternal4x4EdgesFlag = 0
and.z.f0.0 (1) null:w r[ECM_AddrReg, BitFlags]:ub FilterInternal4x4EdgesFlag:w // Check for FilterInternal4x4EdgesFlag
// (f0.0) jmpi BYPASS_H3_Y
// p3 = Cur MB Y row 8 = r[P_AddrReg, 0]<16;16,1>
// p2 = Cur MB Y row 9 = r[P_AddrReg, 16]<16;16,1>
// p1 = Cur MB Y row 10 = r[P_AddrReg, 32]<16;16,1>
// p0 = Cur MB Y row 11 = r[P_AddrReg, 48]<16;16,1>
// q0 = Cur MB Y row 12 = r[Q_AddrReg, 0]<16;16,1>
// q1 = Cur MB Y row 13 = r[Q_AddrReg, 16]<16;16,1>
// q2 = Cur MB Y row 14 = r[Q_AddrReg, 32]<16;16,1>
// q3 = Cur MB Y row 15 = r[Q_AddrReg, 48]<16;16,1>
mov (1) P_AddrReg:w 8*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDClr }
mov (1) Q_AddrReg:w 12*Y_ROW_WIDTH+SRC_MB_Y_BASE:w { NoDDChk }
mov (1) MaskA:uw r[ECM_AddrReg, wEdgeCntlMap_IntBotHorz]:uw
// tc0 has bTc0_h33_Y + bTc0_h32_Y + bTc0_h31_Y + bTc0_h30_Y
mov (16) Mbaff_TC0(0,0)<1> r[ECM_AddrReg, bTc0_h30_Y]<1;4,0>:ub
// CALL(FILTER_Y_MBAFF, 1)
PRED_CALL(-f0.0, FILTER_Y_MBAFF, 1)
//BYPASS_H3_Y:
//-----------------------------------------------

Some files were not shown because too many files have changed in this diff Show More