kolibrios/drivers/video/i965/i965_avc_bsd.c

1052 lines
44 KiB
C
Raw Normal View History

/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Xiang Haihao <haihao.xiang@intel.com>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifndef HAVE_GEN_AVC_SURFACE
#define HAVE_GEN_AVC_SURFACE 1
#endif
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_avc_bsd.h"
#include "i965_media_h264.h"
#include "i965_media.h"
#include "i965_decoder_utils.h"
static void
i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
struct object_surface *obj_surface,
VAPictureParameterBufferH264 *pic_param,
struct i965_h264_context *i965_h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
GenAvcSurface *avc_bsd_surface = obj_surface->private_data;
obj_surface->free_private_data = gen_free_avc_surface;
if (!avc_bsd_surface) {
avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1);
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = avc_bsd_surface;
}
avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
!pic_param->seq_fields.bits.direct_8x8_inference_flag);
if (avc_bsd_surface->dmv_top == NULL) {
avc_bsd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
"direct mv w/r buffer",
DMV_SIZE,
0x1000);
}
if (avc_bsd_surface->dmv_bottom_flag &&
avc_bsd_surface->dmv_bottom == NULL) {
avc_bsd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
"direct mv w/r buffer",
DMV_SIZE,
0x1000);
}
}
static void
i965_bsd_ind_obj_base_address(VADriverContextP ctx,
struct decode_state *decode_state,
int slice,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
dri_bo *ind_bo = decode_state->slice_datas[slice]->bo;
BEGIN_BCS_BATCH(batch, 3);
OUT_BCS_BATCH(batch, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2));
OUT_BCS_RELOC(batch, ind_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
static void
i965_avc_bsd_img_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int qm_present_flag;
int img_struct;
int mbaff_frame_flag;
unsigned int avc_it_command_header;
unsigned int width_in_mbs, height_in_mbs;
VAPictureParameterBufferH264 *pic_param;
if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
qm_present_flag = 1;
else
qm_present_flag = 0; /* built-in QM matrices */
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
img_struct = 1;
else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
img_struct = 3;
else
img_struct = 0;
if ((img_struct & 0x1) == 0x1) {
assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
} else {
assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
}
if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
assert(pic_param->pic_fields.bits.field_pic_flag == 0);
} else {
assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
}
mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
!pic_param->pic_fields.bits.field_pic_flag);
width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
/* BSD unit doesn't support 4:2:2 and 4:4:4 picture */
assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2));
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_IMG_STATE | (6 - 2));
OUT_BCS_BATCH(batch,
((width_in_mbs * height_in_mbs) & 0x7fff));
OUT_BCS_BATCH(batch,
(height_in_mbs << 16) |
(width_in_mbs << 0));
OUT_BCS_BATCH(batch,
((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
(SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */
(SCAN_SPECIAL_ORDER << 14) | /* AVC IT Command */
(SCAN_RASTER_ORDER << 13) | /* AVC IT Data */
(1 << 12) | /* always 1, hardware requirement */
(qm_present_flag << 10) |
(img_struct << 8) |
(16 << 0)); /* FIXME: always support 16 reference frames ??? */
OUT_BCS_BATCH(batch,
(RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */
(0 << 17) | /* don't overwrite SRT */
(0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */
(0 << 12) | /* FIXME: no 16MV ??? */
(pic_param->seq_fields.bits.chroma_format_idc << 10) |
(i965_h264_context->enable_avc_ildb << 8) | /* Enable ILDB writing output */
(pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
(pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
(pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
(pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
(pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
(mbaff_frame_flag << 1) |
(pic_param->pic_fields.bits.field_pic_flag << 0));
OUT_BCS_BATCH(batch, avc_it_command_header);
ADVANCE_BCS_BATCH(batch);
}
static void
i965_avc_bsd_qm_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int cmd_len;
VAIQMatrixBufferH264 *iq_matrix;
VAPictureParameterBufferH264 *pic_param;
if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
return;
iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
BEGIN_BCS_BATCH(batch, cmd_len);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_QM_STATE | (cmd_len - 2));
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
OUT_BCS_BATCH(batch,
(0x0 << 8) | /* don't use default built-in matrices */
(0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
else
OUT_BCS_BATCH(batch,
(0x0 << 8) | /* don't use default built-in matrices */
(0x3f << 0)); /* six 4x4 scaling matrices */
intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
ADVANCE_BCS_BATCH(batch);
}
static void
i965_avc_bsd_slice_state(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int present_flag, cmd_len, list, j;
uint8_t ref_idx_state[32];
char weightoffsets[32 * 6];
/* don't issue SLICE_STATE for intra-prediction decoding */
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
return;
cmd_len = 2;
if (slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP) {
present_flag = PRESENT_REF_LIST0;
cmd_len += 8;
} else {
present_flag = PRESENT_REF_LIST0 | PRESENT_REF_LIST1;
cmd_len += 16;
}
if ((slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP) &&
(pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
present_flag |= PRESENT_WEIGHT_OFFSET_L0;
cmd_len += 48;
}
if ((slice_param->slice_type == SLICE_TYPE_B) &&
(pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
present_flag |= PRESENT_WEIGHT_OFFSET_L0 | PRESENT_WEIGHT_OFFSET_L1;
cmd_len += 96;
}
BEGIN_BCS_BATCH(batch, cmd_len);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2));
OUT_BCS_BATCH(batch, present_flag);
for (list = 0; list < 2; list++) {
int flag, num_va_pics;
VAPictureH264 *va_pic;
if (list == 0) {
flag = PRESENT_REF_LIST0;
va_pic = slice_param->RefPicList0;
num_va_pics = slice_param->num_ref_idx_l0_active_minus1 + 1;
} else {
flag = PRESENT_REF_LIST1;
va_pic = slice_param->RefPicList1;
num_va_pics = slice_param->num_ref_idx_l1_active_minus1 + 1;
}
if (!(present_flag & flag))
continue;
gen5_fill_avc_ref_idx_state(
ref_idx_state,
va_pic, num_va_pics,
i965_h264_context->fsid_list
);
intel_batchbuffer_data(batch, ref_idx_state, sizeof(ref_idx_state));
}
i965_h264_context->weight128_luma_l0 = 0;
i965_h264_context->weight128_luma_l1 = 0;
i965_h264_context->weight128_chroma_l0 = 0;
i965_h264_context->weight128_chroma_l1 = 0;
i965_h264_context->weight128_offset0_flag = 0;
i965_h264_context->weight128_offset0 = 0;
if (present_flag & PRESENT_WEIGHT_OFFSET_L0) {
for (j = 0; j < 32; j++) {
weightoffsets[j * 6 + 0] = slice_param->luma_offset_l0[j];
weightoffsets[j * 6 + 1] = slice_param->luma_weight_l0[j];
weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l0[j][0];
weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l0[j][0];
weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l0[j][1];
weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l0[j][1];
if (pic_param->pic_fields.bits.weighted_pred_flag == 1 ||
pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
if (i965_h264_context->use_hw_w128) {
if (slice_param->luma_weight_l0[j] == 128)
i965_h264_context->weight128_luma_l0 |= (1 << j);
if (slice_param->chroma_weight_l0[j][0] == 128 ||
slice_param->chroma_weight_l0[j][1] == 128)
i965_h264_context->weight128_chroma_l0 |= (1 << j);
} else {
/* FIXME: workaround for weight 128 */
if (slice_param->luma_weight_l0[j] == 128 ||
slice_param->chroma_weight_l0[j][0] == 128 ||
slice_param->chroma_weight_l0[j][1] == 128)
i965_h264_context->weight128_offset0_flag = 1;
}
}
}
intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
}
if (present_flag & PRESENT_WEIGHT_OFFSET_L1) {
for (j = 0; j < 32; j++) {
weightoffsets[j * 6 + 0] = slice_param->luma_offset_l1[j];
weightoffsets[j * 6 + 1] = slice_param->luma_weight_l1[j];
weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l1[j][0];
weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l1[j][0];
weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l1[j][1];
weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l1[j][1];
if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
if (i965_h264_context->use_hw_w128) {
if (slice_param->luma_weight_l1[j] == 128)
i965_h264_context->weight128_luma_l1 |= (1 << j);
if (slice_param->chroma_weight_l1[j][0] == 128 ||
slice_param->chroma_weight_l1[j][1] == 128)
i965_h264_context->weight128_chroma_l1 |= (1 << j);
} else {
if (slice_param->luma_weight_l0[j] == 128 ||
slice_param->chroma_weight_l0[j][0] == 128 ||
slice_param->chroma_weight_l0[j][1] == 128)
i965_h264_context->weight128_offset0_flag = 1;
}
}
}
intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
}
ADVANCE_BCS_BATCH(batch);
}
static void
i965_avc_bsd_buf_base_state(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
struct i965_h264_context *i965_h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = i965_h264_context->batch;
struct i965_avc_bsd_context *i965_avc_bsd_context;
int i, j;
VAPictureH264 *va_pic;
struct object_surface *obj_surface;
GenAvcSurface *avc_bsd_surface;
i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
BEGIN_BCS_BATCH(batch, 74);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2));
OUT_BCS_RELOC(batch, i965_avc_bsd_context->bsd_raw_store.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
OUT_BCS_RELOC(batch, i965_avc_bsd_context->mpr_row_store.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
OUT_BCS_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
OUT_BCS_RELOC(batch, i965_h264_context->avc_it_data.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
(i965_h264_context->avc_it_data.write_offset << 6));
if (i965_h264_context->enable_avc_ildb)
OUT_BCS_RELOC(batch, i965_h264_context->avc_ildb_data.bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
else
OUT_BCS_BATCH(batch, 0);
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
int found = 0;
for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
va_pic = &pic_param->ReferenceFrames[j];
if (va_pic->flags & VA_PICTURE_H264_INVALID)
continue;
if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
found = 1;
break;
}
}
assert(found == 1);
if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
obj_surface = SURFACE(va_pic->picture_id);
assert(obj_surface);
avc_bsd_surface = obj_surface->private_data;
if (avc_bsd_surface == NULL) {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
} else {
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
if (avc_bsd_surface->dmv_bottom_flag == 1)
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
else
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, 0,
0);
}
}
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
}
}
va_pic = &pic_param->CurrPic;
assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
obj_surface = SURFACE(va_pic->picture_id);
assert(obj_surface);
obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* initial uv component for YUV400 case */
if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
unsigned int uv_offset = obj_surface->width * obj_surface->height;
unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
dri_bo_map(obj_surface->bo, 1);
memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
dri_bo_unmap(obj_surface->bo);
}
i965_avc_bsd_init_avc_bsd_surface(ctx, obj_surface, pic_param, i965_h264_context);
avc_bsd_surface = obj_surface->private_data;
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
if (avc_bsd_surface->dmv_bottom_flag == 1)
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
else
OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
0);
/* POC List */
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
int found = 0;
for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
va_pic = &pic_param->ReferenceFrames[j];
if (va_pic->flags & VA_PICTURE_H264_INVALID)
continue;
if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
found = 1;
break;
}
}
assert(found == 1);
if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
}
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
}
}
va_pic = &pic_param->CurrPic;
OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
ADVANCE_BCS_BATCH(batch);
}
static void
g4x_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
int slice_index,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
if (slice_param) {
int encrypted, counter_value, cmd_len;
int slice_hor_pos, slice_ver_pos;
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice = 0;
int slice_type;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
if (encrypted) {
cmd_len = 9;
counter_value = 0; /* FIXME: ??? */
} else
cmd_len = 8;
slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
decode_state->slice_datas[slice_index]->bo,
slice_param,
pic_param->pic_fields.bits.entropy_coding_mode_flag
);
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
slice_type = SLICE_TYPE_I;
else if (slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP)
slice_type = SLICE_TYPE_P;
else {
assert(slice_param->slice_type == SLICE_TYPE_B);
slice_type = SLICE_TYPE_B;
}
if (slice_type == SLICE_TYPE_I) {
assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = 0;
num_ref_idx_l1 = 0;
} else if (slice_type == SLICE_TYPE_P) {
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = 0;
} else {
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
}
if (slice_type == SLICE_TYPE_P)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
else if (slice_type == SLICE_TYPE_B)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
slice_hor_pos = first_mb_in_slice % width_in_mbs;
slice_ver_pos = first_mb_in_slice / width_in_mbs;
BEGIN_BCS_BATCH(batch, cmd_len);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (cmd_len - 2));
OUT_BCS_BATCH(batch,
(encrypted << 31) |
((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
OUT_BCS_BATCH(batch,
(slice_param->slice_data_offset +
(slice_data_bit_offset >> 3)));
OUT_BCS_BATCH(batch,
(0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
(0 << 14) | /* ignore BSDPrematureComplete Error handling */
(0 << 13) | /* FIXME: ??? */
(0 << 12) | /* ignore MPR Error handling */
(0 << 10) | /* ignore Entropy Error handling */
(0 << 8) | /* ignore MB Header Error handling */
(slice_type << 0));
OUT_BCS_BATCH(batch,
(num_ref_idx_l1 << 24) |
(num_ref_idx_l0 << 16) |
(slice_param->chroma_log2_weight_denom << 8) |
(slice_param->luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
(slice_param->direct_spatial_mv_pred_flag << 29) |
(slice_param->disable_deblocking_filter_idc << 27) |
(slice_param->cabac_init_idc << 24) |
((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
OUT_BCS_BATCH(batch,
(slice_ver_pos << 24) |
(slice_hor_pos << 16) |
(first_mb_in_slice << 0));
OUT_BCS_BATCH(batch,
(1 << 7) |
((0x7 - (slice_data_bit_offset & 0x7)) << 0));
if (encrypted) {
OUT_BCS_BATCH(batch, counter_value);
}
ADVANCE_BCS_BATCH(batch);
} else {
BEGIN_BCS_BATCH(batch, 8);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2));
OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
}
static void
ironlake_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
int slice_index,
struct i965_h264_context *i965_h264_context)
{
struct intel_batchbuffer *batch = i965_h264_context->batch;
int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
if (slice_param) {
int encrypted, counter_value;
int slice_hor_pos, slice_ver_pos;
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice;
int slice_type;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
if (encrypted) {
counter_value = 0; /* FIXME: ??? */
} else
counter_value = 0;
slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
decode_state->slice_datas[slice_index]->bo,
slice_param,
pic_param->pic_fields.bits.entropy_coding_mode_flag
);
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
slice_type = SLICE_TYPE_I;
else if (slice_param->slice_type == SLICE_TYPE_P ||
slice_param->slice_type == SLICE_TYPE_SP)
slice_type = SLICE_TYPE_P;
else {
assert(slice_param->slice_type == SLICE_TYPE_B);
slice_type = SLICE_TYPE_B;
}
if (slice_type == SLICE_TYPE_I) {
assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = 0;
num_ref_idx_l1 = 0;
} else if (slice_type == SLICE_TYPE_P) {
assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = 0;
} else {
num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
}
if (slice_type == SLICE_TYPE_P)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
else if (slice_type == SLICE_TYPE_B)
weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
slice_hor_pos = first_mb_in_slice % width_in_mbs;
slice_ver_pos = first_mb_in_slice / width_in_mbs;
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
OUT_BCS_BATCH(batch,
(encrypted << 31) |
(0 << 30) | /* FIXME: packet based bit stream */
(0 << 29) | /* FIXME: packet format */
((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
OUT_BCS_BATCH(batch,
(slice_param->slice_data_offset +
(slice_data_bit_offset >> 3)));
OUT_BCS_BATCH(batch,
(0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
(0 << 14) | /* ignore BSDPrematureComplete Error handling */
(0 << 13) | /* FIXME: ??? */
(0 << 12) | /* ignore MPR Error handling */
(0 << 10) | /* ignore Entropy Error handling */
(0 << 8) | /* ignore MB Header Error handling */
(slice_type << 0));
OUT_BCS_BATCH(batch,
(num_ref_idx_l1 << 24) |
(num_ref_idx_l0 << 16) |
(slice_param->chroma_log2_weight_denom << 8) |
(slice_param->luma_log2_weight_denom << 0));
OUT_BCS_BATCH(batch,
(weighted_pred_idc << 30) |
(slice_param->direct_spatial_mv_pred_flag << 29) |
(slice_param->disable_deblocking_filter_idc << 27) |
(slice_param->cabac_init_idc << 24) |
((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
OUT_BCS_BATCH(batch,
(slice_ver_pos << 24) |
(slice_hor_pos << 16) |
(first_mb_in_slice << 0));
OUT_BCS_BATCH(batch,
(1 << 7) |
((0x7 - (slice_data_bit_offset & 0x7)) << 0));
OUT_BCS_BATCH(batch, counter_value);
/* FIXME: dw9-dw11 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l0);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l1);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l0);
OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l1);
ADVANCE_BCS_BATCH(batch);
} else {
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
}
}
static void
i965_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
VASliceParameterBufferH264 *slice_param,
int slice_index,
struct i965_h264_context *i965_h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
if (IS_IRONLAKE(i965->intel.device_id))
ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
else
g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
}
static void
i965_avc_bsd_phantom_slice(VADriverContextP ctx,
struct decode_state *decode_state,
VAPictureParameterBufferH264 *pic_param,
struct i965_h264_context *i965_h264_context)
{
i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, 0, i965_h264_context);
}
static void
i965_avc_bsd_frame_store_index(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
struct i965_h264_context *i965_h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
int i, j;
assert(ARRAY_ELEMS(i965_h264_context->fsid_list) == ARRAY_ELEMS(pic_param->ReferenceFrames));
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
int found = 0;
if (i965_h264_context->fsid_list[i].surface_id == VA_INVALID_ID)
continue;
for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
if (ref_pic->flags & VA_PICTURE_H264_INVALID)
continue;
if (i965_h264_context->fsid_list[i].surface_id == ref_pic->picture_id) {
found = 1;
break;
}
}
if (!found) {
struct object_surface *obj_surface = SURFACE(i965_h264_context->fsid_list[i].surface_id);
obj_surface->flags &= ~SURFACE_REFERENCED;
if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
dri_bo_unreference(obj_surface->bo);
obj_surface->bo = NULL;
obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
}
if (obj_surface->free_private_data)
obj_surface->free_private_data(&obj_surface->private_data);
i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
i965_h264_context->fsid_list[i].frame_store_id = -1;
}
}
for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
int found = 0;
if (ref_pic->flags & VA_PICTURE_H264_INVALID)
continue;
for (j = 0; j < ARRAY_ELEMS(i965_h264_context->fsid_list); j++) {
if (i965_h264_context->fsid_list[j].surface_id == VA_INVALID_ID)
continue;
if (i965_h264_context->fsid_list[j].surface_id == ref_pic->picture_id) {
found = 1;
break;
}
}
if (!found) {
int frame_idx;
struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
assert(obj_surface);
i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
for (frame_idx = 0; frame_idx < ARRAY_ELEMS(i965_h264_context->fsid_list); frame_idx++) {
for (j = 0; j < ARRAY_ELEMS(i965_h264_context->fsid_list); j++) {
if (i965_h264_context->fsid_list[j].surface_id == VA_INVALID_ID)
continue;
if (i965_h264_context->fsid_list[j].frame_store_id == frame_idx)
break;
}
if (j == ARRAY_ELEMS(i965_h264_context->fsid_list))
break;
}
assert(frame_idx < ARRAY_ELEMS(i965_h264_context->fsid_list));
for (j = 0; j < ARRAY_ELEMS(i965_h264_context->fsid_list); j++) {
if (i965_h264_context->fsid_list[j].surface_id == VA_INVALID_ID) {
i965_h264_context->fsid_list[j].surface_id = ref_pic->picture_id;
i965_h264_context->fsid_list[j].frame_store_id = frame_idx;
break;
}
}
}
}
for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list) - 1; i++) {
if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID &&
i965_h264_context->fsid_list[i].frame_store_id == i)
continue;
for (j = i + 1; j < ARRAY_ELEMS(i965_h264_context->fsid_list); j++) {
if (i965_h264_context->fsid_list[j].surface_id != VA_INVALID_ID &&
i965_h264_context->fsid_list[j].frame_store_id == i) {
VASurfaceID id = i965_h264_context->fsid_list[i].surface_id;
int frame_idx = i965_h264_context->fsid_list[i].frame_store_id;
i965_h264_context->fsid_list[i].surface_id = i965_h264_context->fsid_list[j].surface_id;
i965_h264_context->fsid_list[i].frame_store_id = i965_h264_context->fsid_list[j].frame_store_id;
i965_h264_context->fsid_list[j].surface_id = id;
i965_h264_context->fsid_list[j].frame_store_id = frame_idx;
break;
}
}
}
}
void
i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
{
struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
struct intel_batchbuffer *batch = i965_h264_context->batch;
VAPictureParameterBufferH264 *pic_param;
VASliceParameterBufferH264 *slice_param;
int i, j;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
i965_avc_bsd_frame_store_index(ctx, pic_param, i965_h264_context);
i965_h264_context->enable_avc_ildb = 0;
i965_h264_context->picture.i_flag = 1;
for (j = 0; j < decode_state->num_slice_params && i965_h264_context->enable_avc_ildb == 0; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
assert((slice_param->slice_type == SLICE_TYPE_I) ||
(slice_param->slice_type == SLICE_TYPE_SI) ||
(slice_param->slice_type == SLICE_TYPE_P) ||
(slice_param->slice_type == SLICE_TYPE_SP) ||
(slice_param->slice_type == SLICE_TYPE_B));
if (slice_param->disable_deblocking_filter_idc != 1) {
i965_h264_context->enable_avc_ildb = 1;
break;
}
slice_param++;
}
}
intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
i965_avc_bsd_img_state(ctx, decode_state, i965_h264_context);
i965_avc_bsd_qm_state(ctx, decode_state, i965_h264_context);
for (j = 0; j < decode_state->num_slice_params; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
i965_bsd_ind_obj_base_address(ctx, decode_state, j, i965_h264_context);
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
assert((slice_param->slice_type == SLICE_TYPE_I) ||
(slice_param->slice_type == SLICE_TYPE_SI) ||
(slice_param->slice_type == SLICE_TYPE_P) ||
(slice_param->slice_type == SLICE_TYPE_SP) ||
(slice_param->slice_type == SLICE_TYPE_B));
if (i965_h264_context->picture.i_flag &&
(slice_param->slice_type != SLICE_TYPE_I ||
slice_param->slice_type != SLICE_TYPE_SI))
i965_h264_context->picture.i_flag = 0;
i965_avc_bsd_slice_state(ctx, pic_param, slice_param, i965_h264_context);
i965_avc_bsd_buf_base_state(ctx, pic_param, slice_param, i965_h264_context);
i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param, j, i965_h264_context);
slice_param++;
}
}
i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context);
intel_batchbuffer_emit_mi_flush(batch);
intel_batchbuffer_end_atomic(batch);
intel_batchbuffer_flush(batch);
}
void
i965_avc_bsd_decode_init(VADriverContextP ctx, void *h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
struct i965_avc_bsd_context *i965_avc_bsd_context;
dri_bo *bo;
assert(i965_h264_context);
i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"bsd raw store",
0x3000, /* at least 11520 bytes to support 120 MBs per row */
64);
assert(bo);
i965_avc_bsd_context->bsd_raw_store.bo = bo;
dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"mpr row store",
0x2000, /* at least 7680 bytes to support 120 MBs per row */
64);
assert(bo);
i965_avc_bsd_context->mpr_row_store.bo = bo;
}
Bool
i965_avc_bsd_ternimate(struct i965_avc_bsd_context *i965_avc_bsd_context)
{
dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
return True;
}