diff --git a/drivers/video/Intel-2D/brw/brw.h b/drivers/video/Intel-2D/brw/brw.h
new file mode 100644
index 0000000000..77292cfb03
--- /dev/null
+++ b/drivers/video/Intel-2D/brw/brw.h
@@ -0,0 +1,17 @@
+#include "brw_eu.h"
+
+bool brw_sf_kernel__nomask(struct brw_compile *p);
+bool brw_sf_kernel__mask(struct brw_compile *p);
+
+bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+
+bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
+
+bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width);
diff --git a/drivers/video/Intel-2D/brw/brw_eu.c b/drivers/video/Intel-2D/brw/brw_eu.c
new file mode 100644
index 0000000000..d6210801c9
--- /dev/null
+++ b/drivers/video/Intel-2D/brw/brw_eu.c
@@ -0,0 +1,150 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+#include "brw_eu.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/* Returns the corresponding conditional mod for swapping src0 and
+ * src1 in e.g. CMP.
+ */
+uint32_t
+brw_swap_cmod(uint32_t cmod)
+{
+	switch (cmod) {
+	case BRW_CONDITIONAL_Z:
+	case BRW_CONDITIONAL_NZ:
+		return cmod;
+	case BRW_CONDITIONAL_G:
+		return BRW_CONDITIONAL_LE;
+	case BRW_CONDITIONAL_GE:
+		return BRW_CONDITIONAL_L;
+	case BRW_CONDITIONAL_L:
+		return BRW_CONDITIONAL_GE;
+	case BRW_CONDITIONAL_LE:
+		return BRW_CONDITIONAL_G;
+	default:
+		return ~0;
+	}
+}
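/* Usage sketch, not part of the patch: brw_swap_cmod() is what a peephole
 * uses when it commutes the two operands of a CMP; the conditional mod has
 * to be mirrored for the comparison to keep its meaning, and ~0 flags a
 * cmod that cannot be swapped.  emit_cmp_swapped() is a hypothetical
 * wrapper written against the helpers declared in brw_eu.h.
 */
static struct brw_instruction *
emit_cmp_swapped(struct brw_compile *p, struct brw_reg dst, uint32_t cmod,
		 struct brw_reg src0, struct brw_reg src1)
{
	uint32_t swapped = brw_swap_cmod(cmod);

	assert(swapped != (uint32_t)~0);	/* Z/NZ/G/GE/L/LE only */
	brw_set_conditionalmod(p, swapped);
	/* cmp.cmod dst, src0, src1  ==  cmp.swap(cmod) dst, src1, src0 */
	return brw_alu2(p, BRW_OPCODE_CMP, dst, src1, src0);
}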
+
+/* How does predicate control work when execution_size != 8?  Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
+{
+	p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+	if (value != 0xff) {
+		if (value != p->flag_value) {
+			brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+			p->flag_value = value;
+		}
+
+		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+	}
+}
+
+void brw_set_compression_control(struct brw_compile *p,
+				 enum brw_compression compression_control)
+{
+	p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+
+	if (p->gen >= 060) {
+		/* Since we don't use the 32-wide support in gen6, we translate
+		 * the pre-gen6 compression control here.
+		 */
+		switch (compression_control) {
+		case BRW_COMPRESSION_NONE:
+			/* This is the "use the first set of bits of dmask/vmask/arf
+			 * according to execsize" option.
+			 */
+			p->current->header.compression_control = GEN6_COMPRESSION_1Q;
+			break;
+		case BRW_COMPRESSION_2NDHALF:
+			/* For 8-wide, this is "use the second set of 8 bits." */
+			p->current->header.compression_control = GEN6_COMPRESSION_2Q;
+			break;
+		case BRW_COMPRESSION_COMPRESSED:
+			/* For 16-wide instruction compression, use the first set of 16 bits
+			 * since we don't do 32-wide dispatch.
+			 */
+			p->current->header.compression_control = GEN6_COMPRESSION_1H;
+			break;
+		default:
+			assert(!"not reached");
+			p->current->header.compression_control = GEN6_COMPRESSION_1H;
+			break;
+		}
+	} else {
+		p->current->header.compression_control = compression_control;
+	}
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+	assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+	memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+	p->compressed_stack[p->current - p->stack] = p->compressed;
+	p->current++;
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+	assert(p->current != p->stack);
+	p->current--;
+	p->compressed = p->compressed_stack[p->current - p->stack];
+}
+
+void brw_compile_init(struct brw_compile *p, int gen, void *store)
+{
+	assert(gen);
+
+	p->gen = gen;
+	p->store = store;
+
+	p->nr_insn = 0;
+	p->current = p->stack;
+	p->compressed = false;
+	memset(p->current, 0, sizeof(p->current[0]));
+
+	/* Some defaults?
+	 */
+	brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+	brw_set_saturate(p, 0);
+	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	brw_set_predicate_control_flag_value(p, 0xff);
+
+	p->if_stack_depth = 0;
+	p->if_stack_array_size = 0;
+	p->if_stack = NULL;
+}
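/* Usage sketch, not part of the patch: a kernel generator seeds the
 * assembler with brw_compile_init() and brackets temporary overrides of
 * the per-instruction default state with brw_push_insn_state()/
 * brw_pop_insn_state().  The octal gen value 060 (gen6) and the register
 * numbers are illustrative.
 */
static void example_prologue(struct brw_instruction *store)
{
	struct brw_compile p;

	brw_compile_init(&p, 060, store);

	brw_push_insn_state(&p);
	/* scalar, unpredicated scratch move */
	brw_set_mask_control(&p, BRW_MASK_DISABLE);
	brw_set_compression_control(&p, BRW_COMPRESSION_NONE);
	brw_MOV(&p, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 0), brw_imm_uw(0));
	brw_pop_insn_state(&p);		/* defaults restored */
}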
diff --git a/drivers/video/Intel-2D/brw/brw_eu.h b/drivers/video/Intel-2D/brw/brw_eu.h
new file mode 100644
index 0000000000..0974760962
--- /dev/null
+++ b/drivers/video/Intel-2D/brw/brw_eu.h
@@ -0,0 +1,2266 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+/*
+ * Authors:
+ *   Keith Whitwell
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+#define WRITEMASK_X 0x1
+#define WRITEMASK_Y 0x2
+#define WRITEMASK_Z 0x4
+#define WRITEMASK_W 0x8
+
+#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
+#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
+#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+enum brw_compression {
+	BRW_COMPRESSION_NONE,
+	BRW_COMPRESSION_2NDHALF,
+	BRW_COMPRESSION_COMPRESSED,
+};
+
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about.  Now, there
+ * are two contributors ANDed together to decide whether channels are
+ * executed: the predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value.
It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. + */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXF, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, + VS_OPCODE_PULL_CONSTANT_LOAD, +}; + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_MRF_COMPR4 (1 << 7) + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */ +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4 +#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5 +#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 +#define 
GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 + +/* for GEN5 only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +/* This one stays the same across generations. */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +/** + * Message target: Shared Function ID for where to SEND a message. + * + * These are enumerated in the ISA reference under "send - Send Message". 
+ * In particular, see the following tables: + * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" + * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" + * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / + * Overview / GPE Function IDs + */ +enum brw_message_target { + BRW_SFID_NULL = 0, + BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ + BRW_SFID_SAMPLER = 2, + BRW_SFID_MESSAGE_GATEWAY = 3, + BRW_SFID_DATAPORT_READ = 4, + BRW_SFID_DATAPORT_WRITE = 5, + BRW_SFID_URB = 6, + BRW_SFID_THREAD_SPAWNER = 7, + + GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, + GEN6_SFID_DATAPORT_RENDER_CACHE = 5, + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, +}; + +#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +/* GEN6 */ +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 +#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + +#define REG_SIZE (8*4) + +struct brw_instruction { + struct { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; /* gen6: quater control */ + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + /** + * 
Conditional Modifier for most instructions. On Gen6+, this is also + * used for the SEND instruction's Message Target/SFID. + */ + unsigned destreg__conditionalmod:4; + unsigned acc_wr_control:1; + unsigned cmpt_control:1; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; /* 0x00000c00 */ + unsigned src1_reg_type:3; /* 0x00007000 */ + unsigned pad:1; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + + int jump_count:16; + } branch_gen6; + + struct { + unsigned dest_reg_file:1; + unsigned flag_subreg_num:1; + unsigned pad0:2; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src2_abs:1; + unsigned src2_negate:1; + unsigned pad1:7; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:3; + unsigned dest_reg_nr:8; + } da3src; + } bits1; + + + union { + struct { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } da1; + + struct { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } ia1; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } da16; + + struct { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + 
unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia16; + + /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. + * + * Does not apply to Gen6+. The SFID/message target moved to bits + * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. + */ + struct { + unsigned pad:26; + unsigned end_of_thread:1; + unsigned pad1:1; + unsigned sfid:4; + } send_gen5; /* for Ironlake only */ + + struct { + unsigned src0_rep_ctrl:1; + unsigned src0_swizzle:8; + unsigned src0_subreg_nr:3; + unsigned src0_reg_nr:8; + unsigned pad0:1; + unsigned src1_rep_ctrl:1; + unsigned src1_swizzle:8; + unsigned src1_subreg_nr_low:2; + } da3src; + } bits2; + + union { + struct { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia1; + + struct { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad2:5; + } ia16; + + struct { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + /* This is also used for gen7 IF/ELSE instructions */ + struct { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + + /** + * \defgroup SEND instructions / Message Descriptors + * + * @{ + */ + + /** + * Generic Message Descriptor for Gen4 SEND instructions. The structs + * below expand function_control to something specific for their + * message. Due to struct packing issues, they duplicate these bits. + * + * See the G45 PRM, Volume 4, Table 14-15. + */ + struct { + unsigned function_control:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + /** + * Generic Message Descriptor for Gen5-7 SEND instructions. + * + * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most + * of the information on the SEND instruction is missing from the public + * Ironlake PRM.) + * + * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. 
+ * According to the SEND instruction description: + * "The MSb of the message description, the EOT field, always comes from + * bit 127 of the instruction word"...which is bit 31 of this field. + */ + struct { + unsigned function_control:19; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } generic_gen5; + + /** G45 PRM, Volume 4, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned snapshot:1; + unsigned pad0:10; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } math_gen5; + + /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned simd_mode:2; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen5; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:5; + unsigned simd_mode:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen7; + + struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } urb; + + struct { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen5; + + struct { + unsigned opcode:3; + unsigned offset:11; + unsigned swizzle_control:1; + unsigned complete:1; + unsigned per_slot_offset:1; + unsigned pad0:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen7; + + /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + 
unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_read_gen5; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_write_gen5; + + /** + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. + * + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. + **/ + struct { + unsigned binding_table_index:8; + unsigned msg_control:5; + unsigned msg_type:3; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp_sampler_const_cache; + + /** + * Message for the Sandybridge Render Cache Data Port. + * + * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, + * Section 3.9.2.1.1: Message Descriptor. + * + * "Slot Group Select" and "Last Render Target" are part of the + * 5-bit message control for Render Target Write messages. See + * Section 3.9.9.2.1 of the same volume. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_type:4; + unsigned send_commit_msg:1; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp; + + /** + * Message for any of the Gen7 Data Port caches. + * + * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / + * Data Port Messages / Message Descriptor. Once again, "Slot Group + * Select" and "Last Render Target" are part of the 6-bit message + * control for Render Target Writes. 
+ */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned slot_group_select:1; + unsigned last_render_target:1; + unsigned msg_control_pad:1; + unsigned msg_type:4; + unsigned pad1:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad2:2; + unsigned end_of_thread:1; + } gen7_dp; + /** @} */ + + struct { + unsigned src1_subreg_nr_high:1; + unsigned src1_reg_nr:8; + unsigned pad0:1; + unsigned src2_rep_ctrl:1; + unsigned src2_swizzle:8; + unsigned src2_subreg_nr:3; + unsigned src2_reg_nr:8; + unsigned pad1:2; + } da3src; + + int d; + unsigned ud; + float f; + } bits3; +}; + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction *store; + unsigned nr_insn; + + int gen; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + bool single_program_flow; + bool compressed; + + /* Control flow stacks: + * - if_stack contains IF and ELSE instructions which must be patched + * (and popped) once the matching ENDIF instruction is encountered. + */ + struct brw_instruction **if_stack; + int if_stack_depth; + int if_stack_array_size; +}; + +static inline int type_sz(unsigned type) +{ + switch (type) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. 
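+ * For example, brw_reg(BRW_GENERAL_REGISTER_FILE, 2, 0, BRW_REGISTER_TYPE_F, + * BRW_VERTICAL_STRIDE_8, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1, + * BRW_SWIZZLE_XYZW, WRITEMASK_XYZW) describes r2 as a float[8] region; + * the brw_vec8_reg() helper below builds exactly this. + *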
+ * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask WRITEMASK_X/Y/Z/W bitfield + */ +static inline struct brw_reg brw_reg(unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + struct brw_reg reg; + if (file == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static inline struct brw_reg brw_vec16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static inline struct brw_reg brw_vec8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static inline struct brw_reg brw_vec4_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static inline struct brw_reg brw_vec2_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static inline struct brw_reg brw_vec1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + + +static inline struct brw_reg __retype(struct brw_reg reg, + unsigned type) +{ + reg.type = type; + return reg; +} + +static inline struct brw_reg __retype_d(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg __retype_ud(struct brw_reg reg) +{ + return __retype(reg, BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg __retype_uw(struct brw_reg reg) +{ + return __retype(reg, 
BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg __sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +static inline struct brw_reg __suboffset(struct brw_reg reg, + unsigned delta) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + +static inline struct brw_reg __offset(struct brw_reg reg, + unsigned delta) +{ + reg.nr += delta; + return reg; +} + +static inline struct brw_reg byte_offset(struct brw_reg reg, + unsigned bytes) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static inline struct brw_reg brw_uw16_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static inline struct brw_reg brw_uw8_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static inline struct brw_reg brw_uw1_reg(unsigned file, + unsigned nr, + unsigned subnr) +{ + return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static inline struct brw_reg brw_imm_reg(unsigned type) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static inline struct brw_reg brw_imm_f(float f) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static inline struct brw_reg brw_imm_d(int d) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static inline struct brw_reg brw_imm_ud(unsigned ud) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static inline struct brw_reg brw_imm_uw(uint16_t uw) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static inline struct brw_reg brw_imm_w(int16_t w) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static inline struct brw_reg brw_imm_v(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static inline struct brw_reg brw_imm_vf(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static inline struct brw_reg brw_imm_vf4(unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + 
imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + +static inline struct brw_reg brw_address(struct brw_reg reg) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct null register (usually used for setting condition codes) */ +static inline struct brw_reg brw_null_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static inline struct brw_reg brw_address_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static inline struct brw_reg brw_ip_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! 
*/ +} + +static inline struct brw_reg brw_acc_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + +static inline struct brw_reg brw_notification_1_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static inline struct brw_reg brw_flag_reg(void) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + +static inline struct brw_reg brw_mask_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static inline struct brw_reg brw_message_reg(unsigned nr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); +} + +static inline struct brw_reg brw_message4_reg(unsigned nr, + int subnr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr); +} + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static inline unsigned cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static inline struct brw_reg __stride(struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static inline struct brw_reg vec16(struct brw_reg reg) +{ + return __stride(reg, 16,16,1); +} + +static inline struct brw_reg vec8(struct brw_reg reg) +{ + return __stride(reg, 8,8,1); +} + +static inline struct brw_reg vec4(struct brw_reg reg) +{ + return __stride(reg, 4,4,1); +} + +static inline struct brw_reg vec2(struct brw_reg reg) +{ + return __stride(reg, 2,2,1); +} + +static inline struct brw_reg vec1(struct brw_reg reg) +{ + return __stride(reg, 0,1,0); +} + +static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(reg, elt)); +} + +static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt) +{ + return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static inline struct brw_reg brw_swizzle(struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + +static inline struct brw_reg brw_swizzle1(struct brw_reg reg, + unsigned x) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static inline struct brw_reg brw_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask &= mask; + return reg; +} + +static inline struct brw_reg brw_set_writemask(struct brw_reg reg, + unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask = mask; + return reg; +} + +static inline struct brw_reg brw_negate(struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static inline struct brw_reg brw_abs(struct brw_reg reg) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** +*/ +static inline struct 
brw_reg brw_vec4_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg brw_vec1_indirect(unsigned subnr, + int offset) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? 
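+ * Note that only .file and .nr are compared, so two views of the same GRF + * that differ in subnr, type or region still count as the same register.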
*/ +static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) +{ + p->current->header.predicate_inverse = predicate_inverse; +} + +static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode) +{ + p->current->header.access_mode = access_mode; +} + +static inline void brw_set_mask_control(struct brw_compile *p, unsigned value) +{ + p->current->header.mask_control = value; +} + +static inline void brw_set_saturate(struct brw_compile *p, unsigned value) +{ + p->current->header.saturate = value; +} + +static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) +{ + if (p->gen >= 060) + p->current->header.acc_wr_control = value; +} + +void brw_pop_insn_state(struct brw_compile *p); +void brw_push_insn_state(struct brw_compile *p); +void brw_set_compression_control(struct brw_compile *p, enum brw_compression control); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); + +void brw_compile_init(struct brw_compile *p, int gen, void *store); + +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg); + +void gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr); + +static inline struct brw_instruction * +brw_next_insn(struct brw_compile *p, unsigned opcode) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + *insn = *p->current; + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + +/* Helpers for regular instructions: */ +#define ALU1(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + +/* Rounding operations (other than RNDD) require two instructions - the first + * stores a rounded value (possibly the wrong way) in the dest register, but + * also sets a per-channel "increment bit" in the flag register. A predicated + * add of 1.0 fixes dest to contain the desired result. + * + * Sandybridge and later appear to round correctly without an ADD. 
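+ * + * For example, on gen4/5 brw_RNDZ(p, dst, src) expands to roughly + *   rndz.r (8) dst src + *   (+f0) add (8) dst dst 1.0F + * whereas on gen6+ only the rndz instruction is emitted.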
+ */ +#define ROUND(OP) \ +static inline void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd, *add; \ + rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(p, rnd, dest); \ + brw_set_src0(p, rnd, src); \ + if (p->gen < 060) { \ + /* turn on round-increments */ \ + rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + } \ +} + +static inline struct brw_instruction *brw_alu1(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + return insn; +} + +static inline struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = brw_next_insn(p, opcode); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + return insn; +} + +static inline struct brw_instruction *brw_ADD(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.2.2: add */ + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); +} + +static inline struct brw_instruction *brw_MUL(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + /* 6.32.38: mul */ + if (src0.type == BRW_REGISTER_TYPE_D || + src0.type == BRW_REGISTER_TYPE_UD || + src1.type == BRW_REGISTER_TYPE_D || + src1.type == BRW_REGISTER_TYPE_UD) { + assert(dest.type != BRW_REGISTER_TYPE_F); + } + + if (src0.type == BRW_REGISTER_TYPE_F || + (src0.file == BRW_IMMEDIATE_VALUE && + src0.type == BRW_REGISTER_TYPE_VF)) { + assert(src1.type != BRW_REGISTER_TYPE_UD); + assert(src1.type != BRW_REGISTER_TYPE_D); + } + + if (src1.type == BRW_REGISTER_TYPE_F || + (src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_VF)) { + assert(src0.type != BRW_REGISTER_TYPE_UD); + assert(src0.type != BRW_REGISTER_TYPE_D); + } + + assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_ACCUMULATOR); + assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || + src1.nr != BRW_ARF_ACCUMULATOR); + + return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); +} + +static inline struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +ALU1(MOV); +ALU2(SEL); +ALU1(NOT); +ALU2(AND); +ALU2(OR); +ALU2(XOR); +ALU2(SHR); +ALU2(SHL); +ALU2(RSR); +ALU2(RSL); +ALU2(ASR); +ALU1(FRC); +ALU1(RNDD); +ALU2(MAC); +ALU2(MACH); +ALU1(LZD); +ALU2(DP4); +ALU2(DPH); +ALU2(DP3); +ALU2(DP2); +ALU2(LINE); 
+ALU2(PLN); + +ROUND(RNDZ); +ROUND(RNDE); + +#undef ALU1 +#undef ALU2 +#undef ROUND + +/* Helpers for SEND instruction */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + bool header_present, + bool last_render_target, + unsigned response_length, + bool end_of_thread, + bool send_commit_msg); + +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot); + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode); + +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision); + +void brw_math(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision); + +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1); + +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index); + +void brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + unsigned offset); + +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset); + +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index); + +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index); + +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addrReg, + unsigned offset, + unsigned bind_table_index); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. 
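+ * + * A minimal usage sketch (illustrative only; 'x' stands for some + * previously loaded register): + * + * brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, x, brw_imm_f(0.0f)); + * brw_IF(p, BRW_EXECUTE_8); + * ... instructions for the channels that passed the compare ... + * brw_ELSE(p); + * ... instructions for the remaining channels ... + * brw_ENDIF(p);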
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); +struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1); + +void brw_ELSE(struct brw_compile *p); +void brw_ENDIF(struct brw_compile *p); + +/* DO/WHILE loops: +*/ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn); +/* Forward jumps: +*/ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + +void brw_NOP(struct brw_compile *p); + +void brw_WAIT(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg(struct brw_reg reg); + +static inline void brw_math_invert(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR); +} + +void brw_set_uip_jip(struct brw_compile *p); + +uint32_t brw_swap_cmod(uint32_t cmod); + +void brw_disasm(FILE *file, + const struct brw_instruction *inst, + int gen); + +#endif diff --git a/drivers/video/Intel-2D/brw/brw_eu_emit.c b/drivers/video/Intel-2D/brw/brw_eu_emit.c new file mode 100644 index 0000000000..e7930cf8e5 --- /dev/null +++ b/drivers/video/Intel-2D/brw/brw_eu_emit.c @@ -0,0 +1,2002 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ +/* + * Authors: + * Keith Whitwell + */ + +#include "brw_eu.h" + +#include <string.h> +#include <stdlib.h> + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.width == BRW_WIDTH_8 && p->compressed) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; +} + + +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. This function performs the + * explicit move; it should be called before emitting a SEND instruction. + */ +void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr) +{ + if (p->gen < 060) + return; + + if (src->file == BRW_MESSAGE_REGISTER_FILE) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + +static void +gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +{ + /* From the BSpec / ISA Reference / send - [DevIVB+]: + * "The send with EOT should use register space R112-R127 for <src>. This is + * to enable loading of a new thread into the same slot while the message + * with EOT for current thread is pending dispatch." + * + * Since we're pretending to have 16 MRFs anyway, we may as well use the + * registers required for messages with EOT.
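+ * + * Sketch of the effect (illustrative, assuming brw_message_reg(n) + * yields MRF number n): + * + * struct brw_reg m = brw_message_reg(4); + * gen7_convert_mrf_to_grf(p, &m); + * + * after which m.file == BRW_GENERAL_REGISTER_FILE and m.nr == 115; the + * whole m1..m16 range lands in the r112-r127 EOT window.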
+ */ + if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + reg->file = BRW_GENERAL_REGISTER_FILE; + reg->nr += 111; + } +} + +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) +{ + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) + assert(dest.nr < 128); + + gen7_convert_mrf_to_grf(p, &dest); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; + } + } else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; + } + } + + guess_execution_size(p, insn, dest); +} + +static const int reg_type_size[8] = { + [0] = 4, + [1] = 4, + [2] = 2, + [3] = 2, + [4] = 1, + [5] = 1, + [7] = 4 +}; + +static void +validate_reg(struct brw_instruction *insn, struct brw_reg reg) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* 3.3.6: Region Parameters. Restriction: Immediate vectors + * mean the destination has to be 128-bit aligned and the + * destination horiz stride has to be a word. + */ + if (reg.type == BRW_REGISTER_TYPE_V) { + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + } + + return; + } + + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL) + return; + + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + vstride = vstride_for_reg[reg.vstride]; + } + + width = width_for_reg[reg.width]; + + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Restrictions from 3.3.10: Register Region Restrictions. */ + /* 3. */ + assert(execsize >= width); + + /* 4. */ + if (execsize == width && hstride != 0) { + assert(vstride == -1 || vstride == width * hstride); + } + + /* 5. */ + if (execsize == width && hstride == 0) { + /* no restriction on vstride. */ + } + + /* 6. */ + if (width == 1) { + assert(hstride == 0); + } + + /* 7. */ + if (execsize == 1 && width == 1) { + assert(hstride == 0); + assert(vstride == 0); + } + + /* 8. */ + if (vstride == 0 && hstride == 0) { + assert(width == 1); + } + + /* 10. Check destination issues.
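+ * + * Worked example for rule 4 (illustrative): the region <8;8,1>:f at + * execsize 8 satisfies vstride == width * hstride (8 == 8 * 1), so it + * passes; <4;8,1> at execsize 8 would trip the assert above, since its + * vstride does not span one full row.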
*/ +} + +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, &reg); + + validate_reg(insn, reg); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } else { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, &reg); + + validate_reg(insn, reg); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions.
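+ * + * e.g. (illustrative; dst/src stand for any valid GRF operands) + * brw_ADD(p, dst, src, brw_imm_f(1.0f)) is encodable, while + * brw_ADD(p, dst, brw_imm_f(1.0f), src) would trip the assert below.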
*/ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + +/** + * Set the Message Descriptor and Extended Message Descriptor fields + * for SEND messages. + * + * \note This zeroes out the Function Control bits, so it must be called + * \b before filling out any message-specific data. Callers can + * choose not to fill in irrelevant bits; they will be zero. 
+ */ +static void +brw_set_message_descriptor(struct brw_compile *p, + struct brw_instruction *inst, + enum brw_message_target sfid, + unsigned msg_length, + unsigned response_length, + bool header_present, + bool end_of_thread) +{ + brw_set_src1(p, inst, brw_imm_d(0)); + + if (p->gen >= 050) { + inst->bits3.generic_gen5.header_present = header_present; + inst->bits3.generic_gen5.response_length = response_length; + inst->bits3.generic_gen5.msg_length = msg_length; + inst->bits3.generic_gen5.end_of_thread = end_of_thread; + + if (p->gen >= 060) { + /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ + inst->header.destreg__conditionalmod = sfid; + } else { + /* Set Extended Message Descriptor (ex_desc) */ + inst->bits2.send_gen5.sfid = sfid; + inst->bits2.send_gen5.end_of_thread = end_of_thread; + } + } else { + inst->bits3.generic.response_length = response_length; + inst->bits3.generic.msg_length = msg_length; + inst->bits3.generic.msg_target = sfid; + inst->bits3.generic.end_of_thread = end_of_thread; + } +} + + +static void brw_set_math_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned function, + unsigned integer_type, + bool low_precision, + bool saturate, + unsigned dataType) +{ + unsigned msg_length; + unsigned response_length; + + /* Infer message length from the function */ + switch (function) { + case BRW_MATH_FUNCTION_POW: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: + case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: + msg_length = 2; + break; + default: + msg_length = 1; + break; + } + + /* Infer response length from the function */ + switch (function) { + case BRW_MATH_FUNCTION_SINCOS: + case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: + response_length = 2; + break; + default: + response_length = 1; + break; + } + + brw_set_message_descriptor(p, insn, BRW_SFID_MATH, + msg_length, response_length, + false, false); + if (p->gen == 050) { + insn->bits3.math_gen5.function = function; + insn->bits3.math_gen5.int_type = integer_type; + insn->bits3.math_gen5.precision = low_precision; + insn->bits3.math_gen5.saturate = saturate; + insn->bits3.math_gen5.data_type = dataType; + insn->bits3.math_gen5.snapshot = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + } +} + +static void brw_set_ff_sync_message(struct brw_compile *p, + struct brw_instruction *insn, + bool allocate, + unsigned response_length, + bool end_of_thread) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_URB, + 1, response_length, + true, end_of_thread); + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ +} + +static void brw_set_urb_message(struct brw_compile *p, + struct brw_instruction *insn, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool end_of_thread, + bool complete, + unsigned offset, + unsigned swizzle_control) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_URB, + msg_length, response_length, true, end_of_thread); + if (p->gen >= 070) { + insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ + 
insn->bits3.urb_gen7.offset = offset; + assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); + insn->bits3.urb_gen7.swizzle_control = swizzle_control; + /* per_slot_offset = 0 makes it ignore offsets in message header */ + insn->bits3.urb_gen7.per_slot_offset = 0; + insn->bits3.urb_gen7.complete = complete; + } else if (p->gen >= 050) { + insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ + insn->bits3.urb_gen5.offset = offset; + insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.allocate = allocate; + insn->bits3.urb_gen5.used = used; /* ? */ + insn->bits3.urb_gen5.complete = complete; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + } +} + +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + bool header_present, + bool last_render_target, + unsigned response_length, + bool end_of_thread, + bool send_commit_msg) +{ + unsigned sfid; + + if (p->gen >= 070) { + /* Use the Render Cache for RT writes; otherwise use the Data Cache */ + if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + else + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + } else if (p->gen >= 060) { + /* Use the render cache for all write messages. */ + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + } else { + sfid = BRW_SFID_DATAPORT_WRITE; + } + + brw_set_message_descriptor(p, insn, sfid, + msg_length, response_length, + header_present, end_of_thread); + + if (p->gen >= 070) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = last_render_target; + insn->bits3.gen7_dp.msg_type = msg_type; + } else if (p->gen >= 060) { + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.last_render_target = last_render_target; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; + } else if (p->gen >= 050) { + insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_write_gen5.msg_control = msg_control; + insn->bits3.dp_write_gen5.last_render_target = last_render_target; + insn->bits3.dp_write_gen5.msg_type = msg_type; + insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.last_render_target = last_render_target; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = send_commit_msg; + } +} + +void +brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length) +{ + unsigned sfid; + + if (p->gen >= 070) { + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + } else if (p->gen >= 060) { + if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + else + sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; + } else { + sfid = BRW_SFID_DATAPORT_READ; + } + + brw_set_message_descriptor(p, insn, sfid, + 
msg_length, response_length, + true, false); + + if (p->gen >= 070) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = 0; + insn->bits3.gen7_dp.msg_type = msg_type; + } else if (p->gen >= 060) { + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.last_render_target = 0; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = 0; + } else if (p->gen >= 050) { + insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; + insn->bits3.dp_read_gen5.msg_control = msg_control; + insn->bits3.dp_read_gen5.msg_type = msg_type; + insn->bits3.dp_read_gen5.target_cache = target_cache; + } else if (p->gen >= 045) { + insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ + insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ + insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + } +} + +static void brw_set_sampler_message(struct brw_compile *p, + struct brw_instruction *insn, + unsigned binding_table_index, + unsigned sampler, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode) +{ + brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, + msg_length, response_length, + header_present, false); + + if (p->gen >= 070) { + insn->bits3.sampler_gen7.binding_table_index = binding_table_index; + insn->bits3.sampler_gen7.sampler = sampler; + insn->bits3.sampler_gen7.msg_type = msg_type; + insn->bits3.sampler_gen7.simd_mode = simd_mode; + } else if (p->gen >= 050) { + insn->bits3.sampler_gen5.binding_table_index = binding_table_index; + insn->bits3.sampler_gen5.sampler = sampler; + insn->bits3.sampler_gen5.msg_type = msg_type; + insn->bits3.sampler_gen5.simd_mode = simd_mode; + } else if (p->gen >= 045) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + } +} + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src1(p, insn, brw_imm_ud(0x0)); +} + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +static void +push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + /* Grow before storing: brw_compile_init() leaves if_stack NULL with + * if_stack_array_size 0, so storing first would write through a NULL + * pointer (16 is an arbitrary initial capacity). + */ + if (p->if_stack_array_size <= p->if_stack_depth) { + p->if_stack_array_size = p->if_stack_array_size ? 2*p->if_stack_array_size : 16; + p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); + } + + p->if_stack[p->if_stack_depth++] = inst; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack).
Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, e.g. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevant flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + */ +struct brw_instruction * +brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_IF); + + /* Override the defaults for this instruction: */ + if (p->gen < 060) { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 070) { + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; + } + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + push_if_stack(p, insn); + return insn; +} + +/* This function is only used for gen6-style IF instructions with an + * embedded comparison (conditional modifier). It is not used on gen7. + */ +struct brw_instruction * +gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_IF); + + brw_set_dest(p, insn, brw_imm_w(0)); + if (p->compressed) { + insn->header.execution_size = BRW_EXECUTE_16; + } else { + insn->header.execution_size = BRW_EXECUTE_8; + } + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + + assert(insn->header.compression_control == BRW_COMPRESSION_NONE); + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = conditional; + + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + push_if_stack(p, insn); + return insn; +} + +/** + * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. + */ +static void +convert_IF_ELSE_to_ADD(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst) +{ + /* The next instruction (where the ENDIF would be, if it existed) */ + struct brw_instruction *next_inst = &p->store[p->nr_insn]; + + assert(p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + assert(if_inst->header.execution_size == BRW_EXECUTE_1); + + /* Convert IF to an ADD instruction that moves the instruction pointer + * to the first instruction of the ELSE block. If there is no ELSE + * block, point to where ENDIF would be. Reverse the predicate.
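+ * + * The "* 16" below converts an instruction count into bytes, each + * native instruction being 128 bits (16 bytes) long. Illustrative + * arithmetic: with the IF at instruction index 10 and the ELSE at + * index 14, the IF becomes an ADD of (14 - 10 + 1) * 16 = 80 bytes + * to the instruction pointer, landing on the first instruction of + * the ELSE block.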
+ * + * There's no need to execute an ENDIF since we don't need to do any + * stack operations, and if we're currently executing, we just want to + * continue normally. + */ + if_inst->header.opcode = BRW_OPCODE_ADD; + if_inst->header.predicate_inverse = 1; + + if (else_inst != NULL) { + /* Convert ELSE to an ADD instruction that points where the ENDIF + * would be. + */ + else_inst->header.opcode = BRW_OPCODE_ADD; + + if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; + else_inst->bits3.ud = (next_inst - else_inst) * 16; + } else { + if_inst->bits3.ud = (next_inst - if_inst) * 16; + } +} + +/** + * Patch IF and ELSE instructions with appropriate jump targets. + */ +static void +patch_IF_ELSE(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst, + struct brw_instruction *endif_inst) +{ + unsigned br = 1; + + assert(!p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(endif_inst != NULL); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + + /* Jump counts are in units of 64-bit data chunks, so one 128-bit + * instruction requires 2 chunks. + */ + if (p->gen >= 050) + br = 2; + + assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); + endif_inst->header.execution_size = if_inst->header.execution_size; + + if (else_inst == NULL) { + /* Patch IF -> ENDIF */ + if (p->gen < 060) { + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. + */ + if_inst->header.opcode = BRW_OPCODE_IFF; + if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (p->gen < 070) { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); + } else { + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); + } + } else { + else_inst->header.execution_size = if_inst->header.execution_size; + + /* Patch IF -> ELSE */ + if (p->gen < 060) { + if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (p->gen < 070) { + if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); + } + + /* Patch ELSE -> ENDIF */ + if (p->gen < 060) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); + else_inst->bits3.if_else.pop_count = 1; + else_inst->bits3.if_else.pad0 = 0; + } else if (p->gen < 070) { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF.
*/ + else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); + } else { + /* The IF instruction's JIP should point just past the ELSE */ + if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); + /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); + } + } +} + +void +brw_ELSE(struct brw_compile *p) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_ELSE); + + if (p->gen < 060) { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 070) { + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + push_if_stack(p, insn); +} + +void +brw_ENDIF(struct brw_compile *p) +{ + struct brw_instruction *insn; + struct brw_instruction *else_inst = NULL; + struct brw_instruction *if_inst = NULL; + + /* Pop the IF and (optional) ELSE instructions from the stack */ + p->if_stack_depth--; + if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { + else_inst = p->if_stack[p->if_stack_depth]; + p->if_stack_depth--; + } + if_inst = p->if_stack[p->if_stack_depth]; + + if (p->single_program_flow) { + /* ENDIF is useless; don't bother emitting it. 
*/ + convert_IF_ELSE_to_ADD(p, if_inst, else_inst); + return; + } + + insn = brw_next_insn(p, BRW_OPCODE_ENDIF); + + if (p->gen < 060) { + brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (p->gen < 070) { + brw_set_dest(p, insn, brw_imm_w(0)); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + } else { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Also pop item off the stack in the endif instruction: */ + if (p->gen < 060) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else if (p->gen < 070) { + insn->bits1.branch_gen6.jump_count = 2; + } else { + insn->bits3.break_cont.jip = 2; + } + patch_IF_ELSE(p, if_inst, else_inst, insn); +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_BREAK); + if (p->gen >= 060) { + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) +{ + struct brw_instruction *insn; + insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + return insn; +} + +/* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. 
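+ * + * Usage sketch (illustrative): + * + * struct brw_instruction *do_insn = brw_DO(p, BRW_EXECUTE_8); + * ... loop body, using brw_BREAK()/brw_CONT() as needed ... + * brw_WHILE(p, do_insn);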
+ */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->gen >= 060 || p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); + brw_set_src1(p, insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + unsigned br = 1; + + if (p->gen >= 050) + br = 2; + + if (p->gen >= 070) { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(p, insn, __retype_d(brw_null_reg())); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = br * (do_insn - insn); + + insn->header.execution_size = BRW_EXECUTE_8; + } else if (p->gen >= 060) { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(p, insn, __retype_d(brw_null_reg())); + brw_set_src1(p, insn, __retype_d(brw_null_reg())); + + insn->header.execution_size = BRW_EXECUTE_8; + } else { + if (p->single_program_flow) { + insn = brw_next_insn(p, BRW_OPCODE_ADD); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = brw_next_insn(p, BRW_OPCODE_WHILE); + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0)); + + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + unsigned jmpi = 1; + + if (p->gen >= 050) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. 
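+ * + * One classic pattern (sketch): computing min(src, 0.0f) with a CMP + * into the flag register followed by a predicated SEL, which picks + * src0 on the channels where the compare fired and src1 elsewhere: + * + * brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src, brw_imm_f(0.0f)); + * brw_SEL(p, dst, src, brw_imm_f(0.0f));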
+ */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + +/* Issue 'wait' instruction for n1, host could program MMIO + to wake up thread. */ +void brw_WAIT(struct brw_compile *p) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT); + struct brw_reg src = brw_notification_1_reg(); + + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + insn->header.execution_size = 0; /* must */ + insn->header.predicate_control = 0; + insn->header.compression_control = 0; +} + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision) +{ + if (p->gen >= 060) { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type == BRW_REGISTER_TYPE_F); + } + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + } else { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + src.type == BRW_REGISTER_TYPE_D, + precision, + saturate, + data_type); + } +} + +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src0.type == BRW_REGISTER_TYPE_F); + assert(src1.type == BRW_REGISTER_TYPE_F); + } + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 
+ */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); +} + +/** + * Extended math function, float[16]. + * Use 2 send instructions. + */ +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision) +{ + struct brw_instruction *insn; + + if (p->gen >= 060) { + insn = brw_next_insn(p, BRW_OPCODE_MATH); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + return; + } + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditionalmod = msg_reg_nr+1; + + brw_set_dest(p, insn, __offset(dest,1)); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + +/** + * Write a block of OWORDs (half a GRF each) from the scratch buffer, + * using a constant offset per channel. + * + * The offset must be aligned to oword size (16 bytes). Used for + * register spilling. + */ +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + uint32_t msg_control, msg_type; + int mlen; + + if (p->gen >= 060) + offset /= 16; + + mrf = __retype_ud(mrf); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + mlen = 2; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + mlen = 3; + } + + /* Set up the message header. This is g0, with g0.2 filled with + * the offset. We don't want to leave our offset around in g0 or + * it'll screw up texture samples, so set it up inside the message + * reg. 
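+ * + * After the two MOVs below (sketch), mrf holds a copy of g0 with + * element 2 overwritten; e.g. on gen6+ a byte offset of 1024 has + * already been scaled to 1024 / 16 = 64 oword units by this point.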
+ */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_reg dest; + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + int send_commit_msg; + struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0)); + + if (insn->header.compression_control != BRW_COMPRESSION_NONE) { + insn->header.compression_control = BRW_COMPRESSION_NONE; + src_header = vec16(src_header); + } + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = mrf.nr; + + /* Until gen6, writes followed by reads from the same location + * are not guaranteed to be ordered unless write_commit is set. + * If set, then a no-op write is issued to the destination + * register to set a dependency, and a read from the destination + * can be used to ensure the ordering. + * + * For gen6, only writes between different threads need ordering + * protection. Our use of DP writes is all about register + * spilling within a thread. + */ + if (p->gen >= 060) { + dest = __retype_uw(vec16(brw_null_reg())); + send_commit_msg = 0; + } else { + dest = src_header; + send_commit_msg = 1; + } + + brw_set_dest(p, insn, dest); + if (p->gen >= 060) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + if (p->gen >= 060) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + + brw_set_dp_write_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + msg_type, + mlen, + true, /* header_present */ + 0, /* pixel scoreboard */ + send_commit_msg, /* response_length */ + 0, /* eot */ + send_commit_msg); + } +} + + +/** + * Read a block of owords (half a GRF each) from the scratch buffer + * using a constant index per channel. + * + * Offset must be aligned to oword size (16 bytes). Used for register + * spilling. + */ +void +brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + uint32_t msg_control; + int rlen; + + if (p->gen >= 060) + offset /= 16; + + mrf = __retype_ud(mrf); + dest = __retype_uw(dest); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + rlen = 1; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + rlen = 2; + } + + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); + + assert(insn->header.predicate_control == 0); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = mrf.nr; + + brw_set_dest(p, insn, dest); /* UW? 
*/ + if (p->gen >= 060) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 1, /* msg_length */ + rlen); + } +} + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + */ +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index) +{ + struct brw_instruction *insn; + + /* On newer hardware, offset is in units of owords. */ + if (p->gen >= 060) + offset /= 16; + + mrf = __retype_ud(mrf); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), + brw_imm_ud(offset)); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = __retype_uw(vec8(dest)); + + brw_set_dest(p, insn, dest); + if (p->gen >= 060) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 reg, 2 owords!) */ + + brw_pop_insn_state(p); +} + +/** + * Read a set of dwords from the data port Data Cache (const buffer). + * + * Location (in buffer) appears as UD offsets in the register after + * the provided mrf header reg. + */ +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index) +{ + struct brw_instruction *insn; + + mrf = __retype_ud(mrf); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); + brw_pop_insn_state(p); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = __retype_uw(vec8(dest)); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, brw_null_reg()); + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, + BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
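+ * + * e.g. (sketch) fetching the third float[4] constant of the buffer + * bound at a hypothetical binding-table slot SURF_INDEX_CONST: + * + * brw_dp_READ_4_vs(p, dest, 2 * 16, SURF_INDEX_CONST);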
+ */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + unsigned location, + unsigned bind_table_index) +{ + struct brw_instruction *insn; + unsigned msg_reg_nr = 1; + + if (p->gen >= 060) + location /= 16; + + /* Setup MRF[1] with location/offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), + brw_imm_ud(location)); + brw_pop_insn_state(p); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + if (p->gen >= 060) { + brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + 0, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 Oword) */ +} + +/** + * Read a float[4] constant per vertex from VS constant buffer, with + * relative addressing. + */ +void brw_dp_READ_4_vs_relative(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg addr_reg, + unsigned offset, + unsigned bind_table_index) +{ + struct brw_reg src = brw_vec8_grf(0, 0); + struct brw_instruction *insn; + int msg_type; + + /* Setup MRF[1] with offset into const buffer */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* M1.0 is block offset 0, M1.4 is block offset 1, all other + * fields ignored. 
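+ *
+ * (Two block offsets are supplied because the OWord Dual Block Read
+ * fetches one oword per offset, yielding the two float[4] constants
+ * described above.)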
+ */ + brw_ADD(p, __retype_d(brw_message_reg(1)), + addr_reg, brw_imm_d(offset)); + brw_pop_insn_state(p); + + gen6_resolve_implied_move(p, &src, 0); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = 0; + insn->header.mask_control = BRW_MASK_DISABLE; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + + if (p->gen >= 060) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (p->gen >= 045) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present) +{ + struct brw_instruction *insn; + unsigned msg_type; + struct brw_reg dest; + + if (dispatch_width == 16) + dest = __retype_uw(vec16(brw_null_reg())); + else + dest = __retype_uw(vec8(brw_null_reg())); + + if (p->gen >= 060 && binding_table_index == 0) { + insn = brw_next_insn(p, BRW_OPCODE_SENDC); + } else { + insn = brw_next_insn(p, BRW_OPCODE_SEND); + } + /* The execution mask is ignored for render target writes. */ + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 060) { + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, + insn, + binding_table_index, + msg_control, + msg_type, + msg_length, + header_present, + eot, + response_length, + eot, + 0 /* send_commit_msg */); +} + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. 
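+ *
+ * For instance, the pre-gen6 SIMD8 affine path in brw_wm.c issues this
+ * with msg_length = 3 (two payload registers plus the header) and
+ * response_length = 4, one register per RGBA channel.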
+ */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + bool header_present, + unsigned simd_mode) +{ + assert(writemask); + + if (p->gen < 050 || writemask != WRITEMASK_XYZW) { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + writemask = ~writemask & WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); + + brw_pop_insn_state(p); + + src0 = __retype_uw(brw_null_reg()); + } + + { + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + if (p->gen < 060) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_sampler_message(p, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + header_present, + simd_mode); + } +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + if (p->gen >= 070) { + /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), + __retype_ud(brw_vec1_grf(0, 5)), + brw_imm_ud(0xff00)); + brw_pop_insn_state(p); + } + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen <= 060) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + int jip = p->gen <= 070 ? 
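/* through gen 070 the WHILE offset is encoded in bits1 as a jump count; later gens expose it as JIP in bits3 */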
insn->bits1.branch_gen6.jump_count + : insn->bits3.break_cont.jip; + if (ip + jip / br <= start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + int ip; + int br = 2; + + if (p->gen <= 060) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + /* Gen7 UIP points to WHILE; Gen6 points just after it */ + insn->bits3.break_cont.uip = + br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0)); + break; + case BRW_OPCODE_CONTINUE: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); + + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (p->gen < 060) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p, + insn, + allocate, + response_length, + eot); +} diff --git a/drivers/video/Intel-2D/brw/brw_wm.c b/drivers/video/Intel-2D/brw/brw_wm.c new file mode 100644 index 0000000000..8b73abee00 --- /dev/null +++ b/drivers/video/Intel-2D/brw/brw_wm.c @@ -0,0 +1,681 @@ +#include "brw.h" + +#define X16 8 +#define Y16 10 + +static void brw_wm_xy(struct brw_compile *p, int dw) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = __retype_uw(r1); + struct brw_reg x_uw, y_uw; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (dw == 16) { + x_uw = brw_uw16_grf(30, 0); + y_uw = brw_uw16_grf(28, 0); + } else { + x_uw = brw_uw8_grf(30, 0); + y_uw = brw_uw8_grf(28, 0); + } + + brw_ADD(p, + x_uw, + __stride(__suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + brw_ADD(p, + y_uw, + __stride(__suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); + brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); +} + +static void brw_wm_affine_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 060 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 060 ? 
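/* base GRF of the texcoord plane equations: g4 for gen6+ SIMD8 (g6 for SIMD16 above), g3 on older gens */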
4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 060) { + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + msg += dw/8; + + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv, 4), + brw_vec8_grf(2, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + } +} + +static inline unsigned simd(int dw) +{ + return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; +} + +static inline struct brw_reg sample_result(int dw, int result) +{ + return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static int brw_wm_sample(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + bool header; + int len; + + len = dw == 16 ? 4 : 2; + if (p->gen >= 060) { + header = false; + src0 = brw_message_reg(++msg); + } else { + header = true; + src0 = brw_vec8_grf(0, 0); + } + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_XYZW, 0, + 2*len, len+header, header, simd(dw)); + return result; +} + +static int brw_wm_sample__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + struct brw_reg src0; + int mlen, rlen; + + if (dw == 8) { + /* SIMD8 sample return is not masked */ + mlen = 3; + rlen = 4; + } else { + mlen = 5; + rlen = 2; + } + + if (p->gen >= 060) + src0 = brw_message_reg(msg); + else + src0 = brw_vec8_grf(0, 0); + + brw_SAMPLE(p, sample_result(dw, result), msg, src0, + channel+1, channel, WRITEMASK_W, 0, + rlen, mlen, true, simd(dw)); + + if (dw == 8) + result += 3; + + return result; +} + +static int brw_wm_affine(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_affine__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_affine_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +static inline struct brw_reg null_result(int dw) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, + BRW_REGISTER_TYPE_UW, + dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, + dw == 16 ? 
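/* match the dispatch width so the null destination spans the whole SIMD16/SIMD8 write */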
BRW_WIDTH_16 : BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static void brw_fb_write(struct brw_compile *p, int dw) +{ + struct brw_instruction *insn; + unsigned msg_control, msg_type, msg_len; + struct brw_reg src0; + bool header; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + msg_len = 8; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; + msg_len = 4; + } + + if (p->gen < 060) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + + msg_len += 2; + } + + /* The execution mask is ignored for render target writes. */ + insn = brw_next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->gen >= 060) { + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = brw_message_reg(2); + header = false; + } else { + insn->header.destreg__conditionalmod = 0; + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = __retype_uw(brw_vec8_grf(0, 0)); + header = true; + } + + brw_set_dest(p, insn, null_result(dw)); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, insn, 0, + msg_control, msg_type, msg_len, + header, true, 0, true, false); +} + +static void brw_wm_write(struct brw_compile *p, int dw, int src) +{ + int n; + + if (dw == 8 && p->gen >= 060) { + /* XXX pixel execution mask? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); + brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); + brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); + brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 060) { + brw_MOV(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0)); + } else if (p->gen >= 045 && dw == 16) { + brw_MOV(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 060) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec8_grf(mask, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 060) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else if (p->gen >= 045 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + 
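/* COMPR4 addressing: one compressed instruction fills both MRFs of the SIMD16 pair */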
brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec8_grf(mask+1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__opacity(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 060) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec1_grf(mask, 3)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 060) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else if (p->gen >= 045 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec1_grf(mask, 3)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 060) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src + 0, 0), + brw_vec8_grf(mask + 0, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src + 1, 0), + brw_vec8_grf(mask + 1, 0)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src + 2, 0), + brw_vec8_grf(mask + 2, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src + 3, 0), + brw_vec8_grf(mask + 3, 0)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 060) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else if (p->gen >= 045 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec8_grf(mask + 2*n, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n + 1, 0), + brw_vec8_grf(mask + 2*n + 1, 0)); + } + } + } + +done: + brw_fb_write(p, dw); +} + +bool +brw_wm_kernel__affine(struct brw_compile *p, int dispatch) +{ + if (p->gen < 060) + brw_wm_xy(p, dispatch); + brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = 
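/* sample only the alpha channel of the mask: channel 1, message base 6, result at g20 */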
brw_wm_affine__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + +/* Projective variants */ + +static void brw_wm_projective_st(struct brw_compile *p, int dw, + int channel, int msg) +{ + int uv; + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + uv = p->gen >= 060 ? 6 : 3; + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + uv = p->gen >= 060 ? 4 : 3; + } + uv += 2*channel; + + msg++; + if (p->gen >= 060) { + /* First compute 1/z */ + brw_PLN(p, + brw_message_reg(msg), + brw_vec1_grf(uv+1, 0), + brw_vec8_grf(2, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(2, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_PLN(p, + brw_vec8_grf(28, 0), + brw_vec1_grf(uv, 0), + brw_vec8_grf(4, 0)); + brw_MUL(p, + brw_message_reg(msg), + brw_vec8_grf(28, 0), + brw_vec8_grf(30, 0)); + } else { + struct brw_reg r = brw_vec1_grf(uv, 0); + + /* First compute 1/z */ + brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else + brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + + /* Now compute the output s,t values */ + brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + msg += dw/8; + + brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); + brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); + brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); + } +} + +static int brw_wm_projective(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample(p, dw, channel, msg, result); +} + +static int brw_wm_projective__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) +{ + brw_wm_projective_st(p, dw, channel, msg); + return brw_wm_sample__alpha(p, dw, channel, msg, result); +} + +bool +brw_wm_kernel__projective(struct brw_compile *p, int dispatch) +{ + if (p->gen < 060) + brw_wm_xy(p, 
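/* pre-gen6 must compute the per-pixel X16/Y16 deltas by hand first */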
dispatch); + brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); + + return true; +} + +bool +brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); + brw_wm_write__mask(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 20); + brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) + brw_wm_xy(p, dispatch); + + src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 16); + brw_wm_write__mask(p, dispatch, mask, src); + + return true; +} + +bool +brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) { + brw_wm_xy(p, dispatch); + mask = 5; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 060) { + brw_wm_xy(p, dispatch); + mask = 5; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} diff --git a/drivers/video/Intel-2D/compiler.h b/drivers/video/Intel-2D/compiler.h new file mode 100644 index 0000000000..1d501185b2 --- /dev/null +++ b/drivers/video/Intel-2D/compiler.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifndef _SNA_COMPILER_H_ +#define _SNA_COMPILER_H_ + +#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) +#define likely(expr) (__builtin_expect (!!(expr), 1)) +#define unlikely(expr) (__builtin_expect (!!(expr), 0)) +#define noinline __attribute__((noinline)) +#define force_inline inline __attribute__((always_inline)) +#define fastcall __attribute__((regparm(3))) +#define must_check __attribute__((warn_unused_result)) +#define constant __attribute__((const)) +#define pure __attribute__((pure)) +#define __packed__ __attribute__((__packed__)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#define noinline +#define force_inline +#define fastcall +#define must_check +#define constant +#define pure +#define __packed__ +#endif + +#ifdef HAVE_VALGRIND +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +#define COMPILE_TIME_ASSERT(E) ((void)sizeof(char[1 - 2*!(E)])) + +#endif /* _SNA_COMPILER_H_ */ diff --git a/drivers/video/Intel-2D/drm.h b/drivers/video/Intel-2D/drm.h new file mode 100644 index 0000000000..87acad348f --- /dev/null +++ b/drivers/video/Intel-2D/drm.h @@ -0,0 +1,825 @@ +/** + * \file drm.h + * Header for the Direct Rendering Manager + * + * \author Rickard E. (Rik) Faith + * + * \par Acknowledgments: + * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#include +//#include + +typedef int8_t __s8; +typedef uint8_t __u8; +typedef int16_t __s16; +typedef uint16_t __u16; +typedef int32_t __s32; +typedef uint32_t __u32; +typedef int64_t __s64; +typedef uint64_t __u64; +typedef unsigned int drm_handle_t; + + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? 
*/ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/** + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/** + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/** + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/** + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/** + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + size_t name_len; /**< Length of name buffer */ + char *name; /**< Name of driver */ + size_t date_len; /**< Length of date buffer */ + char *date; /**< User-space buffer to hold date */ + size_t desc_len; /**< Length of desc buffer */ + char *desc; /**< User-space buffer to hold desc */ +}; + +/** + * DRM_IOCTL_GET_UNIQUE ioctl argument type. + * + * \sa drmGetBusid() and drmSetBusId(). + */ +struct drm_unique { + size_t unique_len; /**< Length of unique */ + char *unique; /**< Unique name for driver instantiation */ +}; + +struct drm_list { + int count; /**< Length of user-space structures */ + struct drm_version *version; +}; + +struct drm_block { + int unused; +}; + +/** + * DRM_IOCTL_CONTROL ioctl argument type. + * + * \sa drmCtlInstHandler() and drmCtlUninstHandler(). + */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/** + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5, /**< Consistent memory for PCI DMA */ + _DRM_GEM = 6, /**< GEM object */ +}; + +/** + * Memory mapping flags. 
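+ * These qualify a drm_map_type entry in struct drm_map; for example the
+ * shared SAREA is typically an _DRM_SHM mapping created with
+ * _DRM_CONTAINS_LOCK set.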
+ */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/** + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/** + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/** + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/** + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/** + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). + */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/** + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. + * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. 
+ */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/** + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/** + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc *list; +}; + +/** + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int *list; +}; + +/** + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void *address; /**< Address of buffer */ +}; + +/** + * DRM_IOCTL_MAP_BUFS ioctl argument type. + */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void *virt; +#else + void *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub *list; /**< Buffer information */ +}; + +/** + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int *send_indices; /**< List of handles to buffers */ + int *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int *request_indices; /**< Buffer information */ + int *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/** + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/** + * DRM_IOCTL_RES_CTX ioctl argument type. + */ +struct drm_ctx_res { + int count; + struct drm_ctx *contexts; +}; + +/** + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/** + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { + DRM_DRAWABLE_CLIPRECTS, +} drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/** + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. + */ +struct drm_auth { + drm_magic_t magic; +}; + +/** + * DRM_IOCTL_IRQ_BUSID ioctl argument type. 
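+ * Identifies a device by PCI bus/device/function and reports its IRQ.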
+ * + * \sa drmGetInterruptFromBusID(). + */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/** + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/** + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/** + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/** + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). + */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/** + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/** + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). + */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/** + * DRM_IOCTL_SG_ALLOC ioctl argument type. + */ +struct drm_scatter_gather { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for mapping / unmapping */ +}; + +/** + * DRM_IOCTL_SET_VERSION ioctl argument type. 
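+ * The "di" fields negotiate the DRM interface version and the "dd"
+ * fields the driver-dependent version.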
+ */ +struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +}; + +/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. */ + __u32 handle; + __u32 pad; +}; + +/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +/** DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +//#include "drm_mode.h" + +#if 0 + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) + +#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) +#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) +#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) +#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) +#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) +#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) +#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) +#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) +#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) +#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) +#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) + +#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) + +#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) +#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) +#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) +#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) +#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) +#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) +#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) +#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) +#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, 
struct drm_draw) +#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) +#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) +#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) +#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) +#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) + +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) +#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) +#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) +#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) +#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) +#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) + +#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) +#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) + +#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) +#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) +#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) +#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) +#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) +#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) + +#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) +#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property) +#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) +#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) +#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) +#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) + +#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) +#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) +#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) +#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) +#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) +#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) +#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) +#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) + +#endif + +/** + * Device specific ioctls should only be in their respective headers + * The device specific ioctl range is from 0x40 to 0x99. + * Generic IOCTLS restart at 0xA0. 
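+ * A driver ioctl number is therefore DRM_COMMAND_BASE plus a
+ * driver-local index, and must stay below DRM_COMMAND_END.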
+ * + * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and + * drmCommandReadWrite(). + */ +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * Header for events written back to userspace on the drm fd. The + * type defines the type of event, the length specifies the total + * length of the event (including the header), and user_data is + * typically a 64 bit value passed with the ioctl that triggered the + * event. A read on the drm fd will always only return complete + * events, that is, if for example the read buffer is 100 bytes, and + * there are two 64 byte events pending, only one will be returned. + * + * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and + * up are chipset specific. + */ +struct drm_event { + __u32 type; + __u32 length; +}; + +#define DRM_EVENT_VBLANK 0x01 +#define DRM_EVENT_FLIP_COMPLETE 0x02 + +struct drm_event_vblank { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 sequence; + __u32 reserved; +}; + +#define DRM_CAP_DUMB_BUFFER 0x1 +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +#define DRM_CAP_PRIME 0x5 + +#define DRM_PRIME_CAP_IMPORT 0x1 +#define DRM_PRIME_CAP_EXPORT 0x2 + +/* typedef area */ +typedef struct drm_clip_rect drm_clip_rect_t; +typedef struct drm_drawable_info drm_drawable_info_t; +typedef struct drm_tex_region drm_tex_region_t; +typedef struct drm_hw_lock drm_hw_lock_t; +typedef struct drm_version drm_version_t; +typedef struct drm_unique drm_unique_t; +typedef struct drm_list drm_list_t; +typedef struct drm_block drm_block_t; +typedef struct drm_control drm_control_t; +typedef enum drm_map_type drm_map_type_t; +typedef enum drm_map_flags drm_map_flags_t; +typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; +typedef struct drm_map drm_map_t; +typedef struct drm_client drm_client_t; +typedef enum drm_stat_type drm_stat_type_t; +typedef struct drm_stats drm_stats_t; +typedef enum drm_lock_flags drm_lock_flags_t; +typedef struct drm_lock drm_lock_t; +typedef enum drm_dma_flags drm_dma_flags_t; +typedef struct drm_buf_desc drm_buf_desc_t; +typedef struct drm_buf_info drm_buf_info_t; +typedef struct drm_buf_free drm_buf_free_t; +typedef struct drm_buf_pub drm_buf_pub_t; +typedef struct drm_buf_map drm_buf_map_t; +typedef struct drm_dma drm_dma_t; +typedef union drm_wait_vblank drm_wait_vblank_t; +typedef struct drm_agp_mode drm_agp_mode_t; +typedef enum drm_ctx_flags drm_ctx_flags_t; +typedef struct drm_ctx drm_ctx_t; +typedef struct drm_ctx_res drm_ctx_res_t; +typedef struct drm_draw drm_draw_t; +typedef struct drm_update_draw drm_update_draw_t; +typedef struct drm_auth drm_auth_t; +typedef struct drm_irq_busid drm_irq_busid_t; +typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; + +typedef struct drm_agp_buffer drm_agp_buffer_t; +typedef struct drm_agp_binding drm_agp_binding_t; +typedef struct drm_agp_info drm_agp_info_t; +typedef struct drm_scatter_gather drm_scatter_gather_t; +typedef struct drm_set_version drm_set_version_t; + +#endif diff --git a/drivers/video/Intel-2D/gen4_vertex.h b/drivers/video/Intel-2D/gen4_vertex.h new file mode 100644 index 0000000000..b482809b66 --- /dev/null +++ b/drivers/video/Intel-2D/gen4_vertex.h @@ -0,0 +1,16 @@ +#ifndef GEN4_VERTEX_H +#define GEN4_VERTEX_H + +#include "compiler.h" + +#include "sna.h" +#include "sna_render.h" + +void gen4_vertex_flush(struct sna *sna); +int gen4_vertex_finish(struct sna *sna); +void gen4_vertex_close(struct sna *sna); + 
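+/*
+ * Selects the vertex emitter for a composite op and returns an id for
+ * the chosen per-vertex layout. A hedged sketch of a gen6 call site,
+ * with names assumed from gen6_render.c later in this commit:
+ *
+ *	tmp->u.gen6.flags = GEN6_SET_FLAGS(SAMPLER_OFFSET(...),
+ *					   blend, GEN6_WM_KERNEL_NOMASK,
+ *					   gen4_choose_composite_emitter(tmp));
+ */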
+unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp); +//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp); + +#endif /* GEN4_VERTEX_H */ diff --git a/drivers/video/Intel-2D/gen6_render.c b/drivers/video/Intel-2D/gen6_render.c new file mode 100644 index 0000000000..0e70f333dc --- /dev/null +++ b/drivers/video/Intel-2D/gen6_render.c @@ -0,0 +1,3459 @@ +/* + * Copyright © 2006,2008,2011 Intel Corporation + * Copyright © 2007 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu + * Eric Anholt + * Carl Worth + * Keith Packard + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +//#include "sna_render_inline.h" +//#include "sna_video.h" + +#include "brw/brw.h" +#include "gen6_render.h" + +#include "gen4_vertex.h" + +#define NO_COMPOSITE 0 +#define NO_COMPOSITE_SPANS 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 +#define NO_FILL_ONE 0 +#define NO_FILL_CLEAR 0 + +#define NO_RING_SWITCH 1 +#define PREFER_RENDER 0 + +#define USE_8_PIXEL_DISPATCH 1 +#define USE_16_PIXEL_DISPATCH 1 +#define USE_32_PIXEL_DISPATCH 0 + +#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH +#error "Must select at least 8, 16 or 32 pixel dispatch" +#endif + +#define GEN6_MAX_SIZE 8192 + +struct gt_info { + int max_vs_threads; + int max_gs_threads; + int max_wm_threads; + struct { + int size; + int max_vs_entries; + int max_gs_entries; + } urb; +}; + +static const struct gt_info gt1_info = { + .max_vs_threads = 24, + .max_gs_threads = 21, + .max_wm_threads = 40, + .urb = { 32, 256, 256 }, +}; + +static const struct gt_info gt2_info = { + .max_vs_threads = 60, + .max_gs_threads = 60, + .max_wm_threads = 80, + .urb = { 64, 256, 256 }, +}; + +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +#define NOKERNEL(kernel_enum, func, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} +#define KERNEL(kernel_enum, kernel, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, 
sizeof(kernel), ns} + +static const struct wm_kernel_info { + const char *name; + const void *data; + unsigned int size; + unsigned int num_surfaces; +} wm_kernels[] = { + NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), + + NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), + NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), + + NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), + NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), + + NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), + NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), + + NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), + NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), + + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), +}; +#undef KERNEL + +static const struct blendinfo { + bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen6_blend_op[] = { + /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, + /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, + /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, + /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, + /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, + /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, + /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen6_blend_op. + * + * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, + * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) + +#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) + +#define BLEND_OFFSET(s, d) \ + (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) + +#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) +#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) + +#define SAMPLER_OFFSET(sf, se, mf, me) \ + (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) + +#define VERTEX_2s2s 0 + +#define COPY_SAMPLER 0 +#define COPY_VERTEX VERTEX_2s2s +#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
+
+#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
+#define FILL_VERTEX VERTEX_2s2s
+#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
+#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
+
+#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
+#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
+#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
+#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
+#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static inline bool too_large(int width, int height)
+{
+	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
+}
+
+static uint32_t gen6_get_blend(int op,
+			       bool has_component_alpha,
+			       uint32_t dst_format)
+{
+	uint32_t src, dst;
+
+	/* This port ignores the requested op and always uses the PictOpOver
+	 * factors (src=ONE, dst=INV_SRC_ALPHA); the per-op adjustments below
+	 * are kept for reference but compiled out.
+	 */
+	src = GEN6_BLENDFACTOR_ONE;           /* gen6_blend_op[op].src_blend */
+	dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; /* gen6_blend_op[op].dst_blend */
+
+#if 0
+	/* If there's no dst alpha channel, adjust the blend op so that
+	 * we'll treat it always as 1.
+	 */
+	if (PICT_FORMAT_A(dst_format) == 0) {
+		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
+			src = GEN6_BLENDFACTOR_ONE;
+		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
+			src = GEN6_BLENDFACTOR_ZERO;
+	}
+
+	/* If the source alpha is being used, then we should only be in a
+	 * case where the source blend factor is 0, and the source blend
+	 * value is the mask channels multiplied by the source picture's alpha.
+ */ + if (has_component_alpha && gen6_blend_op[op].src_alpha) { + if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_SRC_COLOR; + else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, (int)BLEND_OFFSET(src, dst))); +#endif + + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen6_get_card_format(PictFormat format) +{ + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + +/* + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_x8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; + case PICT_a2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; + case PICT_r8g8b8: + return GEN6_SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN6_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; + } + */ +} + +static uint32_t gen6_get_dest_format(PictFormat format) +{ + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + +#if 0 + + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN6_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; + } +#endif + +} + +#if 0 + +static bool gen6_check_dst_format(PictFormat format) +{ + if (gen6_get_dest_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static bool gen6_check_format(uint32_t format) +{ + if (gen6_get_card_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static uint32_t gen6_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen6_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return true; + default: + return false; + } +} + +static uint32_t gen6_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen6_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return true; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return true; + default: + return false; + } +} +#endif + +static int +gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool 
is_affine)
+{
+	int base;
+
+	if (has_mask) {
+		if (is_ca) {
+			/* ops that also read source alpha need the
+			 * source-alpha mask kernel
+			 */
+			if (gen6_blend_op[op].src_alpha)
+				base = GEN6_WM_KERNEL_MASKSA;
+			else
+				base = GEN6_WM_KERNEL_MASKCA;
+		} else
+			base = GEN6_WM_KERNEL_MASK;
+	} else
+		base = GEN6_WM_KERNEL_NOMASK;
+
+	return base + !is_affine;
+}
+
+static void
+gen6_emit_urb(struct sna *sna)
+{
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_emit_state_base_address(struct sna *sna)
+{
+	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
+	OUT_BATCH(0); /* general */
+	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+				 sna->kgem.nbatch,
+				 NULL,
+				 I915_GEM_DOMAIN_INSTRUCTION << 16,
+				 BASE_ADDRESS_MODIFY));
+	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
+				 sna->kgem.nbatch,
+				 sna->render_state.gen6.general_bo,
+				 I915_GEM_DOMAIN_INSTRUCTION << 16,
+				 BASE_ADDRESS_MODIFY));
+	OUT_BATCH(0); /* indirect */
+	OUT_BATCH(kgem_add_reloc(&sna->kgem,
+				 sna->kgem.nbatch,
+				 sna->render_state.gen6.general_bo,
+				 I915_GEM_DOMAIN_INSTRUCTION << 16,
+				 BASE_ADDRESS_MODIFY));
+
+	/* upper bounds, disable */
+	OUT_BATCH(0);
+	OUT_BATCH(BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	OUT_BATCH(BASE_ADDRESS_MODIFY);
+}
+
+static void
+gen6_emit_viewports(struct sna *sna)
+{
+	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+		  (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_emit_vs(struct sna *sna)
+{
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	OUT_BATCH(0); /* no VS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void
+gen6_emit_gs(struct sna *sna)
+{
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* no GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+static void
+gen6_emit_clip(struct sna *sna)
+{
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+	OUT_BATCH(0);
+}
+
+static void
+gen6_emit_wm_constants(struct sna *sna)
+{
+	/* disable WM constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void
+gen6_emit_null_depth_buffer(struct sna *sna)
+{
+	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
+		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+static void
+gen6_emit_invariant(struct sna *sna)
+{
+	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
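+	/* a sample mask of 1 enables only sample 0, matching NUMSAMPLES_1 */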
+ OUT_BATCH(1); + + gen6_emit_urb(sna); + + gen6_emit_state_base_address(sna); + + gen6_emit_viewports(sna); + gen6_emit_vs(sna); + gen6_emit_gs(sna); + gen6_emit_clip(sna); + gen6_emit_wm_constants(sna); + gen6_emit_null_depth_buffer(sna); + + sna->render_state.gen6.needs_invariant = false; +} + +static bool +gen6_emit_cc(struct sna *sna, int blend) +{ + struct gen6_render_state *render = &sna->render_state.gen6; + + if (render->blend == blend) + return blend != NO_BLEND; + + DBG(("%s: blend = %x\n", __FUNCTION__, blend)); + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_BATCH((render->cc_blend + blend) | 1); + if (render->blend == (unsigned)-1) { + OUT_BATCH(1); + OUT_BATCH(1); + } else { + OUT_BATCH(0); + OUT_BATCH(0); + } + + render->blend = blend; + return blend != NO_BLEND; +} + +static void +gen6_emit_sampler(struct sna *sna, uint32_t state) +{ + if (sna->render_state.gen6.samplers == state) + return; + + sna->render_state.gen6.samplers = state; + + DBG(("%s: sampler = %x\n", __FUNCTION__, state)); + + OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + OUT_BATCH(sna->render_state.gen6.wm_state + state); +} + +static void +gen6_emit_sf(struct sna *sna, bool has_mask) +{ + int num_sf_outputs = has_mask ? 2 : 1; + + if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) + return; + + DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", + __FUNCTION__, num_sf_outputs, 1, 0)); + + sna->render_state.gen6.num_sf_outputs = num_sf_outputs; + + OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW14 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW19 */ +} + +static void +gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) +{ + const uint32_t *kernels; + + if (sna->render_state.gen6.kernel == kernel) + return; + + sna->render_state.gen6.kernel = kernel; + kernels = sna->render_state.gen6.wm_kernel[kernel]; + + DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", + __FUNCTION__, + wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, + kernels[0], kernels[1], kernels[2])); + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); + OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | + wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); + OUT_BATCH(0); /* scratch space */ + OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | + 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | + 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); + OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | + (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | + (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | + (kernels[2] ? 
GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE); + OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(kernels[2]); + OUT_BATCH(kernels[1]); +} + +static bool +gen6_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen6.surface_table == offset) + return false; + + /* Binding table pointers */ + OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | + GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + /* Only the PS uses the binding table */ + OUT_BATCH(offset*4); + + sna->render_state.gen6.surface_table = offset; + return true; +} + +static bool +gen6_emit_drawing_rectangle(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + assert(!too_large(op->dst.x, op->dst.y)); + assert(!too_large(op->dst.width, op->dst.height)); + + if (sna->render_state.gen6.drawrect_limit == limit && + sna->render_state.gen6.drawrect_offset == offset) + return false; + + /* [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + */ + if (!sna->render_state.gen6.first_state_packet) { + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | + GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + } + + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16 | + I915_GEM_DOMAIN_INSTRUCTION, + 64)); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(limit); + OUT_BATCH(offset); + + sna->render_state.gen6.drawrect_offset = offset; + sna->render_state.gen6.drawrect_limit = limit; + return true; +} + +static void +gen6_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is true): same as above + */ + struct gen6_render_state *render = &sna->render_state.gen6; + uint32_t src_format, dw; + int id = GEN6_VERTEX(op->u.gen6.flags); + bool has_mask; + + DBG(("%s: setup id=%d\n", __FUNCTION__, id)); + + if (render->ve_id == id) + return; + render->ve_id = id; + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + has_mask = (id >> 2) != 0; + OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | + ((2 * (3 + has_mask)) + 1 - 2)); + + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); + + /* x,y */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + + /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id & 3) { + default: + assert(0); + case 0: + src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + 4 << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + + /* u1, v1, w1 */ + if (has_mask) { + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id >> 2) { + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << 
VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + } +} + +static void +gen6_emit_flush(struct sna *sna) +{ + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | + GEN6_PIPE_CONTROL_TC_FLUSH | + GEN6_PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_emit_state(struct sna *sna, + const struct sna_composite_op *op, + uint16_t wm_binding_table) +{ + bool need_stall = wm_binding_table & 1; + + if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) + need_stall = false; + gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); + gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); + gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); + gen6_emit_vertex_elements(sna, op); + + need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); + if (gen6_emit_drawing_rectangle(sna, op)) + need_stall = false; + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + gen6_emit_flush(sna); + kgem_clear_dirty(&sna->kgem); + if (op->dst.bo->exec) + kgem_bo_mark_dirty(op->dst.bo); + need_stall = false; + } + if (need_stall) { + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | + GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + } + sna->render_state.gen6.first_state_packet = false; +} + +static bool gen6_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen6_render_state *state = &sna->render_state.gen6; + + if (!op->need_magic_ca_pass) + return false; + + DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, + sna->render.vertex_start, sna->render.vertex_index)); + + gen6_emit_flush(sna); + + gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); + gen6_emit_wm(sna, + gen6_choose_composite_kernel(PictOpAdd, + true, true, + op->is_affine), + true); + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; + return true; +} + +typedef struct gen6_surface_state_padded { + struct gen6_surface_state state; + char pad[32 - sizeof(struct gen6_surface_state)]; +} gen6_surface_state_padded; + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void scratch_create(struct sna_static_stream *stream) +{ + /* 64 bytes of scratch space for random writes, such as + * the pipe-control w/a. + */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen6_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. 
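+	 * In legacy mode SAMPLER_EXTEND_NONE clamps to the border color,
+	 * which the zeroed block from null_create() leaves as transparent
+	 * black, matching RepeatNone.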
*/ + sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + break; + } +} + +static void +sampler_copy_init(struct gen6_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static void +sampler_fill_init(struct gen6_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static uint32_t +gen6_tiling_bits(uint32_t tiling) +{ + return 0; +/* + switch (tiling) { + default: assert(0); + case I915_TILING_NONE: return 0; + case I915_TILING_X: return GEN6_SURFACE_TILED; + case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; + } +*/ +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. + */ +static int +gen6_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + bool is_dst) +{ + uint32_t *ss; + uint32_t domains; + uint16_t offset; + uint32_t is_scanout = is_dst && bo->scanout; + + /* After the first bind, we manage the cache domains within the batch */ + offset = kgem_bo_get_binding(bo, format | is_scanout << 31); + if (offset) { + DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", + offset, bo->handle, format, + is_dst ? 
"render" : "sampler")); + if (is_dst) + kgem_bo_mark_dirty(bo); + return offset * sizeof(uint32_t); + } + + offset = sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + ss = sna->kgem.batch + offset; + ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | + GEN6_SURFACE_BLEND_ENABLED | + format << GEN6_SURFACE_FORMAT_SHIFT); + if (is_dst) + domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; + else + domains = I915_GEM_DOMAIN_SAMPLER << 16; + ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); + ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | + (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); + assert(bo->pitch <= (1 << 18)); + ss[3] = (gen6_tiling_bits(bo->tiling) | + (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); + ss[4] = 0; + ss[5] = is_scanout ? 0 : 3 << 16; + + kgem_bo_set_binding(bo, format | is_scanout << 31, offset); + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, ss[1], + format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? "render" : "sampler")); + + return offset * sizeof(uint32_t); +} + +static void gen6_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = GEN6_VERTEX(op->u.gen6.flags); + + OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | + 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; + OUT_BATCH(0); + OUT_BATCH(~0); /* max address: disabled */ + OUT_BATCH(0); + + sna->render.vb_id |= 1 << id; +} + +static void gen6_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { + DBG(("%s: continuing previous primitive, start=%d, index=%d\n", + __FUNCTION__, + sna->render.vertex_start, + sna->render.vertex_index)); + sna->render.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + sna->render.vertex_offset = sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + DBG(("%s: started new primitive: index=%d\n", + __FUNCTION__, sna->render.vertex_start)); + + sna->render_state.gen6.last_primitive = sna->kgem.nbatch; +} + +static bool gen6_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = 1 << GEN6_VERTEX(op->u.gen6.flags); + int ndwords; + + ndwords = op->need_magic_ca_pass ? 60 : 6; + if ((sna->render.vb_id & id) == 0) + ndwords += 5; + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render.vb_id & id) == 0) + gen6_emit_vertex_buffer(sna, op); + + gen6_emit_primitive(sna); + return true; +} + +static int gen6_get_rectangles__flush(struct sna *sna, + const struct sna_composite_op *op) +{ + + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 
65 : 5)) + return 0; + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) + return 0; + + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen6_magic_ca_pass(sna, op)) { + gen6_emit_flush(sna); + gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); + gen6_emit_wm(sna, + GEN6_KERNEL(op->u.gen6.flags), + GEN6_VERTEX(op->u.gen6.flags) >> 2); + } + } + + return gen4_vertex_finish(sna); +} + +inline static int gen6_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want, + void (*emit_state)(struct sna *, const struct sna_composite_op *op)) +{ + int rem; + +start: + rem = vertex_space(sna); + if (unlikely(rem < op->floats_per_rect)) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, op->floats_per_rect)); + rem = gen6_get_rectangles__flush(sna, op); + if (unlikely(rem == 0)) + goto flush; + } + + if (unlikely(sna->render.vertex_offset == 0 && + !gen6_rectangle_begin(sna, op))) + goto flush; + + if (want > 1 && want * op->floats_per_rect > rem) + want = rem / op->floats_per_rect; + + assert(want > 0); + sna->render.vertex_index += 3*want; + return want; + +flush: + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + gen6_magic_ca_pass(sna, op); + } +// sna_vertex_wait__locked(&sna->render); + _kgem_submit(&sna->kgem); + emit_state(sna, op); + goto start; +} + +inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, + uint16_t *offset) +{ + uint32_t *table; + + sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, + 0, sizeof(struct gen6_surface_state_padded)); + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + *offset = sna->kgem.surface; + return table; +} + +static bool +gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8)); + kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + if (sna->render_state.gen6.needs_invariant) + gen6_emit_invariant(sna); + + return kgem_bo_is_dirty(op->dst.bo); +} + +static void gen6_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + if (op->mask.bo) { + binding_table[2] = + gen6_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + false); + } + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static void +gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + assert (sna->render.vertex_offset == 0); 
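+	/* A stride change invalidates the current vertex indices: realign
+	 * vertex_used so that vertex_index * floats_per_vertex lines up
+	 * with the new stride.
+	 */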
+ if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { + if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) + gen4_vertex_finish(sna); + + DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + sna->render_state.gen6.floats_per_vertex, + op->floats_per_vertex, + sna->render.vertex_index, + (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); + sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; + } + assert((sna->render.vertex_used % op->floats_per_vertex) == 0); +} + +#if 0 + +fastcall static void +gen6_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); + op->prim_emit(sna, op, r); +} + +fastcall static void +gen6_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); +} + +static void +gen6_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("composite_boxes(%d)\n", nbox)); + + do { + int nbox_this_time; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen6_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen6_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + 
sna_vertex_unlock(&sna->render); +} + +#endif + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif + +static uint32_t +gen6_composite_create_blend_state(struct sna_static_stream *stream) +{ + char *base, *ptr; + int src, dst; + + base = sna_static_stream_map(stream, + GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, + 64); + + ptr = base; + for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { + for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { + struct gen6_blend_state *blend = + (struct gen6_blend_state *)ptr; + + blend->blend0.dest_blend_factor = dst; + blend->blend0.source_blend_factor = src; + blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; + blend->blend0.blend_enable = + !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); + + blend->blend1.post_blend_clamp_enable = 1; + blend->blend1.pre_blend_clamp_enable = 1; + + ptr += GEN6_BLEND_STATE_PADDED_SIZE; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +#if 0 + +static uint32_t gen6_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen6_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN6_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen6_emit_video_state(struct sna *sna, + const struct sna_composite_op *op) +{ + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + bool dirty; + int n_src, n; + + dirty = gen6_get_batch(sna, op); + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen6_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen6_emit_state(sna, op, offset | dirty); +} + 
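+/* The planar video kernel samples each plane through its own A8 surface:
+ * binding-table slots 1-2 alias the Y plane, slots 3-4 the V plane and
+ * slots 5-6 the U plane (both chroma planes at half resolution), giving
+ * 7 surfaces in all including the destination, which matches the
+ * VIDEO_PLANAR kernel's num_surfaces. Packed YUV binds a single
+ * YCRCB_NORMAL or YCRCB_SWAPY surface instead.
+ */
+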
+static bool
+gen6_render_video(struct sna *sna,
+		  struct sna_video *video,
+		  struct sna_video_frame *frame,
+		  RegionPtr dstRegion,
+		  short src_w, short src_h,
+		  short drw_w, short drw_h,
+		  short dx, short dy,
+		  PixmapPtr pixmap)
+{
+	struct sna_composite_op tmp;
+	int nbox, pix_xoff, pix_yoff;
+	float src_scale_x, src_scale_y;
+	struct sna_pixmap *priv;
+	unsigned filter;
+	BoxPtr box;
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
+	     __FUNCTION__, src_w, src_h, drw_w, drw_h,
+	     REGION_NUM_RECTS(dstRegion),
+	     REGION_EXTENTS(NULL, dstRegion)->x1,
+	     REGION_EXTENTS(NULL, dstRegion)->y1,
+	     REGION_EXTENTS(NULL, dstRegion)->x2,
+	     REGION_EXTENTS(NULL, dstRegion)->y2));
+
+	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
+	if (priv == NULL)
+		return false;
+
+	memset(&tmp, 0, sizeof(tmp));
+
+	tmp.dst.pixmap = pixmap;
+	tmp.dst.width = pixmap->drawable.width;
+	tmp.dst.height = pixmap->drawable.height;
+	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
+	tmp.dst.bo = priv->gpu_bo;
+
+	tmp.src.bo = frame->bo;
+	tmp.mask.bo = NULL;
+
+	tmp.floats_per_vertex = 3;
+	tmp.floats_per_rect = 9;
+
+	if (src_w == drw_w && src_h == drw_h)
+		filter = SAMPLER_FILTER_NEAREST;
+	else
+		filter = SAMPLER_FILTER_BILINEAR;
+
+	tmp.u.gen6.flags =
+		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
+					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
+			       NO_BLEND,
+			       is_planar_fourcc(frame->id) ?
+			       GEN6_WM_KERNEL_VIDEO_PLANAR :
+			       GEN6_WM_KERNEL_VIDEO_PACKED,
+			       2);
+	tmp.priv = frame;
+
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
+	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
+		kgem_submit(&sna->kgem);
+		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
+		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	}
+
+	gen6_emit_video_state(sna, &tmp); /* the frame travels in tmp.priv */
+	gen6_align_vertex(sna, &tmp);
+
+	/* Set up the offset for translating from the given region (in screen
+	 * coordinates) to the backing pixmap.
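+	 * (With COMPOSITE redirection the pixmap is positioned at
+	 * screen_x/screen_y, so the deltas below shift screen coordinates
+	 * into pixmap space.)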
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / frame->width) / (float)drw_w; + src_scale_y = ((float)src_h / frame->height) / (float)drw_h; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F((box->x2 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y2 - dy) * src_scale_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F((box->x1 - dx) * src_scale_x); + OUT_VERTEX_F((box->y1 - dy) * src_scale_y); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + } + box++; + } + priv->clear = false; + + gen4_vertex_flush(sna); + return true; +} + +static int +gen6_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y, + bool precise) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = false; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen4_channel_init_solid(sna, channel, color); + + if (picture->pDrawable == NULL) { + int ret; + + if (picture->pSourcePict->type == SourcePictTypeLinear) + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); + + DBG(("%s -- fixup, gradient\n", __FUNCTION__)); + ret = -1; + if (!precise) + ret = sna_render_picture_approximate_gradient(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (ret == -1) + ret = sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + return ret; + } + + if (picture->alphaMap) { + DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + if (!gen6_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen6_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->pict_format = picture->format; + channel->card_format = gen6_get_card_format(picture->format); + if (channel->card_format == (unsigned)-1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y, + false); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { + DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height)); + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) +{ + channel->repeat = gen6_repeat(channel->repeat); + channel->filter = gen6_filter(channel->filter); + if (channel->card_format == (unsigned)-1) + channel->card_format = gen6_get_card_format(channel->pict_format); + assert(channel->card_format != (unsigned)-1); +} + +static void gen6_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + DBG(("%s\n", __FUNCTION__)); + + assert(!sna->render.active); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + gen6_magic_ca_pass(sna, op); + } + +// if (op->mask.bo) +// kgem_bo_destroy(&sna->kgem, op->mask.bo); +// if (op->src.bo) +// kgem_bo_destroy(&sna->kgem, op->src.bo); + +// sna_render_composite_redirect_done(sna, op); +} + +static bool +gen6_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h) +{ + BoxRec box; + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.format = dst->format; + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + + if (w && h) { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); + + op->dst.bo = sna_drawable_use_bo (dst->pDrawable, + PREFER_GPU | FORCE_GPU | RENDER_GPU, + &box, &op->damage); + if (op->dst.bo == NULL) + return false; + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? 
*op->damage : (void *)-1)); + + assert(op->dst.bo->proxy == NULL); + + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h)) + return false; + + return true; +} + + + +static bool +gen6_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + if (op >= ARRAY_SIZE(gen6_blend_op)) + return false; + + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.ring)); + + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, false)) + return true; + + if (gen6_composite_fallback(sna, src, mask, dst)) + return false; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + if (op == PictOpClear) + op = PictOpSrc; + tmp->op = op; + if (!gen6_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height)) + return false; + + switch (gen6_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + /* Did we just switch rings to prepare the source? */ + if (mask == NULL && + prefer_blt_composite(sna, tmp) && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; + + gen6_composite_channel_convert(&tmp->src); + break; + } + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.bo = NULL; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = true; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. 
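+	 * The fix is a two-pass blend: the composite is demoted to
+	 * PictOpOutReverse to knock out the destination by the mask-weighted
+	 * alpha, and then gen6_magic_ca_pass() replays the same rectangles
+	 * with a PictOpAdd blend to accumulate the source values.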
+ */ + if (gen6_blend_op[op].src_alpha && + (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) + goto cleanup_src; + + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } + + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, msk_x, msk_y)) { + switch (gen6_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_src; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) + goto cleanup_src; + /* fall through to fixup */ + case 1: + gen6_composite_channel_convert(&tmp->mask); + break; + } + } + + tmp->is_affine &= tmp->mask.is_affine; + } + + tmp->u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, + tmp->src.repeat, + tmp->mask.filter, + tmp->mask.repeat), + gen6_get_blend(tmp->op, + tmp->has_component_alpha, + tmp->dst.format), + gen6_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine), + gen4_choose_composite_emitter(tmp)); + + tmp->blt = gen6_render_composite_blt; + tmp->box = gen6_render_composite_box; + tmp->boxes = gen6_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen6_render_composite_boxes; + tmp->thread_boxes = gen6_render_composite_boxes__thread; + } + tmp->done = gen6_render_composite_done; + + + + gen6_emit_composite_state(sna, tmp); + gen6_align_vertex(sna, tmp); + return true; + +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return false; +} + +#if !NO_COMPOSITE_SPANS +fastcall static void +gen6_render_composite_spans_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); + op->prim_emit(sna, op, box, opacity); +} + +static void +gen6_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen6_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= 
nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen6_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); + + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); + + if (op->base.src.bo) + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + + sna_render_composite_redirect_done(sna, &op->base); +} + +static bool +gen6_check_composite_spans(struct sna *sna, + uint8_t op, PicturePtr src, PicturePtr dst, + int16_t width, int16_t height, + unsigned flags) +{ + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); + + if (op >= ARRAY_SIZE(gen6_blend_op)) + return false; + + if (gen6_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); + return false; + } + + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) + return false; + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); + } + + return true; +} + +static bool +gen6_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_spans_op *tmp) +{ + DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, + width, height, flags, sna->kgem.ring)); + + assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); + + if (need_tiling(sna, width, height)) { + DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", + __FUNCTION__, width, height)); + return sna_tiling_composite_spans(op, src, dst, + src_x, src_y, dst_x, dst_y, + width, height, flags, tmp); + } + + tmp->base.op = op; + if (!gen6_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height)) + return false; + + switch (gen6_composite_picture(sna, src, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + gen6_composite_channel_convert(&tmp->base.src); + break; + } + tmp->base.mask.bo = NULL; + + tmp->base.is_affine = tmp->base.src.is_affine; + tmp->base.need_magic_ca_pass = false; + + tmp->base.u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, + tmp->base.src.repeat, + SAMPLER_FILTER_NEAREST, + SAMPLER_EXTEND_PAD), + gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), + GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, + gen4_choose_spans_emitter(tmp)); + + tmp->box = gen6_render_composite_spans_box; + tmp->boxes = gen6_render_composite_spans_boxes; + if (tmp->emit_boxes) + 
tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; + tmp->done = gen6_render_composite_spans_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) + goto cleanup_src; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_composite_state(sna, &tmp->base); + gen6_align_vertex(sna, &tmp->base); + return true; + +cleanup_src: + if (tmp->base.src.bo) + kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); +cleanup_dst: + if (tmp->base.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); + return false; +} +#endif + +#endif + +static void +gen6_emit_copy_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +#if 0 + +static inline bool prefer_blt_copy(struct sna *sna, + struct kgem_bo *src_bo, + struct kgem_bo *dst_bo, + unsigned flags) +{ + if (flags & COPY_SYNC) + return false; + + if (PREFER_RENDER) + return PREFER_RENDER > 0; + + if (sna->kgem.ring == KGEM_BLT) + return true; + + if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + + return (prefer_blt_bo(sna, src_bo) >= 0 && + prefer_blt_bo(sna, dst_bo) > 0); +} + +inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) +{ + *extents = box[0]; + while (--n) { + box++; + + if (box->x1 < extents->x1) + extents->x1 = box->x1; + if (box->x2 > extents->x2) + extents->x2 = box->x2; + + if (box->y1 < extents->y1) + extents->y1 = box->y1; + if (box->y2 > extents->y2) + extents->y2 = box->y2; + } +} + +static inline bool +overlaps(struct sna *sna, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, BoxRec *extents) +{ + if (src_bo != dst_bo) + return false; + + boxes_extents(box, n, extents); + return (extents->x2 + src_dx > extents->x1 + dst_dx && + extents->x1 + src_dx < extents->x2 + dst_dx && + extents->y2 + src_dy > extents->y1 + dst_dy && + extents->y1 + src_dy < extents->y2 + dst_dy); +} + +static bool +gen6_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags) +{ + struct sna_composite_op tmp; + BoxRec extents; + + DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? 
%d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, + src_bo == dst_bo, + overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, &extents))); + + if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + if (!(alu == GXcopy || alu == GXclear)) { +fallback_blt: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return false; + + return sna_blt_copy_boxes_fallback(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); + } + + if (overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, &extents)) { + if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) + goto fallback_blt; + + if (can_switch_to_blt(sna, dst_bo, flags) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + return sna_render_copy_boxes__overlap(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n, &extents); + } + + if (dst->drawable.depth == src->drawable.depth) { + tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = tmp.dst.format; + } else { + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); + } + if (!gen6_check_format(tmp.src.pict_format)) + goto fallback_blt; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1 + dst_dx, + extents.y1 + dst_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + + tmp.dst.x = tmp.dst.y = 0; + } + + tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); + if (too_large(src->drawable.width, src->drawable.height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); + goto fallback_tiled_dst; + } + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + } else { + tmp.src.bo = src_bo; + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + } + + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = 0; + + tmp.u.gen6.flags = COPY_FLAGS(alu); + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + 
+	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
+	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
+
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
+	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
+		kgem_submit(&sna->kgem);
+		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
+			DBG(("%s: too large for a single operation\n",
+			     __FUNCTION__));
+			goto fallback_tiled_src;
+		}
+		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	}
+
+	gen6_emit_copy_state(sna, &tmp);
+	gen6_align_vertex(sna, &tmp);
+
+	do {
+		int16_t *v;
+		int n_this_time;
+
+		n_this_time = gen6_get_rectangles(sna, &tmp, n,
+						  gen6_emit_copy_state);
+		n -= n_this_time;
+
+		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
+		sna->render.vertex_used += 6 * n_this_time;
+		assert(sna->render.vertex_used <= sna->render.vertex_size);
+		do {
+			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
+			     box->x1 + src_dx, box->y1 + src_dy,
+			     box->x1 + dst_dx, box->y1 + dst_dy,
+			     box->x2 - box->x1, box->y2 - box->y1));
+			v[0] = box->x2 + dst_dx;
+			v[2] = box->x2 + src_dx;
+			v[1] = v[5] = box->y2 + dst_dy;
+			v[3] = v[7] = box->y2 + src_dy;
+			v[8] = v[4] = box->x1 + dst_dx;
+			v[10] = v[6] = box->x1 + src_dx;
+			v[9] = box->y1 + dst_dy;
+			v[11] = box->y1 + src_dy;
+			v += 12; box++;
+		} while (--n_this_time);
+	} while (n);
+
+	gen4_vertex_flush(sna);
+	sna_render_composite_redirect_done(sna, &tmp);
+	if (tmp.src.bo != src_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+	return true;
+
+fallback_tiled_src:
+	if (tmp.src.bo != src_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+fallback_tiled_dst:
+	if (tmp.redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
+fallback_tiled:
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	    sna_blt_copy_boxes(sna, alu,
+			       src_bo, src_dx, src_dy,
+			       dst_bo, dst_dx, dst_dy,
+			       dst->drawable.bitsPerPixel,
+			       box, n))
+		return true;
+
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
+}
+
+#endif
+
+static void
+gen6_render_copy_blt(struct sna *sna,
+		     const struct sna_copy_op *op,
+		     int16_t sx, int16_t sy,
+		     int16_t w, int16_t h,
+		     int16_t dx, int16_t dy)
+{
+	int16_t *v;
+
+	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
+
+	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
+	sna->render.vertex_used += 6;
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+	/* A RECTLIST rectangle is three corners, each emitted as a dst
+	 * (x, y) pair followed by the matching src (x, y) pair:
+	 * bottom-right, bottom-left, then top-left; the hardware infers
+	 * the fourth corner.
+	 */
+	v[0] = dx+w; v[1] = dy+h;
+	v[2] = sx+w; v[3] = sy+h;
+	v[4] = dx; v[5] = dy+h;
+	v[6] = sx; v[7] = sy+h;
+	v[8] = dx; v[9] = dy;
+	v[10] = sx; v[11] = sy;
+}
+
+static void
+gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+	DBG(("%s()\n", __FUNCTION__));
+
+	assert(!sna->render.active);
+	if (sna->render.vertex_offset)
+		gen4_vertex_flush(sna);
+}
+
+static bool
+gen6_render_copy(struct sna *sna, uint8_t alu,
+		 PixmapPtr src, struct kgem_bo *src_bo,
+		 PixmapPtr dst, struct kgem_bo *dst_bo,
+		 struct sna_copy_op *op)
+{
+	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
+	     __FUNCTION__, alu,
+	     src->drawable.width, src->drawable.height,
+	     dst->drawable.width, dst->drawable.height));
+
+	op->base.dst.format = PIXMAN_a8r8g8b8;
+	op->base.src.pict_format = op->base.dst.format;
+
+	op->base.dst.pixmap = dst;
+	op->base.dst.width = dst->drawable.width;
+	op->base.dst.height = dst->drawable.height;
+	op->base.dst.bo = dst_bo;
+
+	op->base.src.bo = src_bo;
+	op->base.src.card_format =
+		gen6_get_card_format(op->base.src.pict_format);
+	op->base.src.width = 
src->drawable.width; + op->base.src.height = src->drawable.height; + + op->base.mask.bo = NULL; + + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen6.flags = COPY_FLAGS(alu); + assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); + assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); + + + gen6_emit_copy_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_copy_blt; + op->done = gen6_render_copy_done; + return true; +} + +#if 0 + +static void +gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, 1, 1, + GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += + sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static inline bool prefer_blt_fill(struct sna *sna, + struct kgem_bo *bo) +{ + if (PREFER_RENDER) + return PREFER_RENDER < 0; + + if (untiled_tlb_miss(bo)) + return true; + + return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0; +} + +static bool +gen6_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha, (int)format)); + + if (op >= ARRAY_SIZE(gen6_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return false; + } + + if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) { + uint8_t alu = GXinvalid; + + if (op <= PictOpSrc) { + pixel = 0; + if (op == PictOpClear) + alu = GXclear; + else if (sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + alu = GXcopy; + } + + if (alu != GXinvalid && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return true; + + if (!gen6_check_dst_format(format)) + return false; + } + + if (op == PictOpClear) { + pixel = 0; + op = PictOpSrc; + } else if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return false; + + DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", + __FUNCTION__, pixel, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2)); + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(dst->drawable.width, dst->drawable.height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + return 
sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen6.flags = FILL_FLAGS(op, format); + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + } + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + do { + int n_this_time; + int16_t *v; + + n_this_time = gen6_get_rectangles(sna, &tmp, n, + gen6_emit_fill_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); + return true; +} + +static void +gen6_render_op_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + int16_t *v; + + DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x+w; + v[4] = v[8] = x; + v[1] = v[5] = y+h; + v[9] = y; + + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen6_render_op_fill_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) +{ + int16_t *v; + + DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen6_render_op_fill_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, nbox)); + + do { + int nbox_this_time; + int16_t *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_fill_state); + nbox -= nbox_this_time; + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6 * nbox_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + do { + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + box++; v += 12; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) +{ + DBG(("%s()\n", __FUNCTION__)); + + assert(!sna->render.active); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); +} + +static bool +gen6_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *op) +{ + DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); + + if (prefer_blt_fill(sna, dst_bo) && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height)) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + op->base.dst.x = op->base.dst.y = 0; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.mask.bo = NULL; + + op->base.need_magic_ca_pass = false; + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; + assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + } + + gen6_emit_fill_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_op_fill_blt; + op->box = gen6_render_op_fill_box; + op->boxes = gen6_render_op_fill_boxes; + op->done = gen6_render_op_fill_done; + return true; +} + +static bool +gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu) +{ + BoxRec box; + + box.x1 = x1; + box.y1 = y1; + box.x2 = x2; + box.y2 = y2; + + return sna_blt_fill_boxes(sna, alu, + bo, dst->drawable.bitsPerPixel, + color, &box, 1); +} + +static bool +gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, + int16_t x2, int16_t y2, + uint8_t alu) +{ + struct sna_composite_op tmp; + int16_t *v; + + /* Prefer to use the BLT if already engaged */ + if (prefer_blt_fill(sna, bo) && + gen6_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu)) + return true; + + /* Must use the BLT if we can't RENDER... 
*/ + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height)) + return gen6_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu); + + if (alu == GXclear) + color = 0; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.dst.x = tmp.dst.y = 0; + + tmp.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + } + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); + + DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x2; + v[8] = v[4] = x1; + v[5] = v[1] = y2; + v[9] = y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + + return true; +} + +static bool +gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) +{ + BoxRec box; + + box.x1 = 0; + box.y1 = 0; + box.x2 = dst->drawable.width; + box.y2 = dst->drawable.height; + + return sna_blt_fill_boxes(sna, GXclear, + bo, dst->drawable.bitsPerPixel, + 0, &box, 1); +} + +static bool +gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) +{ + struct sna_composite_op tmp; + int16_t *v; + + DBG(("%s: %dx%d\n", + __FUNCTION__, + dst->drawable.width, + dst->drawable.height)); + + /* Prefer to use the BLT if, and only if, already engaged */ + if (sna->kgem.ring == KGEM_BLT && + gen6_render_clear_try_blt(sna, dst, bo)) + return true; + + /* Must use the BLT if we can't RENDER... 
*/
+	if (too_large(dst->drawable.width, dst->drawable.height))
+		return gen6_render_clear_try_blt(sna, dst, bo);
+
+	tmp.dst.pixmap = dst;
+	tmp.dst.width = dst->drawable.width;
+	tmp.dst.height = dst->drawable.height;
+	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+	tmp.dst.bo = bo;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	tmp.src.bo = sna_render_get_solid(sna, 0);
+	tmp.mask.bo = NULL;
+
+	tmp.floats_per_vertex = 2;
+	tmp.floats_per_rect = 6;
+	tmp.need_magic_ca_pass = false;
+
+	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
+	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
+	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
+
+	gen6_emit_fill_state(sna, &tmp);
+	gen6_align_vertex(sna, &tmp);
+
+	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
+
+	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
+	sna->render.vertex_used += 6;
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+	v[0] = dst->drawable.width;
+	v[5] = v[1] = dst->drawable.height;
+	v[8] = v[4] = 0;
+	v[9] = 0;
+
+	v[7] = v[2] = v[3] = 1;
+	v[6] = v[10] = v[11] = 0;
+
+	gen4_vertex_flush(sna);
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+
+	return true;
+}
+
+static void gen6_render_flush(struct sna *sna)
+{
+	gen4_vertex_close(sna);
+
+	assert(sna->render.vb_id == 0);
+	assert(sna->render.vertex_offset == 0);
+}
+
+#endif
+
+static void
+gen6_render_retire(struct kgem *kgem)
+{
+	struct sna *sna;
+
+	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
+		kgem->ring = kgem->mode;
+
+	sna = container_of(kgem, struct sna, kgem);
+	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
+		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
+		sna->render.vertex_used = 0;
+		sna->render.vertex_index = 0;
+	}
+}
+
+static void gen6_render_reset(struct sna *sna)
+{
+	sna->render_state.gen6.needs_invariant = true;
+	sna->render_state.gen6.first_state_packet = true;
+	sna->render_state.gen6.ve_id = 3 << 2;
+	sna->render_state.gen6.last_primitive = -1;
+
+	sna->render_state.gen6.num_sf_outputs = 0;
+	sna->render_state.gen6.samplers = -1;
+	sna->render_state.gen6.blend = -1;
+	sna->render_state.gen6.kernel = -1;
+	sna->render_state.gen6.drawrect_offset = -1;
+	sna->render_state.gen6.drawrect_limit = -1;
+	sna->render_state.gen6.surface_table = -1;
+
+	sna->render.vertex_offset = 0;
+	sna->render.nvertex_reloc = 0;
+	sna->render.vb_id = 0;
+}
+
+static void gen6_render_fini(struct sna *sna)
+{
+//	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
+}
+
+static bool is_gt2(struct sna *sna)
+{
+	return DEVICE_ID(sna->PciInfo) & 0x30;
+}
+
+static bool is_mobile(struct sna *sna)
+{
+	return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
+}
+
+static bool gen6_render_setup(struct sna *sna)
+{
+	struct gen6_render_state *state = &sna->render_state.gen6;
+	struct sna_static_stream general;
+	struct gen6_sampler_state *ss;
+	int i, j, k, l, m;
+
+	state->info = &gt1_info;
+	if (is_gt2(sna))
+		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
+
+	sna_static_stream_init(&general);
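+
+	/* Overview of the static stream assembled below: the zero/scratch
+	 * padding first, then the WM kernels (in 8-, 16- and/or 32-pixel
+	 * dispatch variants), the sampler-state table addressed via
+	 * SAMPLER_OFFSET(), and finally the blend state.
+	 */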
+	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+	 * dumps, you know it points to zero.
+	 */
+	null_create(&general);
+	scratch_create(&general);
+
+	/* Pre-packaged kernels are copied in verbatim; the rest are
+	 * compiled at each enabled dispatch width, with a 16-pixel build
+	 * as the fallback if none was produced.
+	 */
+	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
+		if (wm_kernels[m].size) {
+			state->wm_kernel[m][1] =
+				sna_static_stream_add(&general,
+						      wm_kernels[m].data,
+						      wm_kernels[m].size,
+						      64);
+		} else {
+			if (USE_8_PIXEL_DISPATCH) {
+				state->wm_kernel[m][0] =
+					sna_static_stream_compile_wm(sna, &general,
+								     wm_kernels[m].data, 8);
+			}
+
+			if (USE_16_PIXEL_DISPATCH) {
+				state->wm_kernel[m][1] =
+					sna_static_stream_compile_wm(sna, &general,
+								     wm_kernels[m].data, 16);
+			}
+
+			if (USE_32_PIXEL_DISPATCH) {
+				state->wm_kernel[m][2] =
+					sna_static_stream_compile_wm(sna, &general,
+								     wm_kernels[m].data, 32);
+			}
+		}
+		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
+			state->wm_kernel[m][1] =
+				sna_static_stream_compile_wm(sna, &general,
+							     wm_kernels[m].data, 16);
+		}
+	}
+
+	/* Two sampler entries per table cell: the copy and fill specials
+	 * first, then every (filter, extend) pairing for source and mask.
+	 */
+	ss = sna_static_stream_map(&general,
+				   2 * sizeof(*ss) *
+				   (2 +
+				    FILTER_COUNT * EXTEND_COUNT *
+				    FILTER_COUNT * EXTEND_COUNT),
+				   32);
+	state->wm_state = sna_static_stream_offsetof(&general, ss);
+	sampler_copy_init(ss); ss += 2;
+	sampler_fill_init(ss); ss += 2;
+	for (i = 0; i < FILTER_COUNT; i++) {
+		for (j = 0; j < EXTEND_COUNT; j++) {
+			for (k = 0; k < FILTER_COUNT; k++) {
+				for (l = 0; l < EXTEND_COUNT; l++) {
+					sampler_state_init(ss++, i, j);
+					sampler_state_init(ss++, k, l);
+				}
+			}
+		}
+	}
+
+	state->cc_blend = gen6_composite_create_blend_state(&general);
+
+	state->general_bo = sna_static_stream_fini(sna, &general);
+	return state->general_bo != NULL;
+}
+
+bool gen6_render_init(struct sna *sna)
+{
+	if (!gen6_render_setup(sna))
+		return false;
+
+//	sna->kgem.context_switch = gen6_render_context_switch;
+	sna->kgem.retire = gen6_render_retire;
+
+//	sna->render.composite = gen6_render_composite;
+//	sna->render.video = gen6_render_video;
+
+//	sna->render.copy_boxes = gen6_render_copy_boxes;
+	sna->render.copy = gen6_render_copy;
+
+//	sna->render.fill_boxes = gen6_render_fill_boxes;
+//	sna->render.fill = gen6_render_fill;
+//	sna->render.fill_one = gen6_render_fill_one;
+//	sna->render.clear = gen6_render_clear;
+
+//	sna->render.flush = gen6_render_flush;
+	sna->render.reset = gen6_render_reset;
+	sna->render.fini = gen6_render_fini;
+
+	sna->render.max_3d_size = GEN6_MAX_SIZE;
+	sna->render.max_3d_pitch = 1 << 18;
+	return true;
+}
+
+
+void gen4_vertex_flush(struct sna *sna)
+{
+	DBG(("%s[%x] = %d\n", __FUNCTION__,
+	     4*sna->render.vertex_offset,
+	     sna->render.vertex_index - sna->render.vertex_start));
+
+	assert(sna->render.vertex_offset);
+	assert(sna->render.vertex_index > sna->render.vertex_start);
+
+	sna->kgem.batch[sna->render.vertex_offset] =
+		sna->render.vertex_index - sna->render.vertex_start;
+	sna->render.vertex_offset = 0;
+}
+
+int gen4_vertex_finish(struct sna *sna)
+{
+	struct kgem_bo *bo;
+	unsigned int i;
+	unsigned hint, size;
+
+	DBG(("%s: used=%d / %d\n", __FUNCTION__,
+	     sna->render.vertex_used, sna->render.vertex_size));
+	assert(sna->render.vertex_offset == 0);
+	assert(sna->render.vertex_used);
+
+//	sna_vertex_wait__locked(&sna->render);
+
+	/* Note: we only need dword alignment (currently) */
+
+	bo = sna->render.vbo;
+	if (bo) {
+		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+			     i, sna->render.vertex_reloc[i]));
+
+			sna->kgem.batch[sna->render.vertex_reloc[i]] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i], bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       0);
+		}
+
+		assert(!sna->render.active);
+		sna->render.nvertex_reloc = 0;
+		sna->render.vertex_used = 0;
+		sna->render.vertex_index = 0;
+		sna->render.vbo = NULL;
+		sna->render.vb_id = 0;
+
+		kgem_bo_destroy(&sna->kgem, bo);
+	}
+
+	hint = CREATE_GTT_MAP;
+	if (bo)
+		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
+
+	size = 256*1024;
+	assert(!sna->render.active);
+	sna->render.vertices = NULL;
+	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
+	while (sna->render.vbo == NULL && size > 16*1024) {
+		size /= 2;
+		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
+	}
+	if (sna->render.vbo == NULL)
+		sna->render.vbo = kgem_create_linear(&sna->kgem,
+						     256*1024, CREATE_GTT_MAP);
+	if (sna->render.vbo)
+		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+	if (sna->render.vertices == NULL) {
+		if (sna->render.vbo) {
+			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+			sna->render.vbo = NULL;
+		}
+		sna->render.vertices = sna->render.vertex_data;
+		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+		return 0;
+	}
+
+	if (sna->render.vertex_used) {
+		DBG(("%s: copying initial buffer x %d to handle=%d\n",
+		     __FUNCTION__,
+		     sna->render.vertex_used,
+		     sna->render.vbo->handle));
+		assert(sizeof(float)*sna->render.vertex_used <=
+		       __kgem_bo_size(sna->render.vbo));
+		memcpy(sna->render.vertices,
+		       sna->render.vertex_data,
+		       sizeof(float)*sna->render.vertex_used);
+	}
+
+	size = __kgem_bo_size(sna->render.vbo)/4;
+	if (size >= UINT16_MAX)
+		size = UINT16_MAX - 1; /* the vertex counters are held in 16 bits */
+
+	DBG(("%s: create vbo handle=%d, size=%d\n",
+	     __FUNCTION__, sna->render.vbo->handle, size));
+
+	sna->render.vertex_size = size;
+	return sna->render.vertex_size - sna->render.vertex_used;
+}
+
+void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
+{
+	/* stub: no CPU mapping of a bo in this port, so callers fall
+	 * back to the static vertex_data array */
+	return NULL;
+}
+
+bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
+		   const void *data, int length)
+{
+	/* stub: CPU writes into a bo are likewise unavailable */
+	return false;
+}
+
diff --git a/drivers/video/Intel-2D/gen6_render.h b/drivers/video/Intel-2D/gen6_render.h
new file mode 100644
index 0000000000..2201a62780
--- /dev/null
+++ b/drivers/video/Intel-2D/gen6_render.h
@@ -0,0 +1,1563 @@
+#ifndef GEN6_RENDER_H
+#define GEN6_RENDER_H
+
+#define GEN6_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
+
+#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+					    ((Pipeline) << 27) | \
+					    ((Opcode) << 24) | \
+					    ((Subopcode) << 16))
+
+#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
+#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
+
+#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
+
+#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
+#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
+
+#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
+#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
+#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
+#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
+#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
+
+#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
+#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
+#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
+#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
+#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
+# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
+#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
+#define 
GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8) +#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa) +#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb) + +#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0) + +#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10) +/* DW1 */ +# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + +#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16 +# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8 +# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define 
GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +/* for GEN6_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 26 +#define VB0_VERTEXDATA (0 << 20) +#define VB0_INSTANCEDATA (1 << 20) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ +#define VE0_VALID (1 << 25) /* for GEN6 */ +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen6_defines.h */ +#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN6_SVG_CTL 0x7400 + +#define GEN6_SVG_CTL_GS_BA (0 << 8) +#define GEN6_SVG_CTL_SS_BA (1 << 8) +#define GEN6_SVG_CTL_IO_BA (2 << 8) +#define GEN6_SVG_CTL_GS_AUB (3 << 8) +#define GEN6_SVG_CTL_IO_AUB (4 << 8) +#define GEN6_SVG_CTL_SIP (5 << 8) + +#define GEN6_SVG_RDATA 0x7404 +#define GEN6_SVG_WORK_CTL 0x7408 + +#define GEN6_VF_CTL 0x7500 + +#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VF_STRG_VAL 0x7504 +#define GEN6_VF_STR_VL_OVR 0x7508 +#define GEN6_VF_VC_OVR 0x750c +#define GEN6_VF_STR_PSKIP 0x7510 +#define GEN6_VF_MAX_PRIM 0x7514 +#define GEN6_VF_RDATA 0x7518 + +#define GEN6_VS_CTL 0x7600 +#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VS_STRG_VAL 0x7604 +#define GEN6_VS_RDATA 0x7608 + +#define GEN6_SF_CTL 0x7b00 +#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_SF_STRG_VAL 0x7b04 +#define GEN6_SF_RDATA 0x7b18 + +#define 
GEN6_WIZ_CTL 0x7c00 +#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_WIZ_STRG_VAL 0x7c04 +#define GEN6_WIZ_RDATA 0x7c18 + +#define GEN6_TS_CTL 0x7e00 +#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_TS_STRG_VAL 0x7e04 +#define GEN6_TS_RDATA 0x7e08 + +#define GEN6_TD_CTL 0x8000 +#define GEN6_TD_CTL_MUX_SHIFT 8 +#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN6_TD_CTL2 0x8004 +#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN6_TD_VF_VS_EMSK 0x8008 +#define GEN6_TD_GS_EMSK 0x800c +#define GEN6_TD_CLIP_EMSK 0x8010 +#define GEN6_TD_SF_EMSK 0x8014 +#define GEN6_TD_WIZ_EMSK 0x8018 +#define GEN6_TD_0_6_EHTRG_VAL 0x801c +#define GEN6_TD_0_7_EHTRG_VAL 0x8020 +#define GEN6_TD_0_6_EHTRG_MSK 0x8024 +#define GEN6_TD_0_7_EHTRG_MSK 0x8028 +#define GEN6_TD_RDATA 0x802c +#define GEN6_TD_TS_EMSK 0x8030 + +#define GEN6_EU_CTL 0x8800 +#define GEN6_EU_CTL_SELECT_SHIFT 16 +#define GEN6_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN6_EU_ATT_0 0x8810 +#define GEN6_EU_ATT_1 0x8814 +#define GEN6_EU_ATT_DATA_0 0x8820 +#define GEN6_EU_ATT_DATA_1 0x8824 +#define GEN6_EU_ATT_CLR_0 0x8830 +#define GEN6_EU_ATT_CLR_1 0x8834 +#define GEN6_EU_RDATA 0x8840 + +#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1) +#define GEN6_STATE_SIP GEN6_3D(0, 1, 2) + +#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4) + +#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0) +#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0) + +#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ 
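+/* Worked example of the opcode packing above:
+ *   GEN6_3DSTATE_DRAWING_RECTANGLE = GEN6_3D(3, 1, 0)
+ *     = (3 << 29) | (3 << 27) | (1 << 24) | (0 << 16) = 0x79000000
+ * Bits 31:29 hold the command type, 28:27 the pipeline, 26:24 the
+ * opcode and 23:16 the sub-opcode; the low bits of each command are
+ * filled in with its DWord length when it is emitted.
+ */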
+ +#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8) +#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9) +#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa) +#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb) + +#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0) +#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1) +#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2) +#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4) +#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5) +# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6) +#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7) +#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8) +#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa) +#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb) + +#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0) + +#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10) +/* DW1 */ +# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# 
define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for GEN6_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for GEN6_3DSTATE_PIPELINED_POINTERS */ +#define GEN6_GS_DISABLE 0 +#define GEN6_GS_ENABLE 1 +#define GEN6_CLIP_DISABLE 0 +#define GEN6_CLIP_ENABLE 1 + +/* for GEN6_PIPE_CONTROL */ +#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0) +#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20) +#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14) +#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) +#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* 3DPRIMITIVE bits */ +#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen6_defines.h */ +#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN6_SVG_CTL 0x7400 + +#define GEN6_SVG_CTL_GS_BA (0 << 8) +#define GEN6_SVG_CTL_SS_BA (1 << 8) +#define GEN6_SVG_CTL_IO_BA (2 << 8) +#define GEN6_SVG_CTL_GS_AUB (3 << 8) +#define GEN6_SVG_CTL_IO_AUB (4 << 8) +#define GEN6_SVG_CTL_SIP (5 << 8) + +#define GEN6_SVG_RDATA 0x7404 +#define GEN6_SVG_WORK_CTL 0x7408 + +#define GEN6_VF_CTL 0x7500 + +#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VF_STRG_VAL 0x7504 +#define GEN6_VF_STR_VL_OVR 0x7508 +#define GEN6_VF_VC_OVR 0x750c +#define GEN6_VF_STR_PSKIP 0x7510 +#define GEN6_VF_MAX_PRIM 0x7514 +#define GEN6_VF_RDATA 0x7518 + +#define GEN6_VS_CTL 0x7600 +#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define 
GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VS_STRG_VAL 0x7604 +#define GEN6_VS_RDATA 0x7608 + +#define GEN6_SF_CTL 0x7b00 +#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_SF_STRG_VAL 0x7b04 +#define GEN6_SF_RDATA 0x7b18 + +#define GEN6_WIZ_CTL 0x7c00 +#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_WIZ_STRG_VAL 0x7c04 +#define GEN6_WIZ_RDATA 0x7c18 + +#define GEN6_TS_CTL 0x7e00 +#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_TS_STRG_VAL 0x7e04 +#define GEN6_TS_RDATA 0x7e08 + +#define GEN6_TD_CTL 0x8000 +#define GEN6_TD_CTL_MUX_SHIFT 8 +#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN6_TD_CTL2 0x8004 +#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN6_TD_VF_VS_EMSK 0x8008 
+#define GEN6_TD_GS_EMSK 0x800c +#define GEN6_TD_CLIP_EMSK 0x8010 +#define GEN6_TD_SF_EMSK 0x8014 +#define GEN6_TD_WIZ_EMSK 0x8018 +#define GEN6_TD_0_6_EHTRG_VAL 0x801c +#define GEN6_TD_0_7_EHTRG_VAL 0x8020 +#define GEN6_TD_0_6_EHTRG_MSK 0x8024 +#define GEN6_TD_0_7_EHTRG_MSK 0x8028 +#define GEN6_TD_RDATA 0x802c +#define GEN6_TD_TS_EMSK 0x8030 + +#define GEN6_EU_CTL 0x8800 +#define GEN6_EU_CTL_SELECT_SHIFT 16 +#define GEN6_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN6_EU_ATT_0 0x8810 +#define GEN6_EU_ATT_1 0x8814 +#define GEN6_EU_ATT_DATA_0 0x8820 +#define GEN6_EU_ATT_DATA_1 0x8824 +#define GEN6_EU_ATT_CLR_0 0x8830 +#define GEN6_EU_ATT_CLR_1 0x8834 +#define GEN6_EU_RDATA 0x8840 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define GEN6_ANISORATIO_2 0 +#define GEN6_ANISORATIO_4 1 +#define GEN6_ANISORATIO_6 2 +#define GEN6_ANISORATIO_8 3 +#define GEN6_ANISORATIO_10 4 +#define GEN6_ANISORATIO_12 5 +#define GEN6_ANISORATIO_14 6 +#define GEN6_ANISORATIO_16 7 + +#define GEN6_BLENDFACTOR_ONE 0x1 +#define GEN6_BLENDFACTOR_SRC_COLOR 0x2 +#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3 +#define GEN6_BLENDFACTOR_DST_ALPHA 0x4 +#define GEN6_BLENDFACTOR_DST_COLOR 0x5 +#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define GEN6_BLENDFACTOR_CONST_COLOR 0x7 +#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8 +#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9 +#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A +#define GEN6_BLENDFACTOR_ZERO 0x11 +#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15 +#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define GEN6_BLENDFUNCTION_ADD 0 +#define GEN6_BLENDFUNCTION_SUBTRACT 1 +#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define GEN6_BLENDFUNCTION_MIN 3 +#define GEN6_BLENDFUNCTION_MAX 4 + +#define 
GEN6_ALPHATEST_FORMAT_UNORM8 0 +#define GEN6_ALPHATEST_FORMAT_FLOAT32 1 + +#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define GEN6_CHROMAKEY_REPLACE_BLACK 1 + +#define GEN6_CLIP_API_OGL 0 +#define GEN6_CLIP_API_DX 1 + +#define GEN6_CLIPMODE_NORMAL 0 +#define GEN6_CLIPMODE_CLIP_ALL 1 +#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2 +#define GEN6_CLIPMODE_REJECT_ALL 3 +#define GEN6_CLIPMODE_ACCEPT_ALL 4 + +#define GEN6_CLIP_NDCSPACE 0 +#define GEN6_CLIP_SCREENSPACE 1 + +#define GEN6_COMPAREFUNCTION_ALWAYS 0 +#define GEN6_COMPAREFUNCTION_NEVER 1 +#define GEN6_COMPAREFUNCTION_LESS 2 +#define GEN6_COMPAREFUNCTION_EQUAL 3 +#define GEN6_COMPAREFUNCTION_LEQUAL 4 +#define GEN6_COMPAREFUNCTION_GREATER 5 +#define GEN6_COMPAREFUNCTION_NOTEQUAL 6 +#define GEN6_COMPAREFUNCTION_GEQUAL 7 + +#define GEN6_COVERAGE_PIXELS_HALF 0 +#define GEN6_COVERAGE_PIXELS_1 1 +#define GEN6_COVERAGE_PIXELS_2 2 +#define GEN6_COVERAGE_PIXELS_4 3 + +#define GEN6_CULLMODE_BOTH 0 +#define GEN6_CULLMODE_NONE 1 +#define GEN6_CULLMODE_FRONT 2 +#define GEN6_CULLMODE_BACK 3 + +#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define GEN6_DEPTHFORMAT_D32_FLOAT 1 +#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define GEN6_DEPTHFORMAT_D16_UNORM 5 + +#define GEN6_FLOATING_POINT_IEEE_754 0 +#define GEN6_FLOATING_POINT_NON_IEEE_754 1 + +#define GEN6_FRONTWINDING_CW 0 +#define GEN6_FRONTWINDING_CCW 1 + +#define GEN6_INDEX_BYTE 0 +#define GEN6_INDEX_WORD 1 +#define GEN6_INDEX_DWORD 2 + +#define GEN6_LOGICOPFUNCTION_CLEAR 0 +#define GEN6_LOGICOPFUNCTION_NOR 1 +#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2 +#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3 +#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4 +#define GEN6_LOGICOPFUNCTION_INVERT 5 +#define GEN6_LOGICOPFUNCTION_XOR 6 +#define GEN6_LOGICOPFUNCTION_NAND 7 +#define GEN6_LOGICOPFUNCTION_AND 8 +#define GEN6_LOGICOPFUNCTION_EQUIV 9 +#define GEN6_LOGICOPFUNCTION_NOOP 10 +#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11 +#define GEN6_LOGICOPFUNCTION_COPY 12 +#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13 +#define GEN6_LOGICOPFUNCTION_OR 14 +#define GEN6_LOGICOPFUNCTION_SET 15 + +#define GEN6_MAPFILTER_NEAREST 0x0 +#define GEN6_MAPFILTER_LINEAR 0x1 +#define GEN6_MAPFILTER_ANISOTROPIC 0x2 + +#define GEN6_MIPFILTER_NONE 0 +#define GEN6_MIPFILTER_NEAREST 1 +#define GEN6_MIPFILTER_LINEAR 3 + +#define GEN6_POLYGON_FRONT_FACING 0 +#define GEN6_POLYGON_BACK_FACING 1 + +#define GEN6_PREFILTER_ALWAYS 0x0 +#define GEN6_PREFILTER_NEVER 0x1 +#define GEN6_PREFILTER_LESS 0x2 +#define GEN6_PREFILTER_EQUAL 0x3 +#define GEN6_PREFILTER_LEQUAL 0x4 +#define GEN6_PREFILTER_GREATER 0x5 +#define GEN6_PREFILTER_NOTEQUAL 0x6 +#define GEN6_PREFILTER_GEQUAL 0x7 + +#define GEN6_PROVOKING_VERTEX_0 0 +#define GEN6_PROVOKING_VERTEX_1 1 +#define GEN6_PROVOKING_VERTEX_2 2 + +#define GEN6_RASTRULE_UPPER_LEFT 0 +#define GEN6_RASTRULE_UPPER_RIGHT 1 + +#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define GEN6_STENCILOP_KEEP 0 +#define GEN6_STENCILOP_ZERO 1 +#define GEN6_STENCILOP_REPLACE 2 +#define GEN6_STENCILOP_INCRSAT 3 +#define GEN6_STENCILOP_DECRSAT 4 +#define GEN6_STENCILOP_INCR 5 +#define GEN6_STENCILOP_DECR 6 +#define GEN6_STENCILOP_INVERT 7 + +#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define 
GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086 +#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087 +#define GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B +#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C +#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D +#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096 +#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE +#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF +#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6 +#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7 +#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define 
GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1 +#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2 +#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9 +#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106 +#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107 +#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108 +#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109 +#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A +#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B +#define GEN6_SURFACEFORMAT_R16_SINT 0x10C +#define GEN6_SURFACEFORMAT_R16_UINT 0x10D +#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E +#define GEN6_SURFACEFORMAT_I16_UNORM 0x111 +#define GEN6_SURFACEFORMAT_L16_UNORM 0x112 +#define GEN6_SURFACEFORMAT_A16_UNORM 0x113 +#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114 +#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115 +#define GEN6_SURFACEFORMAT_L16_FLOAT 0x116 +#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117 +#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D +#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E +#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F +#define GEN6_SURFACEFORMAT_R8_UNORM 0x140 +#define GEN6_SURFACEFORMAT_R8_SNORM 0x141 +#define GEN6_SURFACEFORMAT_R8_SINT 0x142 +#define GEN6_SURFACEFORMAT_R8_UINT 0x143 +#define GEN6_SURFACEFORMAT_A8_UNORM 0x144 +#define GEN6_SURFACEFORMAT_I8_UNORM 0x145 +#define GEN6_SURFACEFORMAT_L8_UNORM 0x146 +#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147 +#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148 +#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149 +#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A +#define GEN6_SURFACEFORMAT_R1_UINT 0x181 +#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186 +#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187 +#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188 +#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189 +#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A +#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define GEN6_SURFACEFORMAT_MONO8 0x18E +#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191 +#define GEN6_SURFACEFORMAT_FXT1 0x192 +#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199 +#define 
GEN6_SURFACEFORMAT_BC5_SNORM 0x19A +#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define GEN6_SURFACERETURNFORMAT_FLOAT32 0 +#define GEN6_SURFACERETURNFORMAT_S1 1 + +#define GEN6_SURFACE_1D 0 +#define GEN6_SURFACE_2D 1 +#define GEN6_SURFACE_3D 2 +#define GEN6_SURFACE_CUBE 3 +#define GEN6_SURFACE_BUFFER 4 +#define GEN6_SURFACE_NULL 7 + +#define GEN6_BORDER_COLOR_MODE_DEFAULT 0 +#define GEN6_BORDER_COLOR_MODE_LEGACY 1 + +#define GEN6_TEXCOORDMODE_WRAP 0 +#define GEN6_TEXCOORDMODE_MIRROR 1 +#define GEN6_TEXCOORDMODE_CLAMP 2 +#define GEN6_TEXCOORDMODE_CUBE 3 +#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4 +#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5 + +#define GEN6_THREAD_PRIORITY_NORMAL 0 +#define GEN6_THREAD_PRIORITY_HIGH 1 + +#define GEN6_TILEWALK_XMAJOR 0 +#define GEN6_TILEWALK_YMAJOR 1 + +#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define GEN6_VFCOMPONENT_NOSTORE 0 +#define GEN6_VFCOMPONENT_STORE_SRC 1 +#define GEN6_VFCOMPONENT_STORE_0 2 +#define GEN6_VFCOMPONENT_STORE_1_FLT 3 +#define GEN6_VFCOMPONENT_STORE_1_INT 4 +#define GEN6_VFCOMPONENT_STORE_VID 5 +#define GEN6_VFCOMPONENT_STORE_IID 6 +#define GEN6_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define GEN6_ALIGN_1 0 +#define GEN6_ALIGN_16 1 + +#define GEN6_ADDRESS_DIRECT 0 +#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define GEN6_CHANNEL_X 0 +#define GEN6_CHANNEL_Y 1 +#define GEN6_CHANNEL_Z 2 +#define GEN6_CHANNEL_W 3 + +#define GEN6_COMPRESSION_NONE 0 +#define GEN6_COMPRESSION_2NDHALF 1 +#define GEN6_COMPRESSION_COMPRESSED 2 + +#define GEN6_CONDITIONAL_NONE 0 +#define GEN6_CONDITIONAL_Z 1 +#define GEN6_CONDITIONAL_NZ 2 +#define GEN6_CONDITIONAL_EQ 1 /* Z */ +#define GEN6_CONDITIONAL_NEQ 2 /* NZ */ +#define GEN6_CONDITIONAL_G 3 +#define GEN6_CONDITIONAL_GE 4 +#define GEN6_CONDITIONAL_L 5 +#define GEN6_CONDITIONAL_LE 6 +#define GEN6_CONDITIONAL_C 7 +#define GEN6_CONDITIONAL_O 8 + +#define GEN6_DEBUG_NONE 0 +#define GEN6_DEBUG_BREAKPOINT 1 + +#define GEN6_DEPENDENCY_NORMAL 0 +#define GEN6_DEPENDENCY_NOTCLEARED 1 +#define GEN6_DEPENDENCY_NOTCHECKED 2 +#define GEN6_DEPENDENCY_DISABLE 3 + +#define GEN6_EXECUTE_1 0 +#define GEN6_EXECUTE_2 1 +#define GEN6_EXECUTE_4 2 +#define GEN6_EXECUTE_8 3 +#define GEN6_EXECUTE_16 4 +#define GEN6_EXECUTE_32 5 + +#define GEN6_HORIZONTAL_STRIDE_0 0 +#define GEN6_HORIZONTAL_STRIDE_1 1 +#define GEN6_HORIZONTAL_STRIDE_2 2 +#define GEN6_HORIZONTAL_STRIDE_4 3 + +#define GEN6_INSTRUCTION_NORMAL 0 +#define GEN6_INSTRUCTION_SATURATE 1 + +#define GEN6_MASK_ENABLE 0 +#define GEN6_MASK_DISABLE 1 + +#define GEN6_OPCODE_MOV 1 +#define GEN6_OPCODE_SEL 2 +#define GEN6_OPCODE_NOT 4 +#define GEN6_OPCODE_AND 5 +#define GEN6_OPCODE_OR 6 +#define GEN6_OPCODE_XOR 7 +#define GEN6_OPCODE_SHR 8 +#define GEN6_OPCODE_SHL 9 +#define GEN6_OPCODE_RSR 10 +#define GEN6_OPCODE_RSL 11 +#define GEN6_OPCODE_ASR 12 +#define GEN6_OPCODE_CMP 16 +#define GEN6_OPCODE_JMPI 32 +#define GEN6_OPCODE_IF 34 +#define GEN6_OPCODE_IFF 35 +#define GEN6_OPCODE_ELSE 36 +#define GEN6_OPCODE_ENDIF 37 +#define GEN6_OPCODE_DO 38 +#define GEN6_OPCODE_WHILE 39 +#define GEN6_OPCODE_BREAK 40 +#define GEN6_OPCODE_CONTINUE 41 +#define GEN6_OPCODE_HALT 42 +#define GEN6_OPCODE_MSAVE 44 +#define GEN6_OPCODE_MRESTORE 45 +#define 
GEN6_OPCODE_PUSH 46 +#define GEN6_OPCODE_POP 47 +#define GEN6_OPCODE_WAIT 48 +#define GEN6_OPCODE_SEND 49 +#define GEN6_OPCODE_ADD 64 +#define GEN6_OPCODE_MUL 65 +#define GEN6_OPCODE_AVG 66 +#define GEN6_OPCODE_FRC 67 +#define GEN6_OPCODE_RNDU 68 +#define GEN6_OPCODE_RNDD 69 +#define GEN6_OPCODE_RNDE 70 +#define GEN6_OPCODE_RNDZ 71 +#define GEN6_OPCODE_MAC 72 +#define GEN6_OPCODE_MACH 73 +#define GEN6_OPCODE_LZD 74 +#define GEN6_OPCODE_SAD2 80 +#define GEN6_OPCODE_SADA2 81 +#define GEN6_OPCODE_DP4 84 +#define GEN6_OPCODE_DPH 85 +#define GEN6_OPCODE_DP3 86 +#define GEN6_OPCODE_DP2 87 +#define GEN6_OPCODE_DPA2 88 +#define GEN6_OPCODE_LINE 89 +#define GEN6_OPCODE_NOP 126 + +#define GEN6_PREDICATE_NONE 0 +#define GEN6_PREDICATE_NORMAL 1 +#define GEN6_PREDICATE_ALIGN1_ANYV 2 +#define GEN6_PREDICATE_ALIGN1_ALLV 3 +#define GEN6_PREDICATE_ALIGN1_ANY2H 4 +#define GEN6_PREDICATE_ALIGN1_ALL2H 5 +#define GEN6_PREDICATE_ALIGN1_ANY4H 6 +#define GEN6_PREDICATE_ALIGN1_ALL4H 7 +#define GEN6_PREDICATE_ALIGN1_ANY8H 8 +#define GEN6_PREDICATE_ALIGN1_ALL8H 9 +#define GEN6_PREDICATE_ALIGN1_ANY16H 10 +#define GEN6_PREDICATE_ALIGN1_ALL16H 11 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5 +#define GEN6_PREDICATE_ALIGN16_ANY4H 6 +#define GEN6_PREDICATE_ALIGN16_ALL4H 7 + +#define GEN6_ARCHITECTURE_REGISTER_FILE 0 +#define GEN6_GENERAL_REGISTER_FILE 1 +#define GEN6_MESSAGE_REGISTER_FILE 2 +#define GEN6_IMMEDIATE_VALUE 3 + +#define GEN6_REGISTER_TYPE_UD 0 +#define GEN6_REGISTER_TYPE_D 1 +#define GEN6_REGISTER_TYPE_UW 2 +#define GEN6_REGISTER_TYPE_W 3 +#define GEN6_REGISTER_TYPE_UB 4 +#define GEN6_REGISTER_TYPE_B 5 +#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
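+ * (VF packs four restricted 8-bit floats into one 32-bit immediate;
+ * the V type below packs eight signed 4-bit integers.)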
*/ +#define GEN6_REGISTER_TYPE_HF 6 +#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define GEN6_REGISTER_TYPE_F 7 + +#define GEN6_ARF_NULL 0x00 +#define GEN6_ARF_ADDRESS 0x10 +#define GEN6_ARF_ACCUMULATOR 0x20 +#define GEN6_ARF_FLAG 0x30 +#define GEN6_ARF_MASK 0x40 +#define GEN6_ARF_MASK_STACK 0x50 +#define GEN6_ARF_MASK_STACK_DEPTH 0x60 +#define GEN6_ARF_STATE 0x70 +#define GEN6_ARF_CONTROL 0x80 +#define GEN6_ARF_NOTIFICATION_COUNT 0x90 +#define GEN6_ARF_IP 0xA0 + +#define GEN6_AMASK 0 +#define GEN6_IMASK 1 +#define GEN6_LMASK 2 +#define GEN6_CMASK 3 + + + +#define GEN6_THREAD_NORMAL 0 +#define GEN6_THREAD_ATOMIC 1 +#define GEN6_THREAD_SWITCH 2 + +#define GEN6_VERTICAL_STRIDE_0 0 +#define GEN6_VERTICAL_STRIDE_1 1 +#define GEN6_VERTICAL_STRIDE_2 2 +#define GEN6_VERTICAL_STRIDE_4 3 +#define GEN6_VERTICAL_STRIDE_8 4 +#define GEN6_VERTICAL_STRIDE_16 5 +#define GEN6_VERTICAL_STRIDE_32 6 +#define GEN6_VERTICAL_STRIDE_64 7 +#define GEN6_VERTICAL_STRIDE_128 8 +#define GEN6_VERTICAL_STRIDE_256 9 +#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define GEN6_WIDTH_1 0 +#define GEN6_WIDTH_2 1 +#define GEN6_WIDTH_4 2 +#define GEN6_WIDTH_8 3 +#define GEN6_WIDTH_16 4 + +#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0 +#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1 +#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2 +#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3 +#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4 +#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5 +#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6 +#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7 +#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8 +#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9 +#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10 +#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define GEN6_POLYGON_FACING_FRONT 0 +#define GEN6_POLYGON_FACING_BACK 1 + +#define GEN6_MESSAGE_TARGET_NULL 0 +#define GEN6_MESSAGE_TARGET_MATH 1 +#define GEN6_MESSAGE_TARGET_SAMPLER 2 +#define GEN6_MESSAGE_TARGET_GATEWAY 3 +#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4 +#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define GEN6_MESSAGE_TARGET_URB 6 +#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2 +#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3 +#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define 
GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define GEN6_MATH_INTEGER_UNSIGNED 0 +#define GEN6_MATH_INTEGER_SIGNED 1 + +#define GEN6_MATH_PRECISION_FULL 0 +#define GEN6_MATH_PRECISION_PARTIAL 1 + +#define GEN6_MATH_SATURATE_NONE 0 +#define GEN6_MATH_SATURATE_SATURATE 1 + +#define GEN6_MATH_DATA_VECTOR 0 +#define GEN6_MATH_DATA_SCALAR 1 + +#define GEN6_URB_OPCODE_WRITE 0 + +#define GEN6_URB_SWIZZLE_NONE 0 +#define GEN6_URB_SWIZZLE_INTERLEAVE 1 +#define GEN6_URB_SWIZZLE_TRANSPOSE 2 + +#define GEN6_SCRATCH_SPACE_SIZE_1K 0 +#define GEN6_SCRATCH_SPACE_SIZE_2K 1 +#define GEN6_SCRATCH_SPACE_SIZE_4K 2 +#define GEN6_SCRATCH_SPACE_SIZE_8K 3 +#define GEN6_SCRATCH_SPACE_SIZE_16K 4 +#define GEN6_SCRATCH_SPACE_SIZE_32K 5 +#define GEN6_SCRATCH_SPACE_SIZE_64K 6 +#define GEN6_SCRATCH_SPACE_SIZE_128K 7 +#define GEN6_SCRATCH_SPACE_SIZE_256K 8 +#define GEN6_SCRATCH_SPACE_SIZE_512K 9 +#define GEN6_SCRATCH_SPACE_SIZE_1M 10 +#define GEN6_SCRATCH_SPACE_SIZE_2M 11 + +/* The hardware supports two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. 
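+ *
+ * For example, with an RGB source and a transparent-black border, the
+ * legacy mode returns alpha == 0 at the border while the default mode
+ * forces alpha == 1. A hypothetical setup using the structures declared
+ * below (the sampler pointer is assumed):
+ *
+ *	struct gen6_sampler_legacy_border_color bc = { .color = { 0, 0, 0, 0 } };
+ *	sampler->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;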
+ */ +struct gen6_sampler_default_border_color { + float color[4]; +}; + +struct gen6_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct gen6_sampler_state { + struct { + uint32_t shadow_function:3; + uint32_t lod_bias:11; + uint32_t min_filter:3; + uint32_t mag_filter:3; + uint32_t mip_filter:2; + uint32_t base_level:5; + uint32_t pad:1; + uint32_t lod_preclamp:1; + uint32_t border_color_mode:1; + uint32_t pad0:1; + uint32_t disable:1; + } ss0; + + struct { + uint32_t r_wrap_mode:3; + uint32_t t_wrap_mode:3; + uint32_t s_wrap_mode:3; + uint32_t pad:3; + uint32_t max_lod:10; + uint32_t min_lod:10; + } ss1; + + struct { + uint32_t border_color; + } ss2; + + struct { + uint32_t non_normalized_coord:1; + uint32_t pad:12; + uint32_t address_round:6; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t monochrome_filter_width:3; + uint32_t monochrome_filter_height:3; + } ss3; +}; + +struct gen6_blend_state { + struct { + uint32_t dest_blend_factor:5; + uint32_t source_blend_factor:5; + uint32_t pad3:1; + uint32_t blend_func:3; + uint32_t pad2:1; + uint32_t ia_dest_blend_factor:5; + uint32_t ia_source_blend_factor:5; + uint32_t pad1:1; + uint32_t ia_blend_func:3; + uint32_t pad0:1; + uint32_t ia_blend_enable:1; + uint32_t blend_enable:1; + } blend0; + + struct { + uint32_t post_blend_clamp_enable:1; + uint32_t pre_blend_clamp_enable:1; + uint32_t clamp_range:2; + uint32_t pad0:4; + uint32_t x_dither_offset:2; + uint32_t y_dither_offset:2; + uint32_t dither_enable:1; + uint32_t alpha_test_func:3; + uint32_t alpha_test_enable:1; + uint32_t pad1:1; + uint32_t logic_op_func:4; + uint32_t logic_op_enable:1; + uint32_t pad2:1; + uint32_t write_disable_b:1; + uint32_t write_disable_g:1; + uint32_t write_disable_r:1; + uint32_t write_disable_a:1; + uint32_t pad3:1; + uint32_t alpha_to_coverage_dither:1; + uint32_t alpha_to_one:1; + uint32_t alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state { + struct { + uint32_t alpha_test_format:1; + uint32_t pad0:14; + uint32_t round_disable:1; + uint32_t bf_stencil_ref:8; + uint32_t stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + uint32_t ui:8; + uint32_t pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state { + struct { + uint32_t pad0:3; + uint32_t bf_stencil_pass_depth_pass_op:3; + uint32_t bf_stencil_pass_depth_fail_op:3; + uint32_t bf_stencil_fail_op:3; + uint32_t bf_stencil_func:3; + uint32_t bf_stencil_enable:1; + uint32_t pad1:2; + uint32_t stencil_write_enable:1; + uint32_t stencil_pass_depth_pass_op:3; + uint32_t stencil_pass_depth_fail_op:3; + uint32_t stencil_fail_op:3; + uint32_t stencil_func:3; + uint32_t stencil_enable:1; + } ds0; + + struct { + uint32_t bf_stencil_write_mask:8; + uint32_t bf_stencil_test_mask:8; + uint32_t stencil_write_mask:8; + uint32_t stencil_test_mask:8; + } ds1; + + struct { + uint32_t pad0:26; + uint32_t depth_write_enable:1; + uint32_t depth_test_func:3; + uint32_t pad1:1; + uint32_t depth_test_enable:1; + } ds2; +}; + +struct gen6_surface_state { + struct { + uint32_t cube_pos_z:1; + uint32_t cube_neg_z:1; + uint32_t cube_pos_y:1; + uint32_t cube_neg_y:1; + uint32_t cube_pos_x:1; + uint32_t cube_neg_x:1; + uint32_t pad:3; + uint32_t render_cache_read_mode:1; + uint32_t mipmap_layout_mode:1; + uint32_t vert_line_stride_ofs:1; + uint32_t vert_line_stride:1; + uint32_t color_blend:1; + uint32_t 
writedisable_blue:1; + uint32_t writedisable_green:1; + uint32_t writedisable_red:1; + uint32_t writedisable_alpha:1; + uint32_t surface_format:9; + uint32_t data_return_format:1; + uint32_t pad0:1; + uint32_t surface_type:3; + } ss0; + + struct { + uint32_t base_addr; + } ss1; + + struct { + uint32_t render_target_rotation:2; + uint32_t mip_count:4; + uint32_t width:13; + uint32_t height:13; + } ss2; + + struct { + uint32_t tile_walk:1; + uint32_t tiled_surface:1; + uint32_t pad:1; + uint32_t pitch:18; + uint32_t depth:11; + } ss3; + + struct { + uint32_t pad:19; + uint32_t min_array_elt:9; + uint32_t min_lod:4; + } ss4; + + struct { + uint32_t pad:20; + uint32_t y_offset:4; + uint32_t pad2:1; + uint32_t x_offset:7; + } ss5; +}; + +/* Surface state DW0 */ +#define GEN6_SURFACE_RC_READ_WRITE (1 << 8) +#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10 +#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f +#define GEN6_SURFACE_BLEND_ENABLED (1 << 13) +#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17 +#define GEN6_SURFACE_FORMAT_SHIFT 18 +#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) + +#define GEN6_SURFACE_TYPE_SHIFT 29 +#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29) +#define GEN6_SURFACE_1D 0 +#define GEN6_SURFACE_2D 1 +#define GEN6_SURFACE_3D 2 +#define GEN6_SURFACE_CUBE 3 +#define GEN6_SURFACE_BUFFER 4 +#define GEN6_SURFACE_NULL 7 + +/* Surface state DW2 */ +#define GEN6_SURFACE_HEIGHT_SHIFT 19 +#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19) +#define GEN6_SURFACE_WIDTH_SHIFT 6 +#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6) +#define GEN6_SURFACE_LOD_SHIFT 2 +#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2) + +/* Surface state DW3 */ +#define GEN6_SURFACE_DEPTH_SHIFT 21 +#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21) +#define GEN6_SURFACE_PITCH_SHIFT 3 +#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3) +#define GEN6_SURFACE_TILED (1 << 1) +#define GEN6_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define GEN6_SURFACE_MIN_LOD_SHIFT 28 +#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28) + +/* Surface state DW5 */ +#define GEN6_SURFACE_X_OFFSET_SHIFT 25 +#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25) +#define GEN6_SURFACE_Y_OFFSET_SHIFT 20 +#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20) + +struct gen6_cc_viewport { + float min_depth; + float max_depth; +}; + +typedef enum { + SAMPLER_FILTER_NEAREST = 0, + SAMPLER_FILTER_BILINEAR, + FILTER_COUNT +} sampler_filter_t; + +typedef enum { + SAMPLER_EXTEND_NONE = 0, + SAMPLER_EXTEND_REPEAT, + SAMPLER_EXTEND_PAD, + SAMPLER_EXTEND_REFLECT, + EXTEND_COUNT +} sampler_extend_t; + +#endif diff --git a/drivers/video/Intel-2D/i915_drm.h b/drivers/video/Intel-2D/i915_drm.h new file mode 100644 index 0000000000..f27f5806e4 --- /dev/null +++ b/drivers/video/Intel-2D/i915_drm.h @@ -0,0 +1,944 @@ +/* + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _I915_DRM_H_ +#define _I915_DRM_H_ + +#include "drm.h" + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + + +/* Each region is a minimum of 16k, and there are at most 255 of them. + */ +#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use + * of chars for next/prev indices */ +#define I915_LOG_MIN_TEX_REGION_SIZE 14 + +typedef struct _drm_i915_init { + enum { + I915_INIT_DMA = 0x01, + I915_CLEANUP_DMA = 0x02, + I915_RESUME_DMA = 0x03 + } func; + unsigned int mmio_offset; + int sarea_priv_offset; + unsigned int ring_start; + unsigned int ring_end; + unsigned int ring_size; + unsigned int front_offset; + unsigned int back_offset; + unsigned int depth_offset; + unsigned int w; + unsigned int h; + unsigned int pitch; + unsigned int pitch_bits; + unsigned int back_pitch; + unsigned int depth_pitch; + unsigned int cpp; + unsigned int chipset; +} drm_i915_init_t; + +typedef struct _drm_i915_sarea { + struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1]; + int last_upload; /* last time texture was uploaded */ + int last_enqueue; /* last time a buffer was enqueued */ + int last_dispatch; /* age of the most recently dispatched buffer */ + int ctxOwner; /* last context to upload state */ + int texAge; + int pf_enabled; /* is pageflipping allowed? */ + int pf_active; + int pf_current_page; /* which buffer is being displayed? 
*/ + int perf_boxes; /* performance boxes to be displayed */ + int width, height; /* screen size in pixels */ + + drm_handle_t front_handle; + int front_offset; + int front_size; + + drm_handle_t back_handle; + int back_offset; + int back_size; + + drm_handle_t depth_handle; + int depth_offset; + int depth_size; + + drm_handle_t tex_handle; + int tex_offset; + int tex_size; + int log_tex_granularity; + int pitch; + int rotation; /* 0, 90, 180 or 270 */ + int rotated_offset; + int rotated_size; + int rotated_pitch; + int virtualX, virtualY; + + unsigned int front_tiled; + unsigned int back_tiled; + unsigned int depth_tiled; + unsigned int rotated_tiled; + unsigned int rotated2_tiled; + + int pipeA_x; + int pipeA_y; + int pipeA_w; + int pipeA_h; + int pipeB_x; + int pipeB_y; + int pipeB_w; + int pipeB_h; + + /* fill out some space for old userspace triple buffer */ + drm_handle_t unused_handle; + __u32 unused1, unused2, unused3; + + /* buffer object handles for static buffers. May change + * over the lifetime of the client. + */ + __u32 front_bo_handle; + __u32 back_bo_handle; + __u32 unused_bo_handle; + __u32 depth_bo_handle; + +} drm_i915_sarea_t; + +/* due to userspace building against these headers we need some compat here */ +#define planeA_x pipeA_x +#define planeA_y pipeA_y +#define planeA_w pipeA_w +#define planeA_h pipeA_h +#define planeB_x pipeB_x +#define planeB_y pipeB_y +#define planeB_w pipeB_w +#define planeB_h pipeB_h + +/* Flags for perf_boxes + */ +#define I915_BOX_RING_EMPTY 0x1 +#define I915_BOX_FLIP 0x2 +#define I915_BOX_WAIT 0x4 +#define I915_BOX_TEXTURE_LOAD 0x8 +#define I915_BOX_LOST_CONTEXT 0x10 + +/* I915 specific ioctls + * The device specific ioctl range is 0x40 to 0x79. + */ +#define DRM_I915_INIT 0x00 +#define DRM_I915_FLUSH 0x01 +#define DRM_I915_FLIP 0x02 +#define DRM_I915_BATCHBUFFER 0x03 +#define DRM_I915_IRQ_EMIT 0x04 +#define DRM_I915_IRQ_WAIT 0x05 +#define DRM_I915_GETPARAM 0x06 +#define DRM_I915_SETPARAM 0x07 +#define DRM_I915_ALLOC 0x08 +#define DRM_I915_FREE 0x09 +#define DRM_I915_INIT_HEAP 0x0a +#define DRM_I915_CMDBUFFER 0x0b +#define DRM_I915_DESTROY_HEAP 0x0c +#define DRM_I915_SET_VBLANK_PIPE 0x0d +#define DRM_I915_GET_VBLANK_PIPE 0x0e +#define DRM_I915_VBLANK_SWAP 0x0f +#define DRM_I915_HWS_ADDR 0x11 +#define DRM_I915_GEM_INIT 0x13 +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_PIN 0x15 +#define DRM_I915_GEM_UNPIN 0x16 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_ENTERVT 0x19 +#define DRM_I915_GEM_LEAVEVT 0x1a +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SW_FINISH 0x20 +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 +#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 +#define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GET_SPRITE_COLORKEY 0x2a +#define DRM_I915_SET_SPRITE_COLORKEY 0x2b +#define DRM_I915_GEM_WAIT 0x2c +#define DRM_I915_GEM_CONTEXT_CREATE 0x2d +#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e +#define DRM_I915_GEM_SET_CACHEING 0x2f +#define DRM_I915_GEM_GET_CACHEING 0x30 +#define DRM_I915_REG_READ 0x31 + +#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) +#define DRM_IOCTL_I915_FLUSH DRM_IO 
( DRM_COMMAND_BASE + DRM_I915_FLUSH) +#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP) +#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t) +#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t) +#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t) +#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t) +#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t) +#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t) +#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t) +#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t) +#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t) +#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t) +#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) +#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) +#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_SET_CACHEING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHEING, struct drm_i915_gem_cacheing) +#define DRM_IOCTL_I915_GEM_GET_CACHEING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHEING, struct drm_i915_gem_cacheing) +#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) +#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) +#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct 
drm_i915_gem_sw_finish)
+#define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
+#define DRM_IOCTL_I915_GEM_GET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
+#define DRM_IOCTL_I915_GEM_GET_APERTURE	DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
+#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
+#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
+#define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
+#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
+#define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
+#define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+
+/* Allow drivers to submit batchbuffers directly to hardware, relying
+ * on the security mechanisms provided by hardware.
+ */
+typedef struct drm_i915_batchbuffer {
+	int start;		/* agp offset */
+	int used;		/* nr bytes in use */
+	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
+	int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO */
+	int num_cliprects;	/* multipass with multiple cliprects? */
+	struct drm_clip_rect *cliprects;	/* pointer to userspace cliprects */
+} drm_i915_batchbuffer_t;
+
+/* As above, but pass a pointer to userspace buffer which can be
+ * validated by the kernel prior to sending to hardware.
+ */
+typedef struct _drm_i915_cmdbuffer {
+	char *buf;	/* pointer to userspace command buffer */
+	int sz;		/* nr bytes in buf */
+	int DR1;	/* hw flags for GFX_OP_DRAWRECT_INFO */
+	int DR4;	/* window origin for GFX_OP_DRAWRECT_INFO */
+	int num_cliprects;	/* multipass with multiple cliprects? 
*/
+	struct drm_clip_rect *cliprects;	/* pointer to userspace cliprects */
+} drm_i915_cmdbuffer_t;
+
+/* Userspace can request & wait on IRQs:
+ */
+typedef struct drm_i915_irq_emit {
+	int *irq_seq;
+} drm_i915_irq_emit_t;
+
+typedef struct drm_i915_irq_wait {
+	int irq_seq;
+} drm_i915_irq_wait_t;
+
+/* Ioctl to query kernel params:
+ */
+#define I915_PARAM_IRQ_ACTIVE            1
+#define I915_PARAM_ALLOW_BATCHBUFFER     2
+#define I915_PARAM_LAST_DISPATCH         3
+#define I915_PARAM_CHIPSET_ID            4
+#define I915_PARAM_HAS_GEM               5
+#define I915_PARAM_NUM_FENCES_AVAIL      6
+#define I915_PARAM_HAS_OVERLAY           7
+#define I915_PARAM_HAS_PAGEFLIPPING	 8
+#define I915_PARAM_HAS_EXECBUF2          9
+#define I915_PARAM_HAS_BSD		 10
+#define I915_PARAM_HAS_BLT		 11
+#define I915_PARAM_HAS_RELAXED_FENCING	 12
+#define I915_PARAM_HAS_COHERENT_RINGS	 13
+#define I915_PARAM_HAS_EXEC_CONSTANTS	 14
+#define I915_PARAM_HAS_RELAXED_DELTA	 15
+#define I915_PARAM_HAS_GEN7_SOL_RESET	 16
+#define I915_PARAM_HAS_LLC		 17
+#define I915_PARAM_HAS_ALIASING_PPGTT	 18
+#define I915_PARAM_HAS_WAIT_TIMEOUT	 19
+
+typedef struct drm_i915_getparam {
+	int param;
+	int *value;
+} drm_i915_getparam_t;
+
+/* Ioctl to set kernel params:
+ */
+#define I915_SETPARAM_USE_MI_BATCHBUFFER_START            1
+#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY             2
+#define I915_SETPARAM_ALLOW_BATCHBUFFER                   3
+#define I915_SETPARAM_NUM_USED_FENCES                     4
+
+typedef struct drm_i915_setparam {
+	int param;
+	int value;
+} drm_i915_setparam_t;
+
+/* A memory manager for regions of shared memory:
+ */
+#define I915_MEM_REGION_AGP 1
+
+typedef struct drm_i915_mem_alloc {
+	int region;
+	int alignment;
+	int size;
+	int *region_offset;	/* offset from start of fb or agp */
+} drm_i915_mem_alloc_t;
+
+typedef struct drm_i915_mem_free {
+	int region;
+	int region_offset;
+} drm_i915_mem_free_t;
+
+typedef struct drm_i915_mem_init_heap {
+	int region;
+	int size;
+	int start;
+} drm_i915_mem_init_heap_t;
+
+/* Allow memory manager to be torn down and re-initialized (e.g. on
+ * rotate):
+ */
+typedef struct drm_i915_mem_destroy_heap {
+	int region;
+} drm_i915_mem_destroy_heap_t;
+
+/* Allow X server to configure which pipes to monitor for vblank signals
+ */
+#define	DRM_I915_VBLANK_PIPE_A	1
+#define	DRM_I915_VBLANK_PIPE_B	2
+
+typedef struct drm_i915_vblank_pipe {
+	int pipe;
+} drm_i915_vblank_pipe_t;
+
+/* Schedule buffer swap at given vertical blank:
+ */
+typedef struct drm_i915_vblank_swap {
+	drm_drawable_t drawable;
+	enum drm_vblank_seq_type seqtype;
+	unsigned int sequence;
+} drm_i915_vblank_swap_t;
+
+typedef struct drm_i915_hws_addr {
+	__u64 addr;
+} drm_i915_hws_addr_t;
+
+struct drm_i915_gem_init {
+	/**
+	 * Beginning offset in the GTT to be managed by the DRM memory
+	 * manager.
+	 */
+	__u64 gtt_start;
+	/**
+	 * Ending offset in the GTT to be managed by the DRM memory
+	 * manager.
+	 */
+	__u64 gtt_end;
+};
+
+struct drm_i915_gem_create {
+	/**
+	 * Requested size for the object.
+	 *
+	 * The (page-aligned) allocated size for the object will be returned.
+	 */
+	__u64 size;
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+	__u32 pad;
+};
+
+struct drm_i915_gem_pread {
+	/** Handle for the object being read. */
+	__u32 handle;
+	__u32 pad;
+	/** Offset into the object to read from */
+	__u64 offset;
+	/** Length of data to read */
+	__u64 size;
+	/**
+	 * Pointer to write the data into.
+	 *
+	 * This is a fixed-size type for 32/64 compatibility.
+	 */
+	__u64 data_ptr;
+};
+
+struct drm_i915_gem_pwrite {
+	/** Handle for the object being written to. 
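+	 *
+	 * A hypothetical usage sketch for this ioctl as a whole (fd, handle,
+	 * buf and len are assumed): userspace pointers travel through the
+	 * fixed-size __u64 fields, so the cast goes via uintptr_t, e.g. with
+	 * libdrm's drmIoctl():
+	 *
+	 *	struct drm_i915_gem_pwrite pw;
+	 *	pw.handle = handle;
+	 *	pw.offset = 0;
+	 *	pw.size = len;
+	 *	pw.data_ptr = (uintptr_t)buf;
+	 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pw);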
*/ + __u32 handle; + __u32 pad; + /** Offset into the object to write to */ + __u64 offset; + /** Length of data to write */ + __u64 size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** Offset in the object to map. */ + __u64 offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ + __u64 size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 addr_ptr; +}; + +struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; +}; + +struct drm_i915_gem_set_domain { + /** Handle for the object */ + __u32 handle; + + /** New read domains */ + __u32 read_domains; + + /** New write domain */ + __u32 write_domain; +}; + +struct drm_i915_gem_sw_finish { + /** Handle for the object */ + __u32 handle; +}; + +struct drm_i915_gem_relocation_entry { + /** + * Handle of the buffer being pointed to by this relocation entry. + * + * It's appealing to make this be an index into the mm_validate_entry + * list to refer to the buffer, but this allows the driver to create + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ + __u32 target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ + __u32 delta; + + /** Offset in the buffer the relocation entry will be written into */ + __u64 offset; + + /** + * Offset value of the target buffer that the relocation entry was last + * written as. + * + * If the buffer has the same offset as last time, we can skip syncing + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ + __u64 presumed_offset; + + /** + * Target memory domains read by this operation. + */ + __u32 read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + __u32 write_domain; +}; + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** @} */ + +struct drm_i915_gem_exec_object { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. 
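+	 *
+	 * A hypothetical sketch of attaching one relocation to this object
+	 * (target_handle, batch_offset and last_offset are assumed values);
+	 * presumed_offset lets the kernel skip rewriting the batch when the
+	 * target buffer has not moved:
+	 *
+	 *	struct drm_i915_gem_relocation_entry reloc;
+	 *	reloc.target_handle = target_handle;
+	 *	reloc.offset = batch_offset;
+	 *	reloc.delta = 0;
+	 *	reloc.presumed_offset = last_offset;
+	 *	reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+	 *	reloc.write_domain = 0;
+	 *	obj.relocs_ptr = (uintptr_t)&reloc;
+	 *	obj.relocation_count = 1;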
+ */
+	__u32 handle;
+
+	/** Number of relocations to be performed on this buffer */
+	__u32 relocation_count;
+	/**
+	 * Pointer to array of struct drm_i915_gem_relocation_entry containing
+	 * the relocations to be performed in this buffer.
+	 */
+	__u64 relocs_ptr;
+
+	/** Required alignment in graphics aperture */
+	__u64 alignment;
+
+	/**
+	 * Returned value of the updated offset of the object, for future
+	 * presumed_offset writes.
+	 */
+	__u64 offset;
+};
+
+struct drm_i915_gem_execbuffer {
+	/**
+	 * List of buffers to be validated with their relocations to be
+	 * performed on them.
+	 *
+	 * This is a pointer to an array of struct drm_i915_gem_validate_entry.
+	 *
+	 * These buffers must be listed in an order such that all relocations
+	 * a buffer is performing refer to buffers that have already appeared
+	 * in the validate list.
+	 */
+	__u64 buffers_ptr;
+	__u32 buffer_count;
+
+	/** Offset in the batchbuffer to start execution from. */
+	__u32 batch_start_offset;
+	/** Bytes used in batchbuffer from batch_start_offset */
+	__u32 batch_len;
+	__u32 DR1;
+	__u32 DR4;
+	__u32 num_cliprects;
+	/** This is a struct drm_clip_rect *cliprects */
+	__u64 cliprects_ptr;
+};
+
+struct drm_i915_gem_exec_object2 {
+	/**
+	 * User's handle for a buffer to be bound into the GTT for this
+	 * operation.
+	 */
+	__u32 handle;
+
+	/** Number of relocations to be performed on this buffer */
+	__u32 relocation_count;
+	/**
+	 * Pointer to array of struct drm_i915_gem_relocation_entry containing
+	 * the relocations to be performed in this buffer.
+	 */
+	__u64 relocs_ptr;
+
+	/** Required alignment in graphics aperture */
+	__u64 alignment;
+
+	/**
+	 * Returned value of the updated offset of the object, for future
+	 * presumed_offset writes.
+	 */
+	__u64 offset;
+
+#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+	__u64 flags;
+	__u64 rsvd1;
+	__u64 rsvd2;
+};
+
+struct drm_i915_gem_execbuffer2 {
+	/**
+	 * List of gem_exec_object2 structs
+	 */
+	__u64 buffers_ptr;
+	__u32 buffer_count;
+
+	/** Offset in the batchbuffer to start execution from. */
+	__u32 batch_start_offset;
+	/** Bytes used in batchbuffer from batch_start_offset */
+	__u32 batch_len;
+	__u32 DR1;
+	__u32 DR4;
+	__u32 num_cliprects;
+	/** This is a struct drm_clip_rect *cliprects */
+	__u64 cliprects_ptr;
+#define I915_EXEC_RING_MASK              (7<<0)
+#define I915_EXEC_DEFAULT                (0<<0)
+#define I915_EXEC_RENDER                 (1<<0)
+#define I915_EXEC_BSD                    (2<<0)
+#define I915_EXEC_BLT                    (3<<0)
+
+/* Used for switching the constants addressing mode on gen4+ RENDER ring.
+ * Gen6+ only supports relative addressing to dynamic state (default) and
+ * absolute addressing.
+ *
+ * These flags are ignored for the BSD and BLT rings.
+ */
+#define I915_EXEC_CONSTANTS_MASK	(3<<6)
+#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
+#define I915_EXEC_CONSTANTS_ABSOLUTE	(1<<6)
+#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
+	__u64 flags;
+	__u64 rsvd1; /* now used for context info */
+	__u64 rsvd2;
+};
+
+/** Resets the SO write offset registers for transform feedback on gen7. */
+#define I915_EXEC_GEN7_SOL_RESET	(1<<8)
+
+#define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
+#define i915_execbuffer2_set_context_id(eb2, context) \
+	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
+#define i915_execbuffer2_get_context_id(eb2) \
+	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+
+struct drm_i915_gem_pin {
+	/** Handle of the buffer to be pinned. 
*/
+	__u32 handle;
+	__u32 pad;
+
+	/** alignment required within the aperture */
+	__u64 alignment;
+
+	/** Returned GTT offset of the buffer. */
+	__u64 offset;
+};
+
+struct drm_i915_gem_unpin {
+	/** Handle of the buffer to be unpinned. */
+	__u32 handle;
+	__u32 pad;
+};
+
+struct drm_i915_gem_busy {
+	/** Handle of the buffer to check for busy */
+	__u32 handle;
+
+	/** Return busy status (1 if busy, 0 if idle).
+	 * The high word is used to indicate on which rings the object
+	 * currently resides:
+	 *  16:31 - busy (r or r/w) rings (16 render, 17 bsd, 18 blt, etc)
+	 */
+	__u32 busy;
+};
+
+#define I915_CACHEING_NONE		0
+#define I915_CACHEING_CACHED		1
+
+struct drm_i915_gem_cacheing {
+	/**
+	 * Handle of the buffer to set/get the cacheing level of. */
+	__u32 handle;
+
+	/**
+	 * Cacheing level to apply or return value
+	 *
+	 * bits0-15 are for generic cacheing control (i.e. the above defined
+	 * values). bits16-31 are reserved for platform-specific variations
+	 * (e.g. l3$ caching on gen7). */
+	__u32 cacheing;
+};
+
+#define I915_TILING_NONE	0
+#define I915_TILING_X		1
+#define I915_TILING_Y		2
+
+#define I915_BIT_6_SWIZZLE_NONE		0
+#define I915_BIT_6_SWIZZLE_9		1
+#define I915_BIT_6_SWIZZLE_9_10		2
+#define I915_BIT_6_SWIZZLE_9_11		3
+#define I915_BIT_6_SWIZZLE_9_10_11	4
+/* Not seen by userland */
+#define I915_BIT_6_SWIZZLE_UNKNOWN	5
+/* Seen by userland. */
+#define I915_BIT_6_SWIZZLE_9_17		6
+#define I915_BIT_6_SWIZZLE_9_10_17	7
+
+struct drm_i915_gem_set_tiling {
+	/** Handle of the buffer to have its tiling state updated */
+	__u32 handle;
+
+	/**
+	 * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
+	 * I915_TILING_Y).
+	 *
+	 * This value is to be set on request, and will be updated by the
+	 * kernel on successful return with the actual chosen tiling layout.
+	 *
+	 * The tiling mode may be demoted to I915_TILING_NONE when the system
+	 * has bit 6 swizzling that can't be managed correctly by GEM.
+	 *
+	 * Buffer contents become undefined when changing tiling_mode.
+	 */
+	__u32 tiling_mode;
+
+	/**
+	 * Stride in bytes for the object when in I915_TILING_X or
+	 * I915_TILING_Y.
+	 */
+	__u32 stride;
+
+	/**
+	 * Returned address bit 6 swizzling required for CPU access through
+	 * mmap mapping.
+	 */
+	__u32 swizzle_mode;
+};
+
+struct drm_i915_gem_get_tiling {
+	/** Handle of the buffer to get tiling state for. */
+	__u32 handle;
+
+	/**
+	 * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
+	 * I915_TILING_Y).
+	 */
+	__u32 tiling_mode;
+
+	/**
+	 * Returned address bit 6 swizzling required for CPU access through
+	 * mmap mapping.
+	 */
+	__u32 swizzle_mode;
+};
+
+struct drm_i915_gem_get_aperture {
+	/** Total size of the aperture used by i915_gem_execbuffer, in bytes */
+	__u64 aper_size;
+
+	/**
+	 * Available space in the aperture used by i915_gem_execbuffer, in
+	 * bytes
+	 */
+	__u64 aper_available_size;
+};
+
+struct drm_i915_get_pipe_from_crtc_id {
+	/** ID of CRTC being requested **/
+	__u32 crtc_id;
+
+	/** pipe of requested CRTC **/
+	__u32 pipe;
+};
+
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+	/** Handle of the buffer to change the backing store advice */
+	__u32 handle;
+
+	/* Advice: either the buffer will be needed again in the near future,
+	 * or won't be and could be discarded under memory pressure.
+	 */
+	__u32 madv;
+
+	/** Whether the backing store still exists. 
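+	 *
+	 * A hypothetical round trip (fd, handle and madv are assumed, and
+	 * reupload_contents() is a stand-in): mark an idle buffer DONTNEED,
+	 * then on reuse mark it WILLNEED and check retained to learn whether
+	 * the pages were purged in the meantime:
+	 *
+	 *	madv.handle = handle;
+	 *	madv.madv = I915_MADV_DONTNEED;
+	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
+	 *	...
+	 *	madv.madv = I915_MADV_WILLNEED;
+	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
+	 *	if (!madv.retained)
+	 *		reupload_contents();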
*/
+	__u32 retained;
+};
+
+/* flags */
+#define I915_OVERLAY_TYPE_MASK		0xff
+#define I915_OVERLAY_YUV_PLANAR		0x01
+#define I915_OVERLAY_YUV_PACKED		0x02
+#define I915_OVERLAY_RGB		0x03
+
+#define I915_OVERLAY_DEPTH_MASK		0xff00
+#define I915_OVERLAY_RGB24		0x1000
+#define I915_OVERLAY_RGB16		0x2000
+#define I915_OVERLAY_RGB15		0x3000
+#define I915_OVERLAY_YUV422		0x0100
+#define I915_OVERLAY_YUV411		0x0200
+#define I915_OVERLAY_YUV420		0x0300
+#define I915_OVERLAY_YUV410		0x0400
+
+#define I915_OVERLAY_SWAP_MASK		0xff0000
+#define I915_OVERLAY_NO_SWAP		0x000000
+#define I915_OVERLAY_UV_SWAP		0x010000
+#define I915_OVERLAY_Y_SWAP		0x020000
+#define I915_OVERLAY_Y_AND_UV_SWAP	0x030000
+
+#define I915_OVERLAY_FLAGS_MASK		0xff000000
+#define I915_OVERLAY_ENABLE		0x01000000
+
+struct drm_intel_overlay_put_image {
+	/* various flags and src format description */
+	__u32 flags;
+	/* source picture description */
+	__u32 bo_handle;
+	/* stride values and offsets are in bytes, buffer relative */
+	__u16 stride_Y; /* stride for packed formats */
+	__u16 stride_UV;
+	__u32 offset_Y; /* offset for packed formats */
+	__u32 offset_U;
+	__u32 offset_V;
+	/* in pixels */
+	__u16 src_width;
+	__u16 src_height;
+	/* to compensate for the scaling factors for partially covered surfaces */
+	__u16 src_scan_width;
+	__u16 src_scan_height;
+	/* output crtc description */
+	__u32 crtc_id;
+	__u16 dst_x;
+	__u16 dst_y;
+	__u16 dst_width;
+	__u16 dst_height;
+};
+
+/* flags */
+#define I915_OVERLAY_UPDATE_ATTRS	(1<<0)
+#define I915_OVERLAY_UPDATE_GAMMA	(1<<1)
+struct drm_intel_overlay_attrs {
+	__u32 flags;
+	__u32 color_key;
+	__s32 brightness;
+	__u32 contrast;
+	__u32 saturation;
+	__u32 gamma0;
+	__u32 gamma1;
+	__u32 gamma2;
+	__u32 gamma3;
+	__u32 gamma4;
+	__u32 gamma5;
+};
+
+/*
+ * Intel sprite handling
+ *
+ * Color keying works with a min/mask/max tuple. Both source and destination
+ * color keying are allowed.
+ *
+ * Source keying:
+ * Sprite pixels within the min & max values, masked against the color channels
+ * specified in the mask field, will be transparent. All other pixels will
+ * be displayed on top of the primary plane. For RGB surfaces, only the min
+ * and mask fields will be used; ranged compares are not allowed.
+ *
+ * Destination keying:
+ * Primary plane pixels that match the min value, masked against the color
+ * channels specified in the mask field, will be replaced by corresponding
+ * pixels from the sprite plane.
+ *
+ * Note that source & destination keying are exclusive; only one can be
+ * active on a given plane.
+ */
+
+#define I915_SET_COLORKEY_NONE		(1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_DESTINATION	(1<<1)
+#define I915_SET_COLORKEY_SOURCE	(1<<2)
+struct drm_intel_sprite_colorkey {
+	__u32 plane_id;
+	__u32 min_value;
+	__u32 channel_mask;
+	__u32 max_value;
+	__u32 flags;
+};
+
+struct drm_i915_gem_wait {
+	/** Handle of BO we shall wait on */
+	__u32 bo_handle;
+	__u32 flags;
+	/** Number of nanoseconds to wait. Returns time remaining. 
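+	 *
+	 * A hypothetical call (fd, handle and wait are assumed), relying on
+	 * the kernel writing the remaining time back into timeout_ns:
+	 *
+	 *	wait.bo_handle = handle;
+	 *	wait.flags = 0;
+	 *	wait.timeout_ns = 500 * 1000 * 1000;
+	 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);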
+
+struct drm_i915_gem_wait {
+	/** Handle of BO we shall wait on */
+	__u32 bo_handle;
+	__u32 flags;
+	/** Number of nanoseconds to wait. Returns the time remaining. */
+	__s64 timeout_ns;
+};
+
+struct drm_i915_gem_context_create {
+	/* Output: id of the new context */
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct drm_i915_gem_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct drm_i915_reg_read {
+	__u64 offset;
+	__u64 val; /* Return value */
+};
+#endif /* _I915_DRM_H_ */
diff --git a/drivers/video/Intel-2D/intel.h b/drivers/video/Intel-2D/intel.h
new file mode 100644
index 0000000000..3a05f3cd82
--- /dev/null
+++ b/drivers/video/Intel-2D/intel.h
@@ -0,0 +1,152 @@
+
+/** enumeration of 3d consumers so some can maintain invariant state. */
+enum last_3d {
+	LAST_3D_OTHER,
+	LAST_3D_VIDEO,
+	LAST_3D_RENDER,
+	LAST_3D_ROTATION
+};
+
+typedef struct intel_screen_private {
+	int cpp;
+
+#define RENDER_BATCH I915_EXEC_RENDER
+#define BLT_BATCH I915_EXEC_BLT
+
+	unsigned int current_batch;
+
+	dri_bufmgr *bufmgr;
+
+	uint32_t batch_ptr[4096];
+	/** Byte offset in batch_ptr for the next dword to be emitted. */
+	unsigned int batch_used;
+	/** Position in batch_ptr at the start of the current BEGIN_BATCH */
+	unsigned int batch_emit_start;
+	/** Number of bytes to be emitted in the current BEGIN_BATCH. */
+	uint32_t batch_emitting;
+	dri_bo *batch_bo, *last_batch_bo[2];
+	/** Whether we're in a section of code that can't tolerate flushing */
+	Bool in_batch_atomic;
+	/** Ending batch_used that was verified by intel_start_batch_atomic() */
+	int batch_atomic_limit;
+	struct list batch_pixmaps;
+	drm_intel_bo *wa_scratch_bo;
+
+	unsigned int tiling;
+
+#define INTEL_TILING_FB		0x1
+#define INTEL_TILING_2D		0x2
+#define INTEL_TILING_3D		0x4
+#define INTEL_TILING_ALL	(~0)
+
+	Bool has_relaxed_fencing;
+
+	int Chipset;
+
+	unsigned int BR[20];
+
+	void (*vertex_flush) (struct intel_screen_private *intel);
+	void (*batch_flush) (struct intel_screen_private *intel);
+	void (*batch_commit_notify) (struct intel_screen_private *intel);
+
+	Bool need_sync;
+
+	int accel_pixmap_offset_alignment;
+	int accel_max_x;
+	int accel_max_y;
+	int max_bo_size;
+	int max_gtt_map_size;
+	int max_tiling_size;
+
+	struct {
+		drm_intel_bo *gen4_vs_bo;
+		drm_intel_bo *gen4_sf_bo;
+		drm_intel_bo *gen4_wm_packed_bo;
+		drm_intel_bo *gen4_wm_planar_bo;
+		drm_intel_bo *gen4_cc_bo;
+		drm_intel_bo *gen4_cc_vp_bo;
+		drm_intel_bo *gen4_sampler_bo;
+		drm_intel_bo *gen4_sip_kernel_bo;
+		drm_intel_bo *wm_prog_packed_bo;
+		drm_intel_bo *wm_prog_planar_bo;
+		drm_intel_bo *gen6_blend_bo;
+		drm_intel_bo *gen6_depth_stencil_bo;
+	} video;
+
+	/* Render accel state */
+	float scale_units[2][2];
+	/** Transform pointers for src/mask, or NULL if identity */
+	PictTransform *transform[2];
+
+	PixmapPtr render_source, render_mask, render_dest;
+	PicturePtr render_source_picture, render_mask_picture, render_dest_picture;
+	Bool needs_3d_invariant;
+	Bool needs_render_state_emit;
+	Bool needs_render_vertex_emit;
+
+	/* i830 render accel state */
+	uint32_t render_dest_format;
+	uint32_t cblend, ablend, s8_blendctl;
+
+	/* i915 render accel state */
+	PixmapPtr texture[2];
+	uint32_t mapstate[6];
+	uint32_t samplerstate[6];
+
+	struct {
+		int op;
+		uint32_t dst_format;
+	} i915_render_state;
+
+	struct {
+		int num_sf_outputs;
+		int drawrect;
+		uint32_t blend;
+		dri_bo *samplers;
+		dri_bo *kernel;
+	} gen6_render_state;
+
+	uint32_t prim_offset;
+	void (*prim_emit)(struct intel_screen_private *intel,
+			  int srcX, int srcY,
+			  int maskX, int maskY,
+			  int dstX, int dstY,
+			  int w, int h);
+	int floats_per_vertex;
+	int last_floats_per_vertex;
+	uint16_t vertex_offset;
+	uint16_t vertex_count;
+	uint16_t
vertex_index; + uint16_t vertex_used; + uint32_t vertex_id; + float vertex_ptr[4*1024]; + dri_bo *vertex_bo; + + uint8_t surface_data[16*1024]; + uint16_t surface_used; + uint16_t surface_table; + uint32_t surface_reloc; + dri_bo *surface_bo; + + /* 965 render acceleration state */ + struct gen4_render_state *gen4_render_state; + + Bool use_pageflipping; + Bool use_triple_buffer; + Bool force_fallback; + Bool has_kernel_flush; + Bool needs_flush; + + enum last_3d last_3d; + + /** + * User option to print acceleration fallback info to the server log. + */ + Bool fallback_debug; + unsigned debug_flush; + Bool has_prime_vmap_flush; +} intel_screen_private; + diff --git a/drivers/video/Intel-2D/intel_driver.h b/drivers/video/Intel-2D/intel_driver.h new file mode 100644 index 0000000000..104214a6e0 --- /dev/null +++ b/drivers/video/Intel-2D/intel_driver.h @@ -0,0 +1,288 @@ +#ifndef INTEL_DRIVER_H +#define INTEL_DRIVER_H + +#define INTEL_VERSION 4000 +#define INTEL_NAME "intel" +#define INTEL_DRIVER_NAME "intel" + +#define INTEL_VERSION_MAJOR PACKAGE_VERSION_MAJOR +#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR +#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL + +#ifndef PCI_CHIP_I810 +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 +#define PCI_CHIP_I810_BRIDGE 0x7120 +#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 +#define PCI_CHIP_I810_E_BRIDGE 0x7124 +#define PCI_CHIP_I815_BRIDGE 0x1130 +#endif + +#ifndef PCI_CHIP_I830_M +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I830_M_BRIDGE 0x3575 +#endif + +#ifndef PCI_CHIP_845_G +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_845_G_BRIDGE 0x2560 +#endif + +#ifndef PCI_CHIP_I854 +#define PCI_CHIP_I854 0x358E +#define PCI_CHIP_I854_BRIDGE 0x358C +#endif + +#ifndef PCI_CHIP_I855_GM +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I855_GM_BRIDGE 0x3580 +#endif + +#ifndef PCI_CHIP_I865_G +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I865_G_BRIDGE 0x2570 +#endif + +#ifndef PCI_CHIP_I915_G +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_G_BRIDGE 0x2580 +#endif + +#ifndef PCI_CHIP_I915_GM +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I915_GM_BRIDGE 0x2590 +#endif + +#ifndef PCI_CHIP_E7221_G +#define PCI_CHIP_E7221_G 0x258A +/* Same as I915_G_BRIDGE */ +#define PCI_CHIP_E7221_G_BRIDGE 0x2580 +#endif + +#ifndef PCI_CHIP_I945_G +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_G_BRIDGE 0x2770 +#endif + +#ifndef PCI_CHIP_I945_GM +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 +#endif + +#ifndef PCI_CHIP_I945_GME +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_I945_GME_BRIDGE 0x27AC +#endif + +#ifndef PCI_CHIP_PINEVIEW_M +#define PCI_CHIP_PINEVIEW_M 0xA011 +#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010 +#define PCI_CHIP_PINEVIEW_G 0xA001 +#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000 +#endif + +#ifndef PCI_CHIP_G35_G +#define PCI_CHIP_G35_G 0x2982 +#define PCI_CHIP_G35_G_BRIDGE 0x2980 +#endif + +#ifndef PCI_CHIP_I965_Q +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_Q_BRIDGE 0x2990 +#endif + +#ifndef PCI_CHIP_I965_G +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_G_BRIDGE 0x29A0 +#endif + +#ifndef PCI_CHIP_I946_GZ +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I946_GZ_BRIDGE 0x2970 +#endif + +#ifndef PCI_CHIP_I965_GM +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GM_BRIDGE 0x2A00 +#endif + +#ifndef PCI_CHIP_I965_GME +#define PCI_CHIP_I965_GME 0x2A12 +#define PCI_CHIP_I965_GME_BRIDGE 0x2A10 +#endif + 
+#ifndef PCI_CHIP_G33_G +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_G33_G_BRIDGE 0x29C0 +#endif + +#ifndef PCI_CHIP_Q35_G +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q35_G_BRIDGE 0x29B0 +#endif + +#ifndef PCI_CHIP_Q33_G +#define PCI_CHIP_Q33_G 0x29D2 +#define PCI_CHIP_Q33_G_BRIDGE 0x29D0 +#endif + +#ifndef PCI_CHIP_GM45_GM +#define PCI_CHIP_GM45_GM 0x2A42 +#define PCI_CHIP_GM45_BRIDGE 0x2A40 +#endif + +#ifndef PCI_CHIP_G45_E_G +#define PCI_CHIP_G45_E_G 0x2E02 +#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00 +#endif + +#ifndef PCI_CHIP_G45_G +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G45_G_BRIDGE 0x2E20 +#endif + +#ifndef PCI_CHIP_Q45_G +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_Q45_G_BRIDGE 0x2E10 +#endif + +#ifndef PCI_CHIP_G41_G +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_G41_G_BRIDGE 0x2E30 +#endif + +#ifndef PCI_CHIP_B43_G +#define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G_BRIDGE 0x2E40 +#endif + +#ifndef PCI_CHIP_B43_G1 +#define PCI_CHIP_B43_G1 0x2E92 +#define PCI_CHIP_B43_G1_BRIDGE 0x2E90 +#endif + +#ifndef PCI_CHIP_IRONLAKE_D_G +#define PCI_CHIP_IRONLAKE_D_G 0x0042 +#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040 +#endif + +#ifndef PCI_CHIP_IRONLAKE_M_G +#define PCI_CHIP_IRONLAKE_M_G 0x0046 +#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044 +#endif + +#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE +#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */ +#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A + +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_D_GT1 0x0152 +#define PCI_CHIP_IVYBRIDGE_D_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a + +#define PCI_CHIP_HASWELL_D_GT1 0x0402 +#define PCI_CHIP_HASWELL_D_GT2 0x0412 +#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A +#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02 +#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A +#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02 +#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A +#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12 +#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22 +#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 +#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A +#define 
PCI_CHIP_HASWELL_CRW_S_GT2	0x0D2A
+#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS	0x0D3A
+
+#define PCI_CHIP_VALLEYVIEW_PO		0x0f30
+#define PCI_CHIP_VALLEYVIEW_1		0x0f31
+#define PCI_CHIP_VALLEYVIEW_2		0x0f32
+#define PCI_CHIP_VALLEYVIEW_3		0x0f33
+
+#endif
+
+#define I85X_CAPID		0x44
+#define I85X_VARIANT_MASK	0x7
+#define I85X_VARIANT_SHIFT	5
+#define I855_GME		0x0
+#define I855_GM			0x4
+#define I852_GME		0x2
+#define I852_GM			0x5
+
+#define I810_MEMBASE(p,n)	(p)->regions[(n)].base_addr
+#define VENDOR_ID(p)		(p)->vendor_id
+#define DEVICE_ID(p)		(p)->device_id
+#define SUBVENDOR_ID(p)		(p)->subvendor_id
+#define SUBSYS_ID(p)		(p)->subdevice_id
+#define CHIP_REVISION(p)	(p)->revision
+
+#define INTEL_INFO(intel) ((intel)->info)
+#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
+#define IS_GEN1(intel) IS_GENx(intel, 1)
+#define IS_GEN2(intel) IS_GENx(intel, 2)
+#define IS_GEN3(intel) IS_GENx(intel, 3)
+#define IS_GEN4(intel) IS_GENx(intel, 4)
+#define IS_GEN5(intel) IS_GENx(intel, 5)
+#define IS_GEN6(intel) IS_GENx(intel, 6)
+#define IS_GEN7(intel) IS_GENx(intel, 7)
+#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
+
+/* Some chips have specific errata (or limits) that we need to work around. */
+#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
+#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
+#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
+
+#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
+#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
+
+#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
+
+/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
+#define SUPPORTS_YTILING(intel) (INTEL_INFO(intel)->gen >= 040)
+#define HAS_BLT(intel) (INTEL_INFO(intel)->gen >= 060)
+
+struct intel_device_info {
+	int gen;
+};
+
+//void intel_detect_chipset(ScrnInfoPtr scrn,
+//			    EntityInfoPtr ent,
+//			    struct pci_device *pci);
+
+#endif /* INTEL_DRIVER_H */
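One convention in this header is worth spelling out: the generation is stored as 8 × major (plus a minor step) and written in octal so the digits read as "major.minor" — 040 is Gen4, 060 is Gen6, and 075 is Haswell (Gen7.5). That is why IS_GENx() brackets gen between 8*(X) and 8*((X)+1), and why the kgem code later compares against literals like 040 and 060. A minimal, hypothetical check of the arithmetic (not part of the driver):

    /* Hypothetical sketch of the octal gen encoding assumed by IS_GENx()
     * and kgem: gen = 8*major + minor, so 075 == 7*8 + 5 == Haswell. */
    #include <assert.h>

    int main(void)
    {
        int gen = 075;                        /* Haswell: 61 decimal */
        assert(gen >= 8*7 && gen < 8*(7+1));  /* IS_GEN7() would hold */
        assert(gen >= 060);                   /* HAS_BLT(): BLT ring since Gen6 */
        return 0;
    }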
diff --git a/drivers/video/Intel-2D/intel_list.h b/drivers/video/Intel-2D/intel_list.h
new file mode 100644
index 0000000000..5716fc6f10
--- /dev/null
+++ b/drivers/video/Intel-2D/intel_list.h
@@ -0,0 +1,353 @@
+/*
+ * Copyright © 2010-2012 Intel Corporation
+ * Copyright © 2010 Francisco Jerez
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_LIST_H_
+#define _INTEL_LIST_H_
+
+#include <stdbool.h>
+
+/**
+ * @file Classic doubly-linked circular list implementation.
+ * For real usage examples of the linked list, see the file test/list.c
+ *
+ * Example:
+ * We need to keep a list of struct foo in the parent struct bar, i.e. what
+ * we want is something like this.
+ *
+ *     struct bar {
+ *          ...
+ *          struct foo *list_of_foos; -----> struct foo {}, struct foo {}, struct foo{}
+ *          ...
+ *     }
+ *
+ * We need one list head in bar and a list element in all list_of_foos (both are of
+ * data type 'struct list').
+ *
+ *     struct bar {
+ *          ...
+ *          struct list list_of_foos;
+ *          ...
+ *     }
+ *
+ *     struct foo {
+ *          ...
+ *          struct list entry;
+ *          ...
+ *     }
+ *
+ * Now we initialize the list head:
+ *
+ *     struct bar bar;
+ *     ...
+ *     list_init(&bar.list_of_foos);
+ *
+ * Then we create the first element and add it to this list:
+ *
+ *     struct foo *foo = malloc(...);
+ *     ....
+ *     list_add(&foo->entry, &bar.list_of_foos);
+ *
+ * Repeat the above for each element you want to add to the list. Deleting
+ * works with the element itself.
+ *      list_del(&foo->entry);
+ *      free(foo);
+ *
+ * Note: calling list_del(&bar.list_of_foos) will set bar.list_of_foos to an
+ * empty list again.
+ *
+ * Looping through the list requires a 'struct foo' as iterator and the
+ * name of the field the subnodes use.
+ *
+ *     struct foo *iterator;
+ *     list_for_each_entry(iterator, &bar.list_of_foos, entry) {
+ *         if (iterator->something == ...)
+ *             ...
+ *     }
+ *
+ * Note: You must not call list_del() on the iterator if you continue the
+ * loop. You need to run the safe for-each loop instead:
+ *
+ *     struct foo *iterator, *next;
+ *     list_for_each_entry_safe(iterator, next, &bar.list_of_foos, entry) {
+ *         if (...)
+ *             list_del(&iterator->entry);
+ *     }
+ *
+ */
+
+/**
+ * The linkage struct for list nodes. This struct must be part of your
+ * to-be-linked struct. struct list is required for both the head of the
+ * list and for each list node.
+ *
+ * Position and name of the struct list field is irrelevant.
+ * There are no requirements that elements of a list are of the same type.
+ * There are no requirements for a list head, any struct list can be a list
+ * head.
+ */
+struct list {
+	struct list *next, *prev;
+};
+
+/**
+ * Initialize the list as an empty list.
+ *
+ * Example:
+ *     list_init(&bar->list_of_foos);
+ *
+ * @param list The list to initialize.
+ */
+static void
+list_init(struct list *list)
+{
+	list->next = list->prev = list;
+}
+
+static inline void
+__list_add(struct list *entry,
+	   struct list *prev,
+	   struct list *next)
+{
+	next->prev = entry;
+	entry->next = next;
+	entry->prev = prev;
+	prev->next = entry;
+}
+
+/**
+ * Insert a new element after the given list head. The new element does not
+ * need to be initialised as an empty list.
+ * The list changes from:
+ *      head → some element → ...
+ * to
+ *      head → new element → older element → ...
+ *
+ * Example:
+ *     struct foo *newfoo = malloc(...);
+ *     list_add(&newfoo->entry, &bar->list_of_foos);
+ *
+ * @param entry The new element to prepend to the list.
+ * @param head The existing list.
+ */
+static inline void
+list_add(struct list *entry, struct list *head)
+{
+	__list_add(entry, head, head->next);
+}
+
+static inline void
+list_add_tail(struct list *entry, struct list *head)
+{
+	__list_add(entry, head->prev, head);
+}
+
+static inline void list_replace(struct list *old,
+				struct list *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+#define list_for_each(pos, head)				\
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * Append a new element to the end of the list given with this list head.
+ *
+ * The list changes from:
+ *      head → some element → ... → lastelement
+ * to
+ *      head → some element → ... → lastelement → new element
+ *
+ * Example:
+ *     struct foo *newfoo = malloc(...);
+ *     list_append(&newfoo->entry, &bar->list_of_foos);
+ *
+ * @param entry The new element to append to the list.
+ * @param head The existing list.
+ */
+static inline void
+list_append(struct list *entry, struct list *head)
+{
+	__list_add(entry, head->prev, head);
+}
+
+static inline void
+__list_del(struct list *prev, struct list *next)
+{
+	assert(next->prev == prev->next);
+	next->prev = prev;
+	prev->next = next;
+}
+
+static inline void
+_list_del(struct list *entry)
+{
+	assert(entry->prev->next == entry);
+	assert(entry->next->prev == entry);
+	__list_del(entry->prev, entry->next);
+}
+
+/**
+ * Remove the element from the list it is in. Using this function will reset
+ * the pointers to/from this element so it is removed from the list. It does
+ * NOT free the element itself or manipulate it otherwise.
+ *
+ * Using list_del on a pure list head (like in the example at the top of
+ * this file) will NOT remove the first element from
+ * the list but rather reset the list as an empty list.
+ *
+ * Example:
+ *     list_del(&foo->entry);
+ *
+ * @param entry The element to remove.
+ */
+static inline void
+list_del(struct list *entry)
+{
+	_list_del(entry);
+	list_init(entry);
+}
+
+static inline void list_move(struct list *list, struct list *head)
+{
+	if (list->prev != head) {
+		_list_del(list);
+		list_add(list, head);
+	}
+}
+
+static inline void list_move_tail(struct list *list, struct list *head)
+{
+	_list_del(list);
+	list_add_tail(list, head);
+}
+
+/**
+ * Check if the list is empty.
+ *
+ * Example:
+ *     list_is_empty(&bar->list_of_foos);
+ *
+ * @return True if the list is empty, False otherwise.
+ */
+static inline bool
+list_is_empty(struct list *head)
+{
+	return head->next == head;
+}
+
+/**
+ * Alias of container_of
+ */
+#define list_entry(ptr, type, member) \
+    container_of(ptr, type, member)
+
+/**
+ * Retrieve the first list entry for the given list pointer.
+ *
+ * Example:
+ *     struct foo *first;
+ *     first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
+ *
+ * @param ptr The list head
+ * @param type Data type of the list element to retrieve
+ * @param member Member name of the struct list field in the list element.
+ * @return A pointer to the first list element.
+ */
+#define list_first_entry(ptr, type, member) \
+    list_entry((ptr)->next, type, member)
+
+/**
+ * Retrieve the last list entry for the given list pointer.
+ * + * Example: + * struct foo *first; + * first = list_last_entry(&bar->list_of_foos, struct foo, list_of_foos); + * + * @param ptr The list head + * @param type Data type of the list element to retrieve + * @param member Member name of the struct list field in the list element. + * @return A pointer to the last list element. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define __container_of(ptr, sample, member) \ + (void *)((char *)(ptr) \ + - ((char *)&(sample)->member - (char *)(sample))) +/** + * Loop through the list given by head and set pos to struct in the list. + * + * Example: + * struct foo *iterator; + * list_for_each_entry(iterator, &bar->list_of_foos, entry) { + * [modify iterator] + * } + * + * This macro is not safe for node deletion. Use list_for_each_entry_safe + * instead. + * + * @param pos Iterator variable of the type of the list elements. + * @param head List head + * @param member Member name of the struct list in the list elements. + * + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = __container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.next, pos, member)) + +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = __container_of((head)->prev, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.prev, pos, member)) + +/** + * Loop through the list, keeping a backup pointer to the element. This + * macro allows for the deletion of a list element while looping through the + * list. + * + * See list_for_each_entry for more details. + */ +#define list_for_each_entry_safe(pos, tmp, head, member) \ + for (pos = __container_of((head)->next, pos, member), \ + tmp = __container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = tmp, tmp = __container_of(pos->member.next, tmp, member)) + + +#undef container_of +#define container_of(ptr, type, member) \ + ((type *)((char *)(ptr) - (char *) &((type *)0)->member)) + +#endif /* _INTEL_LIST_H_ */ + diff --git a/drivers/video/Intel-2D/kgem-sna.c b/drivers/video/Intel-2D/kgem-sna.c new file mode 100644 index 0000000000..803320fa0e --- /dev/null +++ b/drivers/video/Intel-2D/kgem-sna.c @@ -0,0 +1,559 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" + +#define DBG_NO_HW 0 +#define DBG_NO_TILING 1 +#define DBG_NO_CACHE 0 +#define DBG_NO_CACHE_LEVEL 0 +#define DBG_NO_CPU 0 +#define DBG_NO_USERPTR 0 +#define DBG_NO_LLC 0 +#define DBG_NO_SEMAPHORES 0 +#define DBG_NO_MADV 0 +#define DBG_NO_UPLOAD_CACHE 0 +#define DBG_NO_UPLOAD_ACTIVE 0 +#define DBG_NO_MAP_UPLOAD 0 +#define DBG_NO_RELAXED_FENCING 0 +#define DBG_NO_SECURE_BATCHES 0 +#define DBG_NO_PINNED_BATCHES 0 +#define DBG_NO_FAST_RELOC 0 +#define DBG_NO_HANDLE_LUT 0 +#define DBG_DUMP 0 + +#define MAX_GTT_VMA_CACHE 512 +#define MAX_CPU_VMA_CACHE INT16_MAX +#define MAP_PRESERVE_TIME 10 + +#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) +#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) +#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) +#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) +#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) + +#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) + +#define LOCAL_I915_PARAM_HAS_BLT 11 +#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 +#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 +#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 +#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 +#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 +#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 +#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 + + + +static int gem_param(struct kgem *kgem, int name) +{ + ioctl_t io; + + drm_i915_getparam_t gp; + int v = -1; /* No param uses the sign bit, reserve it for errors */ + + VG_CLEAR(gp); + gp.param = name; + gp.value = &v; + + io.handle = kgem->fd; + io.io_code = SRV_GET_PARAM; + io.input = &gp; + io.inp_size = sizeof(gp); + io.output = NULL; + io.out_size = 0; + + if (call_service(&io)!=0) + return -1; + + VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); + return v; +} + +static bool test_has_no_reloc(struct kgem *kgem) +{ + if (DBG_NO_FAST_RELOC) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; +} + +static bool test_has_handle_lut(struct kgem *kgem) +{ + if (DBG_NO_HANDLE_LUT) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; +} + +static bool test_has_semaphores_enabled(struct kgem *kgem) +{ + FILE *file; + bool detected = false; + int ret; + + if (DBG_NO_SEMAPHORES) + return false; + + ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES); + if (ret != -1) + return ret > 0; + + return detected; +} + + +static bool test_has_relaxed_fencing(struct kgem *kgem) +{ + if (kgem->gen < 040) { + if (DBG_NO_RELAXED_FENCING) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; + } else + return true; +} + +static bool test_has_llc(struct kgem *kgem) +{ + int has_llc = -1; + + if (DBG_NO_LLC) + return false; + +#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */ + has_llc = gem_param(kgem, I915_PARAM_HAS_LLC); +#endif + if (has_llc == -1) { + DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); + has_llc = kgem->gen >= 060; + } + + return has_llc; +} + +static bool test_has_cacheing(struct kgem *kgem) +{ + uint32_t handle; + bool ret = false; + + if (DBG_NO_CACHE_LEVEL) + return false; + + /* Incoherent blt and sampler hangs the GPU */ + if (kgem->gen == 040) + return false; + +// handle = gem_create(kgem->fd, 1); +// if (handle == 0) +// return false; + +// ret = gem_set_cacheing(kgem->fd, handle, UNCACHED); 
+// gem_close(kgem->fd, handle); + return ret; +} + +static bool test_has_userptr(struct kgem *kgem) +{ +#if defined(USE_USERPTR) + uint32_t handle; + void *ptr; + + if (DBG_NO_USERPTR) + return false; + + /* Incoherent blt and sampler hangs the GPU */ + if (kgem->gen == 040) + return false; + + ptr = malloc(PAGE_SIZE); + handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); + gem_close(kgem->fd, handle); + free(ptr); + + return handle != 0; +#else + return false; +#endif +} + +static bool test_has_secure_batches(struct kgem *kgem) +{ + if (DBG_NO_SECURE_BATCHES) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; +} + +static bool test_has_pinned_batches(struct kgem *kgem) +{ + if (DBG_NO_PINNED_BATCHES) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; +} + + + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) +{ + struct drm_i915_gem_get_aperture aperture; + size_t totalram; + unsigned half_gpu_max; + unsigned int i, j; + + DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); + + memset(kgem, 0, sizeof(*kgem)); + + kgem->fd = fd; + kgem->gen = gen; + + list_init(&kgem->requests[0]); + list_init(&kgem->requests[1]); + list_init(&kgem->batch_buffers); + list_init(&kgem->active_buffers); + list_init(&kgem->flushing); + list_init(&kgem->large); + list_init(&kgem->large_inactive); + list_init(&kgem->snoop); + list_init(&kgem->scanout); + for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++) + list_init(&kgem->pinned_batches[i]); + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + list_init(&kgem->inactive[i]); + for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) + list_init(&kgem->active[i][j]); + } + for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { + for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) + list_init(&kgem->vma[i].inactive[j]); + } + + kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; + kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; + + kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; + DBG(("%s: has BLT ring? %d\n", __FUNCTION__, + kgem->has_blt)); + + kgem->has_relaxed_delta = + gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; + DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, + kgem->has_relaxed_delta)); + + + kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); + DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, + kgem->has_relaxed_fencing)); + + kgem->has_llc = test_has_llc(kgem); + DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__, + kgem->has_llc)); + + kgem->has_cacheing = test_has_cacheing(kgem); + DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, + kgem->has_cacheing)); + + kgem->has_userptr = test_has_userptr(kgem); + DBG(("%s: has userptr? %d\n", __FUNCTION__, + kgem->has_userptr)); + + kgem->has_no_reloc = test_has_no_reloc(kgem); + DBG(("%s: has no-reloc? %d\n", __FUNCTION__, + kgem->has_no_reloc)); + + kgem->has_handle_lut = test_has_handle_lut(kgem); + DBG(("%s: has handle-lut? %d\n", __FUNCTION__, + kgem->has_handle_lut)); + + kgem->has_semaphores = false; + if (kgem->has_blt && test_has_semaphores_enabled(kgem)) + kgem->has_semaphores = true; + DBG(("%s: semaphores enabled? %d\n", __FUNCTION__, + kgem->has_semaphores)); + + kgem->can_blt_cpu = gen >= 030; + DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, + kgem->can_blt_cpu)); + + kgem->has_secure_batches = test_has_secure_batches(kgem); + DBG(("%s: can use privileged batchbuffers? 
%d\n", __FUNCTION__, + kgem->has_secure_batches)); + + kgem->has_pinned_batches = test_has_pinned_batches(kgem); + DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__, + kgem->has_pinned_batches)); + +#if 0 + + if (!is_hw_supported(kgem, dev)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); + kgem->wedged = 1; + } else if (__kgem_throttle(kgem)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Detected a hung GPU, disabling acceleration.\n"); + kgem->wedged = 1; + } + + kgem->batch_size = ARRAY_SIZE(kgem->batch); + if (gen == 020 && !kgem->has_pinned_batches) + /* Limited to what we can pin */ + kgem->batch_size = 4*1024; + if (gen == 022) + /* 865g cannot handle a batch spanning multiple pages */ + kgem->batch_size = PAGE_SIZE / sizeof(uint32_t); + if ((gen >> 3) == 7) + kgem->batch_size = 16*1024; + if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) + kgem->batch_size = 4*1024; + + if (!kgem_init_pinned_batches(kgem) && gen == 020) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Unable to reserve memory for GPU, disabling acceleration.\n"); + kgem->wedged = 1; + } + + DBG(("%s: maximum batch size? %d\n", __FUNCTION__, + kgem->batch_size)); + + kgem->min_alignment = 4; + if (gen < 040) + kgem->min_alignment = 64; + + kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; + DBG(("%s: half cpu cache %d pages\n", __FUNCTION__, + kgem->half_cpu_cache_pages)); + + kgem->next_request = __kgem_request_alloc(kgem); + + DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__, + !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing), + kgem->has_llc, kgem->has_cacheing, kgem->has_userptr)); + + VG_CLEAR(aperture); + aperture.aper_size = 0; + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (aperture.aper_size == 0) + aperture.aper_size = 64*1024*1024; + + DBG(("%s: aperture size %lld, available now %lld\n", + __FUNCTION__, + (long long)aperture.aper_size, + (long long)aperture.aper_available_size)); + + kgem->aperture_total = aperture.aper_size; + kgem->aperture_high = aperture.aper_size * 3/4; + kgem->aperture_low = aperture.aper_size * 1/3; + if (gen < 033) { + /* Severe alignment penalties */ + kgem->aperture_high /= 2; + kgem->aperture_low /= 2; + } + DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, + kgem->aperture_low, kgem->aperture_low / (1024*1024), + kgem->aperture_high, kgem->aperture_high / (1024*1024))); + + kgem->aperture_mappable = agp_aperture_size(dev, gen); + if (kgem->aperture_mappable == 0 || + kgem->aperture_mappable > aperture.aper_size) + kgem->aperture_mappable = aperture.aper_size; + DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, + kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); + + kgem->buffer_size = 64 * 1024; + while (kgem->buffer_size < kgem->aperture_mappable >> 10) + kgem->buffer_size *= 2; + if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages) + kgem->buffer_size = kgem->half_cpu_cache_pages << 12; + DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__, + kgem->buffer_size, kgem->buffer_size / 1024)); + + kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10; + kgem->max_gpu_size = kgem->max_object_size; + if (!kgem->has_llc) + kgem->max_gpu_size = MAX_CACHE_SIZE; + + totalram = total_ram_size(); + if (totalram == 0) { + DBG(("%s: total ram size unknown, assuming maximum of total aperture\n", + __FUNCTION__)); + 
totalram = kgem->aperture_total; + } + DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); + if (kgem->max_object_size > totalram / 2) + kgem->max_object_size = totalram / 2; + if (kgem->max_gpu_size > totalram / 4) + kgem->max_gpu_size = totalram / 4; + + kgem->max_cpu_size = kgem->max_object_size; + + half_gpu_max = kgem->max_gpu_size / 2; + kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2; + if (kgem->max_copy_tile_size > half_gpu_max) + kgem->max_copy_tile_size = half_gpu_max; + + if (kgem->has_llc) + kgem->max_upload_tile_size = kgem->max_copy_tile_size; + else + kgem->max_upload_tile_size = kgem->aperture_mappable / 4; + if (kgem->max_upload_tile_size > half_gpu_max) + kgem->max_upload_tile_size = half_gpu_max; + + kgem->large_object_size = MAX_CACHE_SIZE; + if (kgem->large_object_size > kgem->max_gpu_size) + kgem->large_object_size = kgem->max_gpu_size; + + if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) { + if (kgem->large_object_size > kgem->max_cpu_size) + kgem->large_object_size = kgem->max_cpu_size; + } else + kgem->max_cpu_size = 0; + if (DBG_NO_CPU) + kgem->max_cpu_size = 0; + + DBG(("%s: maximum object size=%d\n", + __FUNCTION__, kgem->max_object_size)); + DBG(("%s: large object thresold=%d\n", + __FUNCTION__, kgem->large_object_size)); + DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", + __FUNCTION__, + kgem->max_gpu_size, kgem->max_cpu_size, + kgem->max_upload_tile_size, kgem->max_copy_tile_size)); + + /* Convert the aperture thresholds to pages */ + kgem->aperture_low /= PAGE_SIZE; + kgem->aperture_high /= PAGE_SIZE; + + kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; + if ((int)kgem->fence_max < 0) + kgem->fence_max = 5; /* minimum safe value for all hw */ + DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); + + kgem->batch_flags_base = 0; + if (kgem->has_no_reloc) + kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC; + if (kgem->has_handle_lut) + kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; + if (kgem->has_pinned_batches) + kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; + +#endif + +} + + + +void kgem_clear_dirty(struct kgem *kgem) +{ + struct list * const buffers = &kgem->next_request->buffers; + struct kgem_bo *bo; + + list_for_each_entry(bo, buffers, request) { + if (!bo->dirty) + break; + + bo->dirty = false; + } +} + + + +uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) +{ + struct kgem_bo_binding *b; + + for (b = &bo->binding; b && b->offset; b = b->next) + if (format == b->format) + return b->offset; + + return 0; +} + +void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) +{ + struct kgem_bo_binding *b; + + for (b = &bo->binding; b; b = b->next) { + if (b->offset) + continue; + + b->offset = offset; + b->format = format; + + if (b->next) + b->next->offset = 0; + + return; + } + + b = malloc(sizeof(*b)); + if (b) { + b->next = bo->binding.next; + b->format = format; + b->offset = offset; + bo->binding.next = b; + } +} + +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domain, + uint32_t delta) +{ + return 0; +} + +void kgem_reset(struct kgem *kgem) +{ + +}; + +void _kgem_submit(struct kgem *kgem) +{ +}; + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) +{ + struct kgem_bo *bo = NULL; + + return bo; +}; + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + + +}; diff --git a/drivers/video/Intel-2D/kgem.h 
b/drivers/video/Intel-2D/kgem.h new file mode 100644 index 0000000000..d9b53de15c --- /dev/null +++ b/drivers/video/Intel-2D/kgem.h @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#ifndef KGEM_H +#define KGEM_H + +#define HAS_DEBUG_FULL 1 + +#include +#include +#include +#include + +#include "i915_drm.h" + +#include "compiler.h" +#include "intel_list.h" + + + +#if HAS_DEBUG_FULL +#define DBG(x) printf x +#else +#define DBG(x) +#endif + +struct kgem_bo { + struct kgem_request *rq; +#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3)) +#define RQ_RING(rq) ((uintptr_t)(rq) & 3) +#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT) + struct drm_i915_gem_exec_object2 *exec; + + struct kgem_bo *proxy; + + struct list list; + struct list request; + struct list vma; + + void *map; +#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) +#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) + + struct kgem_bo_binding { + struct kgem_bo_binding *next; + uint32_t format; + uint16_t offset; + } binding; + + uint32_t unique_id; + uint32_t refcnt; + uint32_t handle; + uint32_t target_handle; + uint32_t presumed_offset; + uint32_t delta; + union { + struct { + uint32_t count:27; +#define PAGE_SIZE 4096 + uint32_t bucket:5; +#define NUM_CACHE_BUCKETS 16 +#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS+12)) + } pages; + uint32_t bytes; + } size; + uint32_t pitch : 18; /* max 128k */ + uint32_t tiling : 2; + uint32_t reusable : 1; + uint32_t dirty : 1; + uint32_t domain : 2; + uint32_t needs_flush : 1; + uint32_t snoop : 1; + uint32_t io : 1; + uint32_t flush : 1; + uint32_t scanout : 1; + uint32_t purged : 1; +}; +#define DOMAIN_NONE 0 +#define DOMAIN_CPU 1 +#define DOMAIN_GTT 2 +#define DOMAIN_GPU 3 + +struct kgem_request { + struct list list; + struct kgem_bo *bo; + struct list buffers; + int ring; +}; + +enum { + MAP_GTT = 0, + MAP_CPU, + NUM_MAP_TYPES, +}; + +struct kgem { + int fd; + int wedged; + unsigned gen; + + uint32_t unique_id; + + enum kgem_mode { + /* order matches I915_EXEC_RING ordering */ + KGEM_NONE = 0, + KGEM_RENDER, + KGEM_BSD, + KGEM_BLT, + } mode, ring; + + struct list flushing; + struct list large; + struct list large_inactive; + struct list active[NUM_CACHE_BUCKETS][3]; + struct list inactive[NUM_CACHE_BUCKETS]; + struct list pinned_batches[2]; + struct list snoop; + struct list scanout; + struct list 
batch_buffers, active_buffers; + + struct list requests[2]; + struct kgem_request *next_request; + struct kgem_request static_request; + + struct { + struct list inactive[NUM_CACHE_BUCKETS]; + int16_t count; + } vma[NUM_MAP_TYPES]; + + uint32_t batch_flags; + uint32_t batch_flags_base; +#define I915_EXEC_SECURE (1<<9) +#define LOCAL_EXEC_OBJECT_WRITE (1<<2) + + uint16_t nbatch; + uint16_t surface; + uint16_t nexec; + uint16_t nreloc; + uint16_t nreloc__self; + uint16_t nfence; + uint16_t batch_size; + uint16_t min_alignment; + + uint32_t flush:1; + uint32_t need_expire:1; + uint32_t need_purge:1; + uint32_t need_retire:1; + uint32_t need_throttle:1; + uint32_t scanout_busy:1; + uint32_t busy:1; + + uint32_t has_userptr :1; + uint32_t has_blt :1; + uint32_t has_relaxed_fencing :1; + uint32_t has_relaxed_delta :1; + uint32_t has_semaphores :1; + uint32_t has_secure_batches :1; + uint32_t has_pinned_batches :1; + uint32_t has_cacheing :1; + uint32_t has_llc :1; + uint32_t has_no_reloc :1; + uint32_t has_handle_lut :1; + + uint32_t can_blt_cpu :1; + + uint16_t fence_max; + uint16_t half_cpu_cache_pages; + uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable; + uint32_t aperture, aperture_fenced; + uint32_t max_upload_tile_size, max_copy_tile_size; + uint32_t max_gpu_size, max_cpu_size; + uint32_t large_object_size, max_object_size; + uint32_t buffer_size; + + void (*context_switch)(struct kgem *kgem, int new_mode); + void (*retire)(struct kgem *kgem); + void (*expire)(struct kgem *kgem); + + uint32_t batch[64*1024-8]; + struct drm_i915_gem_exec_object2 exec[256]; + struct drm_i915_gem_relocation_entry reloc[4096]; + uint16_t reloc__self[256]; + +#ifdef DEBUG_MEMORY + struct { + int bo_allocs; + size_t bo_bytes; + } debug_memory; +#endif +}; + +#define KGEM_BATCH_RESERVED 1 +#define KGEM_RELOC_RESERVED 4 +#define KGEM_EXEC_RESERVED 1 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) +#endif + +#define KGEM_BATCH_SIZE(K) ((K)->batch_size-KGEM_BATCH_RESERVED) +#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED) +#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED) + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen); +void kgem_reset(struct kgem *kgem); + +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only); + +struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name); + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags); +struct kgem_bo *kgem_create_proxy(struct kgem *kgem, + struct kgem_bo *target, + int offset, int length); + + +int kgem_choose_tiling(struct kgem *kgem, + int tiling, int width, int height, int bpp); +unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth); +#define KGEM_CAN_CREATE_GPU 0x1 +#define KGEM_CAN_CREATE_CPU 0x2 +#define KGEM_CAN_CREATE_LARGE 0x4 +#define KGEM_CAN_CREATE_GTT 0x8 + +struct kgem_bo * +kgem_replace_bo(struct kgem *kgem, + struct kgem_bo *src, + uint32_t width, + uint32_t height, + uint32_t pitch, + uint32_t bpp); +enum { + CREATE_EXACT = 0x1, + CREATE_INACTIVE = 0x2, + CREATE_CPU_MAP = 0x4, + CREATE_GTT_MAP = 0x8, + CREATE_SCANOUT = 0x10, + CREATE_PRIME = 0x20, + CREATE_TEMPORARY = 0x40, + CREATE_CACHED = 0x80, + CREATE_NO_RETIRE = 0x100, + CREATE_NO_THROTTLE = 0x200, +}; +struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, + int bpp, + int tiling, + uint32_t flags); + +uint32_t 
kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); +void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); +int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); + +bool kgem_retire(struct kgem *kgem); + +bool __kgem_ring_is_idle(struct kgem *kgem, int ring); +static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring) +{ + ring = ring == KGEM_BLT; + + if (list_is_empty(&kgem->requests[ring])) + return true; + + return __kgem_ring_is_idle(kgem, ring); +} + +static inline bool kgem_is_idle(struct kgem *kgem) +{ + if (!kgem->need_retire) + return true; + + return kgem_ring_is_idle(kgem, kgem->ring); +} + +void _kgem_submit(struct kgem *kgem); +static inline void kgem_submit(struct kgem *kgem) +{ + if (kgem->nbatch) + _kgem_submit(kgem); +} + +static inline bool kgem_flush(struct kgem *kgem, bool flush) +{ + if (kgem->nreloc == 0) + return false; + + return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring); +} + +#if 0 + +static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->exec) + _kgem_submit(kgem); +} + +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + kgem_bo_submit(kgem, bo); + + if (!bo->needs_flush) + return; + + /* If the kernel fails to emit the flush, then it will be forced when + * we assume direct access. And as the useual failure is EIO, we do + * not actualy care. + */ + __kgem_flush(kgem, bo); +} + +#endif + +static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) +{ + assert(bo->refcnt); + bo->refcnt++; + return bo; +} + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt); + if (--bo->refcnt == 0) + _kgem_bo_destroy(kgem, bo); +} + +void kgem_clear_dirty(struct kgem *kgem); + +static inline void kgem_set_mode(struct kgem *kgem, + enum kgem_mode mode, + struct kgem_bo *bo) +{ + assert(!kgem->wedged); + +#if DEBUG_FLUSH_BATCH + kgem_submit(kgem); +#endif + + if (kgem->mode == mode) + return; + +// kgem->context_switch(kgem, mode); + kgem->mode = mode; +} + +static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +{ + assert(kgem->mode == KGEM_NONE); + assert(kgem->nbatch == 0); + assert(!kgem->wedged); +// kgem->context_switch(kgem, mode); + kgem->mode = mode; +} + +static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords) +{ + assert(num_dwords > 0); + assert(kgem->nbatch < kgem->surface); + assert(kgem->surface <= kgem->batch_size); + return likely(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface); +} + +static inline bool kgem_check_reloc(struct kgem *kgem, int n) +{ + assert(kgem->nreloc <= KGEM_RELOC_SIZE(kgem)); + return likely(kgem->nreloc + n <= KGEM_RELOC_SIZE(kgem)); +} + +static inline bool kgem_check_exec(struct kgem *kgem, int n) +{ + assert(kgem->nexec <= KGEM_EXEC_SIZE(kgem)); + return likely(kgem->nexec + n <= KGEM_EXEC_SIZE(kgem)); +} + +static inline bool kgem_check_reloc_and_exec(struct kgem *kgem, int n) +{ + return kgem_check_reloc(kgem, n) && kgem_check_exec(kgem, n); +} + +static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, + int num_dwords, + int num_surfaces) +{ + return (int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) <= (int)(kgem->surface - num_surfaces*8) && + kgem_check_reloc(kgem, num_surfaces) && + kgem_check_exec(kgem, num_surfaces); +} + +static inline uint32_t 
*kgem_get_batch(struct kgem *kgem) +{ + + return kgem->batch + kgem->nbatch; +} + +bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0))); +bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo); +bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0))); + +#define KGEM_RELOC_FENCED 0x8000 +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domains, + uint32_t delta); + +void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo); +void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write); +void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); +void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr); +uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); + +bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, + const void *data, int length); + +int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo); +void kgem_get_tile_size(struct kgem *kgem, int tiling, + int *tile_width, int *tile_height, int *tile_size); + +static inline int __kgem_buffer_size(struct kgem_bo *bo) +{ + assert(bo->proxy != NULL); + return bo->size.bytes; +} + +static inline int __kgem_bo_size(struct kgem_bo *bo) +{ + assert(bo->proxy == NULL); + return PAGE_SIZE * bo->size.pages.count; +} + +static inline int kgem_bo_size(struct kgem_bo *bo) +{ + if (bo->proxy) + return __kgem_buffer_size(bo); + else + return __kgem_bo_size(bo); +} + +/* +static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem, + struct kgem_bo *bo) +{ + int pitch = bo->pitch; + if (kgem->gen >= 040 && bo->tiling) + pitch /= 4; + if (pitch > MAXSHORT) { + DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n", + __FUNCTION__, bo->handle, pitch)); + return false; + } + + return true; +} + +static inline bool kgem_bo_can_blt(struct kgem *kgem, + struct kgem_bo *bo) +{ + if (bo->tiling == I915_TILING_Y) { + DBG(("%s: can not blt to handle=%d, tiling=Y\n", + __FUNCTION__, bo->handle)); + return false; + } + + return kgem_bo_blt_pitch_is_ok(kgem, bo); +} +*/ + +static inline bool __kgem_bo_is_mappable(struct kgem *kgem, + struct kgem_bo *bo) +{ + if (bo->domain == DOMAIN_GTT) + return true; + + if (kgem->gen < 040 && bo->tiling && + bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) + return false; + + if (!bo->presumed_offset) + return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; + + return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; +} + +static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: map=%p, tiling=%d, domain=%d\n", + __FUNCTION__, bo->map, bo->tiling, bo->domain)); + assert(bo->refcnt); + + if (bo->map == NULL) + return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU; + + return IS_CPU_MAP(bo->map) == !bo->tiling; +} + + + + + + + + + + + +static inline bool kgem_bo_is_busy(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, + bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); + assert(bo->refcnt); + return bo->rq; +} + +/* + +static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, + bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); + assert(bo->refcnt); + + if (bo->exec) + return true; + + if (kgem_flush(kgem, bo->flush)) + kgem_submit(kgem); + + if (bo->rq && !__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); + + return kgem_bo_is_busy(bo); +} + +*/ + +static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) +{ + if (bo == NULL) + return false; + + assert(bo->refcnt); + return bo->dirty; +} + +static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo) +{ + /* The bo is outside of our control, so presume it is written to */ + bo->needs_flush = true; + if (bo->rq == NULL) + bo->rq = (void *)kgem; + + if (bo->domain != DOMAIN_GPU) + bo->domain = DOMAIN_NONE; +} + +static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d (proxy? %d)\n", __FUNCTION__, + bo->handle, bo->proxy != NULL)); + + bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; + bo->needs_flush = bo->dirty = true; + list_move(&bo->request, &RQ(bo->rq)->buffers); +} + +static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) +{ + assert(bo->refcnt); + do { + assert(bo->exec); + assert(bo->rq); + + if (bo->dirty) + return; + + __kgem_bo_mark_dirty(bo); + } while ((bo = bo->proxy)); +} + +#define KGEM_BUFFER_WRITE 0x1 +#define KGEM_BUFFER_INPLACE 0x2 +#define KGEM_BUFFER_LAST 0x4 + +#define KGEM_BUFFER_WRITE_INPLACE (KGEM_BUFFER_WRITE | KGEM_BUFFER_INPLACE) + +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret); +struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, + int width, int height, int bpp, + uint32_t flags, + void **ret); +bool kgem_buffer_is_inplace(struct kgem_bo *bo); +void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo); + +void kgem_throttle(struct kgem *kgem); +#define MAX_INACTIVE_TIME 10 +bool kgem_expire_cache(struct kgem *kgem); +void kgem_purge_cache(struct kgem *kgem); +void kgem_cleanup_cache(struct kgem *kgem); + +#if HAS_DEBUG_FULL +void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); +#else +static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) +{ + (void)kgem; + (void)nbatch; +} +#endif + +#endif /* KGEM_H */ diff --git a/drivers/video/Intel-2D/pciaccess.h b/drivers/video/Intel-2D/pciaccess.h new file mode 100644 index 0000000000..f2c9b06a97 --- /dev/null +++ b/drivers/video/Intel-2D/pciaccess.h @@ -0,0 +1,537 @@ +/* + * (C) Copyright IBM Corporation 2006 + * Copyright 2009 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* + * Copyright (c) 2007 Paulo R. Zanoni, Tiago Vignatti + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file pciaccess.h + * + * \author Ian Romanick + */ + +#ifndef PCIACCESS_H +#define PCIACCESS_H + +#include + +#if __GNUC__ >= 3 +#define __deprecated __attribute__((deprecated)) +#else +#define __deprecated +#endif + +typedef uint64_t pciaddr_t; + +struct pci_device; +struct pci_device_iterator; +struct pci_id_match; +struct pci_slot_match; + +#ifdef __cplusplus +extern "C" { +#endif + +int pci_device_has_kernel_driver(struct pci_device *dev); + +int pci_device_is_boot_vga(struct pci_device *dev); + +int pci_device_read_rom(struct pci_device *dev, void *buffer); + +int __deprecated pci_device_map_region(struct pci_device *dev, + unsigned region, int write_enable); + +int __deprecated pci_device_unmap_region(struct pci_device *dev, + unsigned region); + +int pci_device_map_range(struct pci_device *dev, pciaddr_t base, + pciaddr_t size, unsigned map_flags, void **addr); + +int pci_device_unmap_range(struct pci_device *dev, void *memory, + pciaddr_t size); + +int __deprecated pci_device_map_memory_range(struct pci_device *dev, + pciaddr_t base, pciaddr_t size, int write_enable, void **addr); + +int __deprecated pci_device_unmap_memory_range(struct pci_device *dev, + void *memory, pciaddr_t size); + +int pci_device_probe(struct pci_device *dev); + +const struct pci_agp_info *pci_device_get_agp_info(struct pci_device *dev); + +const struct pci_bridge_info *pci_device_get_bridge_info( + struct pci_device *dev); + +const struct pci_pcmcia_bridge_info *pci_device_get_pcmcia_bridge_info( + struct pci_device *dev); + +int pci_device_get_bridge_buses(struct pci_device *dev, int *primary_bus, + int *secondary_bus, int *subordinate_bus); + +int pci_system_init(void); + +void pci_system_init_dev_mem(int fd); + +void pci_system_cleanup(void); + +struct pci_device_iterator *pci_slot_match_iterator_create( + const struct pci_slot_match *match); + +struct 
pci_device_iterator *pci_id_match_iterator_create(
+    const struct pci_id_match *match);
+
+void pci_iterator_destroy(struct pci_device_iterator *iter);
+
+struct pci_device *pci_device_next(struct pci_device_iterator *iter);
+
+struct pci_device *pci_device_find_by_slot(uint32_t domain, uint32_t bus,
+    uint32_t dev, uint32_t func);
+
+struct pci_device *pci_device_get_parent_bridge(struct pci_device *dev);
+
+void pci_get_strings(const struct pci_id_match *m,
+    const char **device_name, const char **vendor_name,
+    const char **subdevice_name, const char **subvendor_name);
+const char *pci_device_get_device_name(const struct pci_device *dev);
+const char *pci_device_get_subdevice_name(const struct pci_device *dev);
+const char *pci_device_get_vendor_name(const struct pci_device *dev);
+const char *pci_device_get_subvendor_name(const struct pci_device *dev);
+
+void pci_device_enable(struct pci_device *dev);
+
+int pci_device_cfg_read    (struct pci_device *dev, void *data,
+    pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_read);
+int pci_device_cfg_read_u8 (struct pci_device *dev, uint8_t *data,
+    pciaddr_t offset);
+int pci_device_cfg_read_u16(struct pci_device *dev, uint16_t *data,
+    pciaddr_t offset);
+int pci_device_cfg_read_u32(struct pci_device *dev, uint32_t *data,
+    pciaddr_t offset);
+
+int pci_device_cfg_write    (struct pci_device *dev, const void *data,
+    pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_written);
+int pci_device_cfg_write_u8 (struct pci_device *dev, uint8_t data,
+    pciaddr_t offset);
+int pci_device_cfg_write_u16(struct pci_device *dev, uint16_t data,
+    pciaddr_t offset);
+int pci_device_cfg_write_u32(struct pci_device *dev, uint32_t data,
+    pciaddr_t offset);
+int pci_device_cfg_write_bits(struct pci_device *dev, uint32_t mask,
+    uint32_t data, pciaddr_t offset);
+
+#ifdef __cplusplus
+}
+#endif
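+/*
+ * Illustrative sketch, not part of the original header: driving the
+ * config-space accessors declared above. Assumes pci_system_init()
+ * has already succeeded and that `dev` came from an iterator; the
+ * helper name is hypothetical.
+ */
+static inline int pci_device_is_intel(struct pci_device *dev)
+{
+    uint32_t id;
+
+    /* offset 0 of config space: vendor id in [15:0], device id in [31:16] */
+    if (pci_device_cfg_read_u32(dev, &id, 0) != 0)
+        return 0;
+
+    return (id & 0xffff) == 0x8086;
+}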
+/**
+ * \name Mapping flags passed to \c pci_device_map_range
+ */
+/*@{*/
+#define PCI_DEV_MAP_FLAG_WRITABLE	(1U<<0)
+#define PCI_DEV_MAP_FLAG_WRITE_COMBINE	(1U<<1)
+#define PCI_DEV_MAP_FLAG_CACHABLE	(1U<<2)
+/*@}*/
+
+
+#define PCI_MATCH_ANY  (~0)
+
+/**
+ * Compare two PCI ID values (either vendor or device). This is used
+ * internally to compare the fields of \c pci_id_match to the fields of
+ * \c pci_device.
+ */
+#define PCI_ID_COMPARE(a, b) \
+    (((a) == PCI_MATCH_ANY) || ((a) == (b)))
+
+/**
+ */
+struct pci_id_match {
+    /**
+     * \name Device / vendor matching controls
+     *
+     * Control the search based on the device, vendor, subdevice, or subvendor
+     * IDs. Setting any of these fields to \c PCI_MATCH_ANY will cause the
+     * field to not be used in the comparison.
+     */
+    /*@{*/
+    uint32_t vendor_id;
+    uint32_t device_id;
+    uint32_t subvendor_id;
+    uint32_t subdevice_id;
+    /*@}*/
+
+
+    /**
+     * \name Device class matching controls
+     *
+     */
+    /*@{*/
+    uint32_t device_class;
+    uint32_t device_class_mask;
+    /*@}*/
+
+    intptr_t match_data;
+};
+
+
+/**
+ */
+struct pci_slot_match {
+    /**
+     * \name Device slot matching controls
+     *
+     * Control the search based on the domain, bus, slot, and function of
+     * the device. Setting any of these fields to \c PCI_MATCH_ANY will cause
+     * the field to not be used in the comparison.
+     */
+    /*@{*/
+    uint32_t domain;
+    uint32_t bus;
+    uint32_t dev;
+    uint32_t func;
+    /*@}*/
+
+    intptr_t match_data;
+};
+
+/**
+ * BAR descriptor for a PCI device.
+ */
+struct pci_mem_region {
+    /**
+     * When the region is mapped, this is the pointer to the memory.
+     *
+     * This field is \b only set when the deprecated \c pci_device_map_region
+     * interface is used. Use \c pci_device_map_range instead.
+     *
+     * \deprecated
+     */
+    void *memory;
+
+
+    /**
+     * Base physical address of the region within its bus / domain.
+     *
+     * \warning
+     * This address is really only useful to other devices in the same
+     * domain. It's probably \b not the address applications will ever
+     * use.
+     *
+     * \warning
+     * Most (all?) platform back-ends leave this field unset.
+     */
+    pciaddr_t bus_addr;
+
+
+    /**
+     * Base physical address of the region from the CPU's point of view.
+     *
+     * This address is typically passed to \c pci_device_map_range to create
+     * a mapping of the region to the CPU's virtual address space.
+     */
+    pciaddr_t base_addr;
+
+
+    /**
+     * Size, in bytes, of the region.
+     */
+    pciaddr_t size;
+
+
+    /**
+     * Is the region I/O ports or memory?
+     */
+    unsigned is_IO:1;
+
+    /**
+     * Is the memory region prefetchable?
+     *
+     * \note
+     * This can only be set if \c is_IO is not set.
+     */
+    unsigned is_prefetchable:1;
+
+
+    /**
+     * Is the memory at a 64-bit address?
+     *
+     * \note
+     * This can only be set if \c is_IO is not set.
+     */
+    unsigned is_64:1;
+};
+
+
+/**
+ * PCI device.
+ *
+ * Contains all of the information about a particular PCI device.
+ */
+struct pci_device {
+    /**
+     * \name Device bus identification.
+     *
+     * Complete bus identification, including domain, of the device. On
+     * platforms that do not support PCI domains (e.g., 32-bit x86 hardware),
+     * the domain will always be zero.
+     */
+    /*@{*/
+    uint16_t domain;
+    uint8_t bus;
+    uint8_t dev;
+    uint8_t func;
+    /*@}*/
+
+
+    /**
+     * \name Vendor / device ID
+     *
+     * The vendor ID, device ID, and sub-IDs for the device.
+     */
+    /*@{*/
+    uint16_t vendor_id;
+    uint16_t device_id;
+    uint16_t subvendor_id;
+    uint16_t subdevice_id;
+    /*@}*/
+
+    /**
+     * Device's class, subclass, and programming interface packed into a
+     * single 32-bit value. The class is at bits [23:16], subclass is at
+     * bits [15:8], and programming interface is at [7:0].
+     */
+    uint32_t device_class;
+
+
+    /**
+     * Device revision number, as read from the configuration header.
+     */
+    uint8_t revision;
+
+
+    /**
+     * BAR descriptors for the device.
+     */
+    struct pci_mem_region regions[6];
+
+
+    /**
+     * Size, in bytes, of the device's expansion ROM.
+     */
+    pciaddr_t rom_size;
+
+
+    /**
+     * IRQ associated with the device. If there is no IRQ, this value will
+     * be -1.
+     */
+    int irq;
+
+
+    /**
+     * Storage for user data. Users of the library can store arbitrary
+     * data in this pointer. The library will not use it for any purpose.
+     * It is the user's responsibility to free this memory before destroying
+     * the \c pci_device structure.
+     */
+    intptr_t user_data;
+
+    /**
+     * Used by the VGA arbiter. Type of resource decoded by the device and
+     * the file descriptor (/dev/vga_arbiter). */
+    int vgaarb_rsrc;
+};
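+/*
+ * Illustrative sketch, not part of the original header: unpacking
+ * device_class with the layout documented above (base class [23:16],
+ * subclass [15:8], programming interface [7:0]). The helper names are
+ * hypothetical.
+ */
+static inline unsigned pci_device_base_class(const struct pci_device *dev)
+{
+    return (dev->device_class >> 16) & 0xff;
+}
+
+static inline unsigned pci_device_subclass(const struct pci_device *dev)
+{
+    return (dev->device_class >> 8) & 0xff;
+}
+
+static inline unsigned pci_device_prog_if(const struct pci_device *dev)
+{
+    return dev->device_class & 0xff;
+}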
+/**
+ * Description of the AGP capability of the device.
+ *
+ * \sa pci_device_get_agp_info
+ */
+struct pci_agp_info {
+    /**
+     * Offset of the AGP registers in the devices configuration register
+     * space. This is generally used so that the offset of the AGP command
+     * register can be determined.
+     */
+    unsigned config_offset;
+
+
+    /**
+     * \name AGP major / minor version.
+     */
+    /*@{*/
+    uint8_t major_version;
+    uint8_t minor_version;
+    /*@}*/
+
+    /**
+     * Logical OR of the supported AGP rates. For example, a value of 0x07
+     * means that the device can support 1x, 2x, and 4x. A value of 0x0c
+     * means that the device can support 8x and 4x.
+     */
+    uint8_t rates;
+
+    unsigned int fast_writes:1;   /**< Are fast-writes supported? */
+    unsigned int addr64:1;
+    unsigned int htrans:1;
+    unsigned int gart64:1;
+    unsigned int coherent:1;
+    unsigned int sideband:1;      /**< Is side-band addressing supported? */
+    unsigned int isochronus:1;
+
+    uint8_t async_req_size;
+    uint8_t calibration_cycle_timing;
+    uint8_t max_requests;
+};
+
+/**
+ * Description of a PCI-to-PCI bridge device.
+ *
+ * \sa pci_device_get_bridge_info
+ */
+struct pci_bridge_info {
+    uint8_t primary_bus;
+    uint8_t secondary_bus;
+    uint8_t subordinate_bus;
+    uint8_t secondary_latency_timer;
+
+    uint8_t io_type;
+    uint8_t mem_type;
+    uint8_t prefetch_mem_type;
+
+    uint16_t secondary_status;
+    uint16_t bridge_control;
+
+    uint32_t io_base;
+    uint32_t io_limit;
+
+    uint32_t mem_base;
+    uint32_t mem_limit;
+
+    uint64_t prefetch_mem_base;
+    uint64_t prefetch_mem_limit;
+};
+
+/**
+ * Description of a PCI-to-PCMCIA bridge device.
+ *
+ * \sa pci_device_get_pcmcia_bridge_info
+ */
+struct pci_pcmcia_bridge_info {
+    uint8_t primary_bus;
+    uint8_t card_bus;
+    uint8_t subordinate_bus;
+    uint8_t cardbus_latency_timer;
+
+    uint16_t secondary_status;
+    uint16_t bridge_control;
+
+    struct {
+        uint32_t base;
+        uint32_t limit;
+    } io[2];
+
+    struct {
+        uint32_t base;
+        uint32_t limit;
+    } mem[2];
+
+};
+
+
+/**
+ * VGA Arbiter definitions, functions and related.
+ */
+
+/* Legacy VGA regions */
+#define VGA_ARB_RSRC_NONE       0x00
+#define VGA_ARB_RSRC_LEGACY_IO  0x01
+#define VGA_ARB_RSRC_LEGACY_MEM 0x02
+/* Non-legacy access */
+#define VGA_ARB_RSRC_NORMAL_IO  0x04
+#define VGA_ARB_RSRC_NORMAL_MEM 0x08
+
+int  pci_device_vgaarb_init      (void);
+void pci_device_vgaarb_fini      (void);
+int  pci_device_vgaarb_set_target(struct pci_device *dev);
+/* use the targeted device */
+int  pci_device_vgaarb_decodes   (int new_vga_rsrc);
+int  pci_device_vgaarb_lock      (void);
+int  pci_device_vgaarb_trylock   (void);
+int  pci_device_vgaarb_unlock    (void);
+/* return the current device count + resource decodes for the device */
+int pci_device_vgaarb_get_info(struct pci_device *dev, int *vga_count, int *rsrc_decodes);
+
+/*
+ * I/O space access.
+ */ + +struct pci_io_handle; + +struct pci_io_handle *pci_device_open_io(struct pci_device *dev, pciaddr_t base, + pciaddr_t size); +struct pci_io_handle *pci_legacy_open_io(struct pci_device *dev, pciaddr_t base, + pciaddr_t size); +void pci_device_close_io(struct pci_device *dev, struct pci_io_handle *handle); +uint32_t pci_io_read32(struct pci_io_handle *handle, uint32_t reg); +uint16_t pci_io_read16(struct pci_io_handle *handle, uint32_t reg); +uint8_t pci_io_read8(struct pci_io_handle *handle, uint32_t reg); +void pci_io_write32(struct pci_io_handle *handle, uint32_t reg, uint32_t data); +void pci_io_write16(struct pci_io_handle *handle, uint32_t reg, uint16_t data); +void pci_io_write8(struct pci_io_handle *handle, uint32_t reg, uint8_t data); + +/* + * Legacy memory access + */ + +int pci_device_map_legacy(struct pci_device *dev, pciaddr_t base, + pciaddr_t size, unsigned map_flags, void **addr); +int pci_device_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size); + +#endif /* PCIACCESS_H */ diff --git a/drivers/video/Intel-2D/render/exa_sf.g4b b/drivers/video/Intel-2D/render/exa_sf.g4b new file mode 100644 index 0000000000..f6ce9d2f28 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_sf.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00400001, 0x206003be, 0x00690060, 0x00000000 }, + { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 }, + { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 }, + { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 }, + { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/drivers/video/Intel-2D/render/exa_sf_mask.g4b b/drivers/video/Intel-2D/render/exa_sf_mask.g4b new file mode 100644 index 0000000000..0b4f7ef0ce --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_sf_mask.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 }, + { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 }, + { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 }, + { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_ca.g4b b/drivers/video/Intel-2D/render/exa_wm_ca.g4b new file mode 100644 index 0000000000..73fa573f39 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_ca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00802041, 0x228077bd, 0x008d0280, 
0x008d0380 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_ca.g6b b/drivers/video/Intel-2D/render/exa_wm_ca.g6b new file mode 100644 index 0000000000..8321491b32 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_ca.g6b @@ -0,0 +1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00800041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00800041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g4b b/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g4b new file mode 100644 index 0000000000..df2e37163d --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 }, + { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g6b b/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g6b new file mode 100644 index 0000000000..946a95baf6 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g6b @@ -0,0 +1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00800041, 0x220077bd, 0x008d0300, 0x008d0280 }, + { 0x00800041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00800041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_affine.g4b b/drivers/video/Intel-2D/render/exa_wm_mask_affine.g4b new file mode 100644 index 0000000000..24f3083972 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_affine.g6b b/drivers/video/Intel-2D/render/exa_wm_mask_affine.g6b new file mode 100644 index 0000000000..268c55ebcd --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_affine.g6b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x210077be, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x212077be, 0x00000100, 0x008d0080 }, + { 0x0060005a, 0x214077be, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x216077be, 0x00000110, 0x008d0080 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_projective.g4b b/drivers/video/Intel-2D/render/exa_wm_mask_projective.g4b new file mode 100644 index 0000000000..f2b1ffb3ac --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 
}, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc }, + { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_projective.g6b b/drivers/video/Intel-2D/render/exa_wm_mask_projective.g6b new file mode 100644 index 0000000000..e6172e2bc4 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_projective.g6b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, + { 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 }, + { 0x00600041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x212077be, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 }, + { 0x00600041, 0x214077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x216077be, 0x008d03e0, 0x008d01a0 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g4b b/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g4b new file mode 100644 index 0000000000..39703f29b8 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x23801c09, 0x00000000, 0x02520102 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g6b b/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g6b new file mode 100644 index 0000000000..2c7deb4cce --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x23801cc9, 0x000000e0, 0x0a2a0102 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g4b b/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g4b new file mode 100644 index 0000000000..c5d44138e9 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x22c01c09, 0x00000000, 0x02580102 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g6b b/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g6b new file mode 100644 index 0000000000..3b1c3f8a12 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22c01cc9, 0x000000e0, 0x0a8a0102 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_noca.g4b b/drivers/video/Intel-2D/render/exa_wm_noca.g4b new file mode 100644 index 0000000000..4a7691a264 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_noca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_noca.g6b b/drivers/video/Intel-2D/render/exa_wm_noca.g6b new file mode 100644 index 0000000000..83b760ab99 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_noca.g6b @@ -0,0 +1,4 @@ + { 0x00800041, 0x21c077bd, 0x008d01c0, 
0x008d0380 }, + { 0x00800041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00800041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_affine.g4b b/drivers/video/Intel-2D/render/exa_wm_src_affine.g4b new file mode 100644 index 0000000000..8cbf53bad4 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_affine.g6b b/drivers/video/Intel-2D/render/exa_wm_src_affine.g6b new file mode 100644 index 0000000000..3f1da43929 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_affine.g6b @@ -0,0 +1,4 @@ + { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 }, + { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_projective.g4b b/drivers/video/Intel-2D/render/exa_wm_src_projective.g4b new file mode 100644 index 0000000000..ee85387685 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c }, + { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_projective.g6b b/drivers/video/Intel-2D/render/exa_wm_src_projective.g6b new file mode 100644 index 0000000000..2784a1f985 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_projective.g6b @@ -0,0 +1,12 @@ + { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 }, + { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 }, + { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 }, + { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 }, + { 0x00600041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x206077be, 0x008d03e0, 0x008d01a0 }, + { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 }, + { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 }, + { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 }, + { 0x00600041, 0x20a077be, 0x008d03e0, 0x008d01a0 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g4b b/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g4b new file mode 100644 index 
0000000000..f711c04a10 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22801c09, 0x00000000, 0x02520001 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g6b b/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g6b new file mode 100644 index 0000000000..9679c282cf --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22801cc9, 0x00000020, 0x0a2a0001 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g4b b/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g4b new file mode 100644 index 0000000000..9b93db8b4c --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g4b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x21c01c09, 0x00000000, 0x02580001 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g6b b/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g6b new file mode 100644 index 0000000000..3f5a977f42 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g6b @@ -0,0 +1,3 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g4b b/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g4b new file mode 100644 index 0000000000..edc8471ad1 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g4b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x01800031, 0x22001c09, 0x00000000, 0x02520001 }, + { 0x01800031, 0x21c01c09, 0x00000000, 0x02520003 }, + { 0x01800031, 0x22401c09, 0x00000000, 0x02520005 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g6b b/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g6b new file mode 100644 index 0000000000..0a41c3eb58 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g6b @@ -0,0 +1,5 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x00600001, 0x20200022, 0x008d0000, 0x00000000 }, + { 0x02800031, 0x22001cc9, 0x00000020, 0x0a2a0001 }, + { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0003 }, + { 0x02800031, 0x22401cc9, 0x00000020, 0x0a2a0005 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_write.g4b b/drivers/video/Intel-2D/render/exa_wm_write.g4b new file mode 100644 index 0000000000..582264fd2d --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_write.g4b @@ -0,0 +1,18 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, + { 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 }, + { 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 }, + { 0x00601001, 0x210003be, 0x008d0260, 0x00000000 }, + { 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, 
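+	/* The { 0x0000007e, ... } rows above and below are EU NOPs
+	   (opcode 0x7e == 126), padding the kernel out to its fixed
+	   allocation; the real program ends at the final send above. */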
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_write.g6b b/drivers/video/Intel-2D/render/exa_wm_write.g6b new file mode 100644 index 0000000000..0498746590 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_write.g6b @@ -0,0 +1,17 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_xy.g4b b/drivers/video/Intel-2D/render/exa_wm_xy.g4b new file mode 100644 index 0000000000..a676de4187 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_xy.g4b @@ -0,0 +1,4 @@ + { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g4b b/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g4b new file mode 100644 index 0000000000..91aa563abe --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g4b @@ -0,0 +1,12 @@ + { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g6b b/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g6b new file mode 100644 index 0000000000..8813082468 --- /dev/null +++ b/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g6b @@ -0,0 +1,12 @@ + { 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80800048, 0x22007fbd, 0x008d0340, 
0xbec8b439 }, + { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/drivers/video/Intel-2D/sna.c b/drivers/video/Intel-2D/sna.c new file mode 100644 index 0000000000..db7d21f466 --- /dev/null +++ b/drivers/video/Intel-2D/sna.c @@ -0,0 +1,598 @@ +//#include "../bitmap.h" + +#include +#include + +#include "sna.h" + + +const struct intel_device_info * +intel_detect_chipset(struct pci_device *pci); + +//struct kgem_bo *create_bo(bitmap_t *bitmap); + +static bool sna_solid_cache_init(struct sna *sna); + +struct sna *sna_device; + +void no_render_init(struct sna *sna) +{ + struct sna_render *render = &sna->render; + + memset (render,0, sizeof (*render)); + + render->prefer_gpu = PREFER_GPU_BLT; + + render->vertices = render->vertex_data; + render->vertex_size = ARRAY_SIZE(render->vertex_data); + +// render->composite = no_render_composite; + +// render->copy_boxes = no_render_copy_boxes; +// render->copy = no_render_copy; + +// render->fill_boxes = no_render_fill_boxes; +// render->fill = no_render_fill; +// render->fill_one = no_render_fill_one; +// render->clear = no_render_clear; + +// render->reset = no_render_reset; +// render->flush = no_render_flush; +// render->fini = no_render_fini; + +// sna->kgem.context_switch = no_render_context_switch; +// sna->kgem.retire = no_render_retire; + +// if (sna->kgem.gen >= 60) + sna->kgem.ring = KGEM_RENDER; + + sna_vertex_init(sna); +} + +void sna_vertex_init(struct sna *sna) +{ +// pthread_mutex_init(&sna->render.lock, NULL); +// pthread_cond_init(&sna->render.wait, NULL); + sna->render.active = 0; +} + +bool sna_accel_init(struct sna *sna) +{ + const char *backend; + +// list_init(&sna->deferred_free); +// list_init(&sna->dirty_pixmaps); +// list_init(&sna->active_pixmaps); +// list_init(&sna->inactive_clock[0]); +// list_init(&sna->inactive_clock[1]); + +// sna_accel_install_timers(sna); + + + backend = "no"; + no_render_init(sna); + + if (sna->info->gen >= 0100) { +/* } else if (sna->info->gen >= 070) { + if (gen7_render_init(sna)) + backend = "IvyBridge"; */ + } else if (sna->info->gen >= 060) { + if (gen6_render_init(sna)) + backend = "SandyBridge"; +/* } else if (sna->info->gen >= 050) { + if (gen5_render_init(sna)) + backend = "Ironlake"; + } else if (sna->info->gen >= 040) { + if (gen4_render_init(sna)) + backend = "Broadwater/Crestline"; + } else if (sna->info->gen >= 030) { + if (gen3_render_init(sna)) + backend = "gen3"; + } else if (sna->info->gen >= 020) { + if (gen2_render_init(sna)) + backend = "gen2"; */ + } + + DBG(("%s(backend=%s, prefer_gpu=%x)\n", + __FUNCTION__, backend, sna->render.prefer_gpu)); + + kgem_reset(&sna->kgem); + +// if (!sna_solid_cache_init(sna)) +// return false; + + sna_device = sna; +#if 0 + { + struct kgem_bo *screen_bo; + bitmap_t screen; + + screen.pitch = 1024*4; + screen.gaddr = 0; + screen.width = 1024; + screen.height = 768; + screen.obj = (void*)-1; + + screen_bo = create_bo(&screen); + + sna->render.clear(sna, &screen, screen_bo); + } +#endif + + return true; +} + +int sna_init(uint32_t service) +{ + ioctl_t io; + + static struct pci_device device; + struct sna *sna; + + DBG(("%s\n", __FUNCTION__)); + + sna = malloc(sizeof(struct sna)); + if (sna == NULL) + return false; + + io.handle = service; + io.io_code = SRV_GET_INFO; + io.input = &device; + io.inp_size = sizeof(device); + io.output = NULL; + io.out_size = 0; + + if (call_service(&io)!=0) + return false; + + sna->PciInfo = 
&device; + + sna->info = intel_detect_chipset(sna->PciInfo); + + kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen); +/* + if (!xf86ReturnOptValBool(sna->Options, + OPTION_RELAXED_FENCING, + sna->kgem.has_relaxed_fencing)) { + xf86DrvMsg(scrn->scrnIndex, + sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED, + "Disabling use of relaxed fencing\n"); + sna->kgem.has_relaxed_fencing = 0; + } + if (!xf86ReturnOptValBool(sna->Options, + OPTION_VMAP, + sna->kgem.has_vmap)) { + xf86DrvMsg(scrn->scrnIndex, + sna->kgem.has_vmap ? X_CONFIG : X_PROBED, + "Disabling use of vmap\n"); + sna->kgem.has_vmap = 0; + } +*/ + + /* Disable tiling by default */ + sna->tiling = SNA_TILING_DISABLE; + + /* Default fail-safe value of 75 Hz */ +// sna->vblank_interval = 1000 * 1000 * 1000 / 75; + + sna->flags = 0; + + return sna_accel_init(sna); +} + +#if 0 + +static bool sna_solid_cache_init(struct sna *sna) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + + DBG(("%s\n", __FUNCTION__)); + + cache->cache_bo = + kgem_create_linear(&sna->kgem, sizeof(cache->color)); + if (!cache->cache_bo) + return FALSE; + + /* + * Initialise [0] with white since it is very common and filling the + * zeroth slot simplifies some of the checks. + */ + cache->color[0] = 0xffffffff; + cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t)); + cache->bo[0]->pitch = 4; + cache->dirty = 1; + cache->size = 1; + cache->last = 0; + + return TRUE; +} + +void +sna_render_flush_solid(struct sna *sna) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + + DBG(("sna_render_flush_solid(size=%d)\n", cache->size)); + assert(cache->dirty); + assert(cache->size); + + kgem_bo_write(&sna->kgem, cache->cache_bo, + cache->color, cache->size*sizeof(uint32_t)); + cache->dirty = 0; + cache->last = 0; +} + +static void +sna_render_finish_solid(struct sna *sna, bool force) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + int i; + + DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n", + force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty)); + + if (!force && cache->cache_bo->domain != DOMAIN_GPU) + return; + + if (cache->dirty) + sna_render_flush_solid(sna); + + for (i = 0; i < cache->size; i++) { + if (cache->bo[i] == NULL) + continue; + + kgem_bo_destroy(&sna->kgem, cache->bo[i]); + cache->bo[i] = NULL; + } + kgem_bo_destroy(&sna->kgem, cache->cache_bo); + + DBG(("sna_render_finish_solid reset\n")); + + cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color)); + cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t)); + cache->bo[0]->pitch = 4; + if (force) + cache->size = 1; +} + + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, uint32_t color) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + int i; + + DBG(("%s: %08x\n", __FUNCTION__, color)); + +// if ((color & 0xffffff) == 0) /* alpha only */ +// return kgem_bo_reference(sna->render.alpha_cache.bo[color>>24]); + + if (color == 0xffffffff) { + DBG(("%s(white)\n", __FUNCTION__)); + return kgem_bo_reference(cache->bo[0]); + } + + if (cache->color[cache->last] == color) { + DBG(("sna_render_get_solid(%d) = %x (last)\n", + cache->last, color)); + return kgem_bo_reference(cache->bo[cache->last]); + } + + for (i = 1; i < cache->size; i++) { + if (cache->color[i] == color) { + if (cache->bo[i] == NULL) { + DBG(("sna_render_get_solid(%d) = %x (recreate)\n", + i, color)); + goto create; + } else { + DBG(("sna_render_get_solid(%d) = %x (old)\n", + i, 
color));
+				goto done;
+			}
+		}
+	}
+
+	sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
+
+	i = cache->size++;
+	cache->color[i] = color;
+	cache->dirty = 1;
+	DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));
+
+create:
+	cache->bo[i] = kgem_create_proxy(cache->cache_bo,
+					 i*sizeof(uint32_t), sizeof(uint32_t));
+	cache->bo[i]->pitch = 4;
+
+done:
+	cache->last = i;
+	return kgem_bo_reference(cache->bo[i]);
+}
+
+#endif
+
+
+int sna_blit_copy(uint32_t dst_bitmap, int dst_x, int dst_y,
+                  int w, int h, uint32_t src_bitmap, int src_x, int src_y)
+
+{
+    struct sna_copy_op copy;
+    struct kgem_bo src_bo, dst_bo;
+
+    memset(&src_bo, 0, sizeof(src_bo));
+    memset(&dst_bo, 0, sizeof(dst_bo));
+
+//    src_bo.gaddr  = src_bitmap->gaddr;
+//    src_bo.pitch  = src_bitmap->pitch;
+//    src_bo.tiling = 0;
+
+//    dst_bo.gaddr  = dst_bitmap->gaddr;
+//    dst_bo.pitch  = dst_bitmap->pitch;
+//    dst_bo.tiling = 0;
+
+    memset(&copy, 0, sizeof(copy));
+
+    sna_device->render.copy(sna_device, GXcopy, NULL, &src_bo, NULL, &dst_bo, &copy);
+    copy.blt(sna_device, &copy, src_x, src_y, w, h, dst_x, dst_y);
+    copy.done(sna_device, &copy);
+
+//    _kgem_submit(&sna_device->kgem, &execbuffer);
+
+    return 0;
+};
+
+
+/*
+
+int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y,
+                 int w, int h, bitmap_t *src_bitmap, int src_x, int src_y,
+                 bitmap_t *mask_bitmap)
+
+{
+    struct sna_composite_op cop;
+    batchbuffer_t execbuffer;
+    BoxRec box;
+
+    struct kgem_bo src_bo, mask_bo, dst_bo;
+
+    memset(&cop, 0, sizeof(cop));
+    memset(&execbuffer,  0, sizeof(execbuffer));
+    memset(&src_bo, 0, sizeof(src_bo));
+    memset(&dst_bo, 0, sizeof(dst_bo));
+    memset(&mask_bo, 0, sizeof(mask_bo));
+
+    src_bo.gaddr  = src_bitmap->gaddr;
+    src_bo.pitch  = src_bitmap->pitch;
+    src_bo.tiling = 0;
+
+    dst_bo.gaddr  = dst_bitmap->gaddr;
+    dst_bo.pitch  = dst_bitmap->pitch;
+    dst_bo.tiling = 0;
+
+    mask_bo.gaddr  = mask_bitmap->gaddr;
+    mask_bo.pitch  = mask_bitmap->pitch;
+    mask_bo.tiling = 0;
+
+    box.x1 = dst_x;
+    box.y1 = dst_y;
+    box.x2 = dst_x+w;
+    box.y2 = dst_y+h;
+
+    sna_device->render.composite(sna_device, 0,
+                                 src_bitmap, &src_bo,
+                                 mask_bitmap, &mask_bo,
+                                 dst_bitmap, &dst_bo,
+                                 src_x, src_y,
+                                 src_x, src_y,
+                                 dst_x, dst_y,
+                                 w, h, &cop);
+
+    cop.box(sna_device, &cop, &box);
+    cop.done(sna_device, &cop);
+
+    INIT_LIST_HEAD(&execbuffer.objects);
+    list_add_tail(&src_bitmap->obj->exec_list, &execbuffer.objects);
+    list_add_tail(&mask_bitmap->obj->exec_list, &execbuffer.objects);
+
+    _kgem_submit(&sna_device->kgem, &execbuffer);
+
+};
+
+*/
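+/*
+ * Note on the .gen values below: they are octal literals, one digit per
+ * half-generation, so 030 = gen3 (i915), 045 = gen4.5 (G4x), 060 = gen6
+ * (Sandy Bridge) and 075 = gen7.5 (Haswell). That is why sna_accel_init()
+ * above dispatches with comparisons like `sna->info->gen >= 060`.
+ */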
+static const struct intel_device_info intel_generic_info = {
+	.gen = -1,
+};
+
+static const struct intel_device_info intel_i915_info = {
+	.gen = 030,
+};
+static const struct intel_device_info intel_i945_info = {
+	.gen = 031,
+};
+
+static const struct intel_device_info intel_g33_info = {
+	.gen = 033,
+};
+
+static const struct intel_device_info intel_i965_info = {
+	.gen = 040,
+};
+
+static const struct intel_device_info intel_g4x_info = {
+	.gen = 045,
+};
+
+static const struct intel_device_info intel_ironlake_info = {
+	.gen = 050,
+};
+
+static const struct intel_device_info intel_sandybridge_info = {
+	.gen = 060,
+};
+
+static const struct intel_device_info intel_ivybridge_info = {
+	.gen = 070,
+};
+
+static const struct intel_device_info intel_valleyview_info = {
+	.gen = 071,
+};
+
+static const struct intel_device_info intel_haswell_info = {
+	.gen = 075,
+};
+
+#define INTEL_DEVICE_MATCH(d,i) \
+    { 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) }
+
+
+static const struct pci_id_match intel_device_match[] = {
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ),
+	/* Another marketing win: Q35 is another g33 device, not a gen4 part
+	 * like its G35 brethren.
+	 */
+	INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
+
+	INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
+	INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
+
+	INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
+
+	{ 0, 0, 0 },
+};
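+/*
+ * Illustrative sketch, not part of the original source: what an
+ * INTEL_DEVICE_MATCH entry encodes. Vendor 0x8086 plus a device id,
+ * and device_class = 0x3 << 16 with mask 0xff << 16, i.e. base class
+ * 0x03 (display controller) with any subclass/prog-if. A full match
+ * test in the spirit of pciaccess' PCI_ID_COMPARE would look like the
+ * hypothetical helper below; PciDevMatch() that follows only compares
+ * device ids, since the table is Intel-only to begin with.
+ */
+static int intel_device_match_one(const struct pci_id_match *m,
+                                  const struct pci_device *dev)
+{
+    return PCI_ID_COMPARE(m->vendor_id, dev->vendor_id) &&
+           PCI_ID_COMPARE(m->device_id, dev->device_id) &&
+           (dev->device_class & m->device_class_mask) == m->device_class;
+}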
+const struct pci_id_match *PciDevMatch(uint16_t dev,const struct pci_id_match *list)
+{
+    while(list->device_id)
+    {
+        if(dev==list->device_id)
+            return list;
+        list++;
+    }
+    return NULL;
+}
+
+const struct intel_device_info *
+intel_detect_chipset(struct pci_device *pci)
+{
+    const struct pci_id_match *ent = NULL;
+    const char *name = NULL;
+    int i;
+
+    ent = PciDevMatch(pci->device_id, intel_device_match);
+
+    if(ent != NULL)
+        return (const struct intel_device_info*)ent->match_data;
+    else
+        return &intel_generic_info;
+
+#if 0
+    for (i = 0; intel_chipsets[i].name != NULL; i++) {
+        if (DEVICE_ID(pci) == intel_chipsets[i].token) {
+            name = intel_chipsets[i].name;
+            break;
+        }
+    }
+    if (name == NULL) {
+        xf86DrvMsg(scrn->scrnIndex, X_WARNING, "unknown chipset\n");
+        name = "unknown";
+    } else {
+        xf86DrvMsg(scrn->scrnIndex, from,
+                   "Integrated Graphics Chipset: Intel(R) %s\n",
+                   name);
+    }
+
+    scrn->chipset = name;
+#endif
+
+}
diff --git a/drivers/video/Intel-2D/sna.h b/drivers/video/Intel-2D/sna.h
new file mode 100644
index 0000000000..c5788cb28e
--- /dev/null
+++ b/drivers/video/Intel-2D/sna.h
@@ -0,0 +1,300 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+Copyright © 2002 David Dawes
+
+All Rights Reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * David Dawes + * + */ + +#ifndef _SNA_H_ +#define _SNA_H_ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + + +#include "intel_driver.h" +#include "pciaccess.h" + +#include "compiler.h" + +//#define DBG(x) +//#define DBG(x) ErrorF x + +#define assert(x) + + +typedef struct +{ + unsigned handle; + unsigned io_code; + void *input; + int inp_size; + void *output; + int out_size; +}ioctl_t; + +#define SRV_GET_INFO 20 +#define SRV_GET_PARAM 21 + +static int call_service(ioctl_t *io) +{ + int retval; + + asm volatile("int $0x40" + :"=a"(retval) + :"a"(68),"b"(17),"c"(io) + :"memory","cc"); + + return retval; +}; + + +#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \ + ((type) << 16) | \ + ((a) << 12) | \ + ((r) << 8) | \ + ((g) << 4) | \ + ((b))) +#define PIXMAN_TYPE_OTHER 0 +#define PIXMAN_TYPE_A 1 +#define PIXMAN_TYPE_ARGB 2 +#define PIXMAN_TYPE_ABGR 3 +#define PIXMAN_TYPE_COLOR 4 +#define PIXMAN_TYPE_GRAY 5 +#define PIXMAN_TYPE_YUY2 6 +#define PIXMAN_TYPE_YV12 7 +#define PIXMAN_TYPE_BGRA 8 +#define PIXMAN_TYPE_RGBA 9 +#define PIXMAN_TYPE_ARGB_SRGB 10 + +/* 32bpp formats */ +typedef enum { + PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), + PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), + PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8), + PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8), + PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8), + PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8), + PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8), + PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6), + PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10), + PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10), + PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), + PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10) + +} pixman_format_code_t; + + +typedef unsigned long Picture; +typedef unsigned long PictFormat; + +typedef struct _Pixmap *PixmapPtr; +typedef struct _Picture *PicturePtr; + +typedef struct _Drawable { + unsigned char type; /* DRAWABLE_ */ + unsigned char class; /* specific to type */ + unsigned char depth; + unsigned char bitsPerPixel; + 
unsigned int id; /* resource id */ + short x; /* window: screen absolute, pixmap: 0 */ + short y; /* window: screen absolute, pixmap: 0 */ + unsigned short width; + unsigned short height; +} DrawableRec; + +/* + * PIXMAP -- device dependent + */ + +typedef struct _Pixmap { + DrawableRec drawable; +// PrivateRec *devPrivates; + int refcnt; + int devKind; /* This is the pitch of the pixmap, typically width*bpp/8. */ +// DevUnion devPrivate; /* When !NULL, devPrivate.ptr points to the raw pixel data. */ +#ifdef COMPOSITE + short screen_x; + short screen_y; +#endif + unsigned usage_hint; /* see CREATE_PIXMAP_USAGE_* */ + + PixmapPtr master_pixmap; /* pointer to master copy of pixmap for pixmap sharing */ +} PixmapRec; + + + +struct pixman_box16 +{ + int16_t x1, y1, x2, y2; +}; + +typedef struct pixman_box16 BoxRec; +typedef unsigned int CARD32; +typedef unsigned short CARD16; + +#include "sna_render.h" +#include "kgem.h" + +#define GXclear 0x0 +#define GXcopy 0x3 + +#define PictOpClear 0 +#define PictOpSrc 1 +#define PictOpDst 2 +#define PictOpOver 3 +#define PictOpOverReverse 4 +#define PictOpIn 5 +#define PictOpInReverse 6 +#define PictOpOut 7 +#define PictOpOutReverse 8 +#define PictOpAtop 9 +#define PictOpAtopReverse 10 +#define PictOpXor 11 +#define PictOpAdd 12 +#define PictOpSaturate 13 +#define PictOpMaximum 13 + + + +struct sna { + unsigned flags; +#define SNA_NO_WAIT 0x1 +#define SNA_NO_FLIP 0x2 +#define SNA_TRIPLE_BUFFER 0x4 +#define SNA_TEAR_FREE 0x10 +#define SNA_FORCE_SHADOW 0x20 + + struct list flush_pixmaps; + struct list active_pixmaps; + + + +// int vblank_interval; + +// struct list deferred_free; +// struct list dirty_pixmaps; +// struct list active_pixmaps; +// struct list inactive_clock[2]; + + unsigned int tiling; +#define SNA_TILING_DISABLE 0x0 +#define SNA_TILING_FB 0x1 +#define SNA_TILING_2D 0x2 +#define SNA_TILING_ALL (~0) + + struct pci_device *PciInfo; + const struct intel_device_info *info; + +// PicturePtr clear; + struct { + uint32_t fill_bo; + uint32_t fill_pixel; + uint32_t fill_alu; + } blt_state; + union { +// struct gen2_render_state gen2; +// struct gen3_render_state gen3; +// struct gen4_render_state gen4; +// struct gen5_render_state gen5; + struct gen6_render_state gen6; + struct gen7_render_state gen7; + } render_state; + + + /* Broken-out options. 
*/
+//	OptionInfoPtr Options;
+
+	/* Driver phase/state information */
+//	Bool suspended;
+
+	struct kgem kgem;
+	struct sna_render render;
+
+#if DEBUG_MEMORY
+	struct {
+		int shadow_pixels_allocs;
+		int cpu_bo_allocs;
+		size_t shadow_pixels_bytes;
+		size_t cpu_bo_bytes;
+	} debug_memory;
+#endif
+};
+
+static inline int vertex_space(struct sna *sna)
+{
+	return sna->render.vertex_size - sna->render.vertex_used;
+}
+
+static inline void vertex_emit(struct sna *sna, float v)
+{
+	assert(sna->render.vertex_used < sna->render.vertex_size);
+	sna->render.vertices[sna->render.vertex_used++] = v;
+}
+
+static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
+{
+	int16_t *v = (int16_t *)&sna->render.vertices[sna->render.vertex_used++];
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+	v[0] = x;
+	v[1] = y;
+}
+
+static inline void batch_emit(struct sna *sna, uint32_t dword)
+{
+	assert(sna->kgem.mode != KGEM_NONE);
+	assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);
+	sna->kgem.batch[sna->kgem.nbatch++] = dword;
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#endif
+
+#ifndef ALIGN
+#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+#endif
+
+#ifndef MIN
+#define MIN(a,b) ((a) <= (b) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+#endif
+#endif /* _SNA_H */
diff --git a/drivers/video/Intel-2D/sna_reg.h b/drivers/video/Intel-2D/sna_reg.h
new file mode 100644
index 0000000000..551d64b0ef
--- /dev/null
+++ b/drivers/video/Intel-2D/sna_reg.h
@@ -0,0 +1,81 @@
+#ifndef SNA_REG_H
+#define SNA_REG_H
+
+/* Flush */
+#define MI_FLUSH			(0x04<<23)
+#define MI_FLUSH_DW			(0x26<<23)
+
+#define MI_WRITE_DIRTY_STATE		(1<<4)
+#define MI_END_SCENE			(1<<3)
+#define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
+#define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
+#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
+#define MI_INVALIDATE_MAP_CACHE		(1<<0)
+/* broadwater flush bits */
+#define BRW_MI_GLOBAL_SNAPSHOT_RESET	(1 << 3)
+
+#define MI_BATCH_BUFFER_END	(0xA << 23)
+
+/* Noop */
+#define MI_NOOP				0x00
+#define MI_NOOP_WRITE_ID		(1<<22)
+#define MI_NOOP_ID_MASK			((1<<22) - 1)
+
+/* Wait for Events */
+#define MI_WAIT_FOR_EVENT		(0x03<<23)
+#define MI_WAIT_FOR_PIPEB_SVBLANK	(1<<18)
+#define MI_WAIT_FOR_PIPEA_SVBLANK	(1<<17)
+#define MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
+#define MI_WAIT_FOR_PIPEB_VBLANK	(1<<7)
+#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5)
+#define MI_WAIT_FOR_PIPEA_VBLANK	(1<<3)
+#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1)
+
+/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
+#define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20)
+
+/* BLT commands */
+#define BLT_WRITE_ALPHA		(1<<21)
+#define BLT_WRITE_RGB		(1<<20)
+#define BLT_SRC_TILED		(1<<15)
+#define BLT_DST_TILED		(1<<11)
+
+#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
+#define XY_COLOR_BLT		((2<<29)|(0x50<<22)|(0x4))
+#define XY_SETUP_BLT		((2<<29)|(1<<22)|6)
+#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7)
+#define XY_SETUP_CLIP		((2<<29)|(3<<22)|1)
+#define XY_SCANLINE_BLT		((2<<29)|(0x25<<22)|1)
+#define XY_TEXT_IMMEDIATE_BLT	((2<<29)|(0x31<<22)|(1<<16))
+#define XY_SRC_COPY_BLT_CMD	((2<<29)|(0x53<<22)|6)
+#define SRC_COPY_BLT_CMD	((2<<29)|(0x43<<22)|0x4)
+#define XY_PAT_BLT_IMMEDIATE	((2<<29)|(0x72<<22))
+#define XY_MONO_PAT		((0x2<<29)|(0x52<<22)|0x7)
+#define XY_MONO_SRC_COPY	((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_SRC_COPY_IMM	((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_BLT	((0x2<<29)|(0x57<<22)|0xa)
+#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT	((0x2<<29)|(0x58<<22)|0xa)
+
+/* FLUSH commands */
+#define BRW_3D(Pipeline,Opcode,Subopcode) \
+	((3 << 29) | \
+	 ((Pipeline) << 27) | \
+	 ((Opcode) << 24) | \
+	 ((Subopcode) << 16))
+#define PIPE_CONTROL	BRW_3D(3, 2, 0)
+#define PIPE_CONTROL_NOWRITE	(0 << 14)
+#define PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define PIPE_CONTROL_WRITE_TIME	(3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WC_FLUSH	(1 << 12)
+#define PIPE_CONTROL_IS_FLUSH	(1 << 11)
+#define PIPE_CONTROL_TC_FLUSH	(1 << 10)
+#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
+
+#endif
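+/*
+ * Illustrative sketch, not part of the original header: how the BLT
+ * opcode and modifier bits above are combined when emitting a blit.
+ * The helper is hypothetical; note that with BLT_DST_TILED set, gen4+
+ * hardware expects the destination pitch expressed in dwords rather
+ * than bytes.
+ */
+static uint32_t xy_color_blt_cmd(int dst_tiled)
+{
+    uint32_t cmd = XY_COLOR_BLT | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+
+    if (dst_tiled)
+        cmd |= BLT_DST_TILED;
+
+    return cmd;
+}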
diff --git a/drivers/video/Intel-2D/sna_reg.h b/drivers/video/Intel-2D/sna_reg.h
new file mode 100644
index 0000000000..551d64b0ef
--- /dev/null
+++ b/drivers/video/Intel-2D/sna_reg.h
@@ -0,0 +1,81 @@
+#ifndef SNA_REG_H
+#define SNA_REG_H
+
+/* Flush */
+#define MI_FLUSH			(0x04<<23)
+#define MI_FLUSH_DW			(0x26<<23)
+
+#define MI_WRITE_DIRTY_STATE		(1<<4)
+#define MI_END_SCENE			(1<<3)
+#define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
+#define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
+#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
+#define MI_INVALIDATE_MAP_CACHE		(1<<0)
+/* broadwater flush bits */
+#define BRW_MI_GLOBAL_SNAPSHOT_RESET	(1 << 3)
+
+#define MI_BATCH_BUFFER_END		(0xA << 23)
+
+/* Noop */
+#define MI_NOOP				0x00
+#define MI_NOOP_WRITE_ID		(1<<22)
+#define MI_NOOP_ID_MASK			((1<<22) - 1)
+
+/* Wait for Events */
+#define MI_WAIT_FOR_EVENT		(0x03<<23)
+#define MI_WAIT_FOR_PIPEB_SVBLANK	(1<<18)
+#define MI_WAIT_FOR_PIPEA_SVBLANK	(1<<17)
+#define MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
+#define MI_WAIT_FOR_PIPEB_VBLANK	(1<<7)
+#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5)
+#define MI_WAIT_FOR_PIPEA_VBLANK	(1<<3)
+#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1)
+
+/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
+#define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20)
+
+/* BLT commands */
+#define BLT_WRITE_ALPHA			(1<<21)
+#define BLT_WRITE_RGB			(1<<20)
+#define BLT_SRC_TILED			(1<<15)
+#define BLT_DST_TILED			(1<<11)
+
+#define COLOR_BLT_CMD			((2<<29)|(0x40<<22)|(0x3))
+#define XY_COLOR_BLT			((2<<29)|(0x50<<22)|(0x4))
+#define XY_SETUP_BLT			((2<<29)|(1<<22)|6)
+#define XY_SETUP_MONO_PATTERN_SL_BLT	((2<<29)|(0x11<<22)|7)
+#define XY_SETUP_CLIP			((2<<29)|(3<<22)|1)
+#define XY_SCANLINE_BLT			((2<<29)|(0x25<<22)|1)
+#define XY_TEXT_IMMEDIATE_BLT		((2<<29)|(0x31<<22)|(1<<16))
+#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
+#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
+#define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
+#define XY_MONO_PAT			((0x2<<29)|(0x52<<22)|0x7)
+#define XY_MONO_SRC_COPY		((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_SRC_COPY_IMM		((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_BLT	((0x2<<29)|(0x57<<22)|0xa)
+#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
+
+/* FLUSH commands */
+#define BRW_3D(Pipeline,Opcode,Subopcode) \
+	((3 << 29) | \
+	 ((Pipeline) << 27) | \
+	 ((Opcode) << 24) | \
+	 ((Subopcode) << 16))
+#define PIPE_CONTROL			BRW_3D(3, 2, 0)
+#define PIPE_CONTROL_NOWRITE		(0 << 14)
+#define PIPE_CONTROL_WRITE_QWORD	(1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH	(2 << 14)
+#define PIPE_CONTROL_WRITE_TIME		(3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
+#define PIPE_CONTROL_WC_FLUSH		(1 << 12)
+#define PIPE_CONTROL_IS_FLUSH		(1 << 11)
+#define PIPE_CONTROL_TC_FLUSH		(1 << 10)
+#define PIPE_CONTROL_NOTIFY_ENABLE	(1 << 8)
+#define PIPE_CONTROL_GLOBAL_GTT		(1 << 2)
+#define PIPE_CONTROL_LOCAL_PGTT		(0 << 2)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
+
+#endif
diff --git a/drivers/video/Intel-2D/sna_render.h b/drivers/video/Intel-2D/sna_render.h
new file mode 100644
index 0000000000..8c56594b8c
--- /dev/null
+++ b/drivers/video/Intel-2D/sna_render.h
@@ -0,0 +1,690 @@
+#ifndef SNA_RENDER_H
+#define SNA_RENDER_H
+
+#include "compiler.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define GRADIENT_CACHE_SIZE 16
+
+#define GXinvalid 0xff
+
+struct sna;
+struct sna_glyph;
+struct sna_video;
+struct sna_video_frame;
+struct brw_compile;
+
+struct sna_composite_rectangles {
+	struct sna_coordinate {
+		int16_t x, y;
+	} src, mask, dst;
+	int16_t width, height;
+};
+
+struct sna_composite_op {
+	fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
+			     const struct sna_composite_rectangles *r);
+	fastcall void (*box)(struct sna *sna,
+			     const struct sna_composite_op *op,
+			     const BoxRec *box);
+	void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
+		      const BoxRec *box, int nbox);
+	void (*done)(struct sna *sna, const struct sna_composite_op *op);
+
+	struct sna_damage **damage;
+
+	uint32_t op;
+
+	struct {
+		PixmapPtr pixmap;
+		CARD32 format;
+		struct kgem_bo *bo;
+		int16_t x, y;
+		uint16_t width, height;
+	} dst;
+
+	struct sna_composite_channel {
+		struct kgem_bo *bo;
+//		PictTransform *transform;
+		uint16_t width;
+		uint16_t height;
+		uint32_t pict_format;
+		uint32_t card_format;
+		uint32_t filter;
+		uint32_t repeat;
+		uint32_t is_affine : 1;
+		uint32_t is_solid : 1;
+		uint32_t is_linear : 1;
+		uint32_t is_opaque : 1;
+		uint32_t alpha_fixup : 1;
+		uint32_t rb_reversed : 1;
+		int16_t offset[2];
+		float scale[2];
+
+//		pixman_transform_t embedded_transform;
+
+		union {
+			struct {
+				float dx, dy, offset;
+			} linear;
+			struct {
+				uint32_t pixel;
+			} gen2;
+			struct gen3_shader_channel {
+				int type;
+				uint32_t mode;
+				uint32_t constants;
+			} gen3;
+		} u;
+	} src, mask;
+	uint32_t is_affine : 1;
+	uint32_t has_component_alpha : 1;
+	uint32_t need_magic_ca_pass : 1;
+	uint32_t rb_reversed : 1;
+
+	int16_t floats_per_vertex;
+	int16_t floats_per_rect;
+	fastcall void (*prim_emit)(struct sna *sna,
+				   const struct sna_composite_op *op,
+				   const struct sna_composite_rectangles *r);
+
+	struct sna_composite_redirect {
+		struct kgem_bo *real_bo;
+		struct sna_damage **real_damage, *damage;
+		BoxRec box;
+	} redirect;
+
+	union {
+		struct sna_blt_state {
+			PixmapPtr src_pixmap;
+			int16_t sx, sy;
+
+			uint32_t inplace :1;
+			uint32_t overwrites:1;
+			uint32_t bpp : 6;
+
+			uint32_t cmd;
+			uint32_t br13;
+			uint32_t pitch[2];
+			uint32_t pixel;
+			struct kgem_bo *bo[2];
+		} blt;
+
+		struct {
+			float constants[8];
+			uint32_t num_constants;
+		} gen3;
+
+		struct {
+			int wm_kernel;
+			int ve_id;
+		} gen4;
+
+		struct
{ + int16_t wm_kernel; + int16_t ve_id; + } gen5; + + struct { + uint32_t flags; + } gen6; + + struct { + uint32_t flags; + } gen7; + } u; + + void *priv; +}; + +struct sna_copy_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy); + void (*done)(struct sna *sna, const struct sna_copy_op *op); +}; + +struct sna_render { + int active; + + int max_3d_size; + int max_3d_pitch; + + unsigned prefer_gpu; +#define PREFER_GPU_BLT 0x1 +#define PREFER_GPU_RENDER 0x2 +#define PREFER_GPU_SPANS 0x4 + +#if 0 + + bool (*composite)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, PicturePtr mask, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_op *tmp); + + bool (*check_composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t w, int16_t h, unsigned flags); + bool (*composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + unsigned flags, + struct sna_composite_spans_op *tmp); +#define COMPOSITE_SPANS_RECTILINEAR 0x1 +#define COMPOSITE_SPANS_INPLACE_HINT 0x2 + + bool (*video)(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + short dx, short dy, + PixmapPtr pixmap); + + bool (*fill_boxes)(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + bool (*fill)(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp); + bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu); + bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo); + + bool (*copy_boxes)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags); +#define COPY_LAST 0x1 +#define COPY_SYNC 0x2 + +#endif + + bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op); + + void (*flush)(struct sna *sna); + void (*reset)(struct sna *sna); + void (*fini)(struct sna *sna); + +#if 0 + + struct sna_alpha_cache { + struct kgem_bo *cache_bo; + struct kgem_bo *bo[256+7]; + } alpha_cache; + + struct sna_solid_cache { + struct kgem_bo *cache_bo; + struct kgem_bo *bo[1024]; + uint32_t color[1025]; + int last; + int size; + int dirty; + } solid_cache; + + struct { + struct sna_gradient_cache { + struct kgem_bo *bo; + int nstops; + PictGradientStop *stops; + } cache[GRADIENT_CACHE_SIZE]; + int size; + } gradient_cache; + + struct sna_glyph_cache{ + PicturePtr picture; + struct sna_glyph **glyphs; + uint16_t count; + uint16_t evict; + } glyph[2]; + pixman_image_t *white_image; + PicturePtr white_picture; +#if HAS_PIXMAN_GLYPHS + pixman_glyph_cache_t *glyph_cache; +#endif + +#endif + + uint16_t vb_id; + uint16_t vertex_offset; + uint16_t vertex_start; + uint16_t vertex_index; + uint16_t vertex_used; + uint16_t vertex_size; + uint16_t vertex_reloc[16]; + int nvertex_reloc; + + 
struct kgem_bo *vbo; + float *vertices; + + float vertex_data[1024]; +}; + +struct gen2_render_state { + uint32_t target; + bool need_invariant; + uint32_t logic_op_enabled; + uint32_t ls1, ls2, vft; + uint32_t diffuse; + uint32_t specular; +}; + +struct gen3_render_state { + uint32_t current_dst; + bool need_invariant; + uint32_t tex_count; + uint32_t last_drawrect_limit; + uint32_t last_target; + uint32_t last_blend; + uint32_t last_constants; + uint32_t last_sampler; + uint32_t last_shader; + uint32_t last_diffuse; + uint32_t last_specular; + + uint16_t last_vertex_offset; + uint16_t floats_per_vertex; + uint16_t last_floats_per_vertex; + + uint32_t tex_map[4]; + uint32_t tex_handle[2]; + uint32_t tex_delta[2]; +}; + +struct gen4_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t last_pipelined_pointers; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool needs_urb; +}; + +struct gen5_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + uint16_t last_pipelined_pointers; + + bool needs_invariant; +}; + +enum { + GEN6_WM_KERNEL_NOMASK = 0, + GEN6_WM_KERNEL_NOMASK_P, + + GEN6_WM_KERNEL_MASK, + GEN6_WM_KERNEL_MASK_P, + + GEN6_WM_KERNEL_MASKCA, + GEN6_WM_KERNEL_MASKCA_P, + + GEN6_WM_KERNEL_MASKSA, + GEN6_WM_KERNEL_MASKSA_P, + + GEN6_WM_KERNEL_OPACITY, + GEN6_WM_KERNEL_OPACITY_P, + + GEN6_WM_KERNEL_VIDEO_PLANAR, + GEN6_WM_KERNEL_VIDEO_PACKED, + GEN6_KERNEL_COUNT +}; + +struct gen6_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; + + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t ve_id; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool first_state_packet; +}; + +enum { + GEN7_WM_KERNEL_NOMASK = 0, + GEN7_WM_KERNEL_NOMASK_P, + + GEN7_WM_KERNEL_MASK, + GEN7_WM_KERNEL_MASK_P, + + GEN7_WM_KERNEL_MASKCA, + GEN7_WM_KERNEL_MASKCA_P, + + GEN7_WM_KERNEL_MASKSA, + GEN7_WM_KERNEL_MASKSA_P, + + GEN7_WM_KERNEL_OPACITY, + GEN7_WM_KERNEL_OPACITY_P, + + GEN7_WM_KERNEL_VIDEO_PLANAR, + GEN7_WM_KERNEL_VIDEO_PACKED, + GEN7_WM_KERNEL_COUNT +}; + +struct gen7_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; + + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t ve_id; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool emit_flush; +}; + +struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +}; + +int sna_static_stream_init(struct sna_static_stream *stream); +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align); +void *sna_static_stream_map(struct 
sna_static_stream *stream, + uint32_t len, uint32_t align); +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, + void *ptr); +unsigned sna_static_stream_compile_sf(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *)); + +unsigned sna_static_stream_compile_wm(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *, int), + int width); +struct kgem_bo *sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream); + +/* +struct kgem_bo * +sna_render_get_solid(struct sna *sna, + uint32_t color); + +void +sna_render_flush_solid(struct sna *sna); + +struct kgem_bo * +sna_render_get_gradient(struct sna *sna, + PictGradient *pattern); + +uint32_t sna_rgba_for_color(uint32_t color, int depth); +uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); +bool sna_get_rgba_from_pixel(uint32_t pixel, + uint16_t *red, + uint16_t *green, + uint16_t *blue, + uint16_t *alpha, + uint32_t format); +bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); + +*/ + +void no_render_init(struct sna *sna); + +bool gen2_render_init(struct sna *sna); +bool gen3_render_init(struct sna *sna); +bool gen4_render_init(struct sna *sna); +bool gen5_render_init(struct sna *sna); +bool gen6_render_init(struct sna *sna); +bool gen7_render_init(struct sna *sna); + +#if 0 + +bool sna_tiling_composite(uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); +bool sna_tiling_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + +bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + +bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, const BoxRec *box, int nbox); + +bool sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp, + bool fallback); +bool sna_blt_composite__convert(struct sna *sna, + int x, int y, + int width, int height, + struct sna_composite_op *tmp); + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + struct sna_fill_op *fill); + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *copy); + +bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + const BoxRec *box, int n); + +bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, + const BoxRec *box, int n); +bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int nbox); + +bool _sna_get_pixel_from_rgba(uint32_t *pixel, + uint16_t 
red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format); + +static inline bool +sna_get_pixel_from_rgba(uint32_t * pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format) +{ + switch (format) { + case PICT_x8r8g8b8: + alpha = 0xffff; + /* fall through to re-use a8r8g8b8 expansion */ + case PICT_a8r8g8b8: + *pixel = ((alpha >> 8 << 24) | + (red >> 8 << 16) | + (green & 0xff00) | + (blue >> 8)); + return TRUE; + case PICT_a8: + *pixel = alpha >> 8; + return TRUE; + } + + return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); +} + +struct kgem_bo * +__sna_render_pixmap_bo(struct sna *sna, + PixmapPtr pixmap, + const BoxRec *box, + bool blt); + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +bool +sna_render_pixmap_partial(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h); + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_approximate_gradient(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y, + bool fixup_alpha); + +inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) +{ + struct sna_composite_redirect *t = &op->redirect; + t->real_bo = NULL; + t->damage = NULL; +} + +bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height); + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op); + +bool +sna_composite_mask_is_opaque(PicturePtr mask); + +#endif + +void sna_vertex_init(struct sna *sna); + + +#endif /* SNA_RENDER_H */ diff --git a/drivers/video/Intel-2D/sna_stream.c b/drivers/video/Intel-2D/sna_stream.c new file mode 100644 index 0000000000..ce49f673d2 --- /dev/null +++ b/drivers/video/Intel-2D/sna_stream.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+#include "sna_render.h"
+#include "brw/brw.h"
+
+int sna_static_stream_init(struct sna_static_stream *stream)
+{
+	stream->used = 0;
+	stream->size = 64*1024;
+
+	stream->data = malloc(stream->size);
+	return stream->data != NULL;
+}
+
+static uint32_t sna_static_stream_alloc(struct sna_static_stream *stream,
+					uint32_t len, uint32_t align)
+{
+	uint32_t offset = ALIGN(stream->used, align);
+	uint32_t size = offset + len;
+
+	if (size > stream->size) {
+		do
+			stream->size *= 2;
+		while (stream->size < size);
+
+		stream->data = realloc(stream->data, stream->size);
+	}
+
+	stream->used = size;
+	return offset;
+}
+
+uint32_t sna_static_stream_add(struct sna_static_stream *stream,
+			       const void *data, uint32_t len, uint32_t align)
+{
+	uint32_t offset = sna_static_stream_alloc(stream, len, align);
+	memcpy(stream->data + offset, data, len);
+	return offset;
+}
+
+void *sna_static_stream_map(struct sna_static_stream *stream,
+			    uint32_t len, uint32_t align)
+{
+	uint32_t offset = sna_static_stream_alloc(stream, len, align);
+	return memset(stream->data + offset, 0, len);
+}
+
+uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr)
+{
+	return (uint8_t *)ptr - stream->data;
+}
+
+struct kgem_bo *sna_static_stream_fini(struct sna *sna,
+				       struct sna_static_stream *stream)
+{
+	struct kgem_bo *bo;
+
+	DBG(("uploaded %d bytes of static state\n", stream->used));
+
+	bo = kgem_create_linear(&sna->kgem, stream->used, 0);
+	if (bo && !kgem_bo_write(&sna->kgem, bo, stream->data, stream->used)) {
+//		kgem_bo_destroy(&sna->kgem, bo);
+		bo = NULL; /* report failure, but still release the stream below */
+	}
+
+	free(stream->data);
+
+	return bo;
+}
+
+unsigned
+sna_static_stream_compile_wm(struct sna *sna,
+			     struct sna_static_stream *stream,
+			     bool (*compile)(struct brw_compile *, int),
+			     int dispatch_width)
+{
+	struct brw_compile p;
+
+	/* Reserve 256 dwords for the kernel; the unused tail (or the whole
+	 * reservation, on failure) is trimmed once compilation finishes. */
+	brw_compile_init(&p, sna->kgem.gen,
+			 sna_static_stream_map(stream,
+					       256*sizeof(uint32_t), 64));
+
+	if (!compile(&p, dispatch_width)) {
+		stream->used -= 256*sizeof(uint32_t);
+		return 0;
+	}
+
+	assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t));
+
+	stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
+	return sna_static_stream_offsetof(stream, p.store);
+}
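To round off, a hedged usage sketch of the static-stream API above: initialise the stream, compile a kernel into it, and finalise the stream into a GPU buffer object. brw_wm_kernel__affine is one of the compiler entry points declared in brw/brw.h; example_upload_wm_kernel is a hypothetical name, and the error handling is reduced to the essentials rather than how the driver itself sequences these calls.

/* Usage sketch only -- not driver code. */
static struct kgem_bo *example_upload_wm_kernel(struct sna *sna,
						uint32_t *kernel_offset)
{
	struct sna_static_stream stream;

	if (!sna_static_stream_init(&stream))
		return NULL;

	/* Compile the 16-wide affine kernel into the stream; the returned
	 * value is the kernel's byte offset within the final bo, 0 on
	 * compilation failure. */
	*kernel_offset = sna_static_stream_compile_wm(sna, &stream,
						      brw_wm_kernel__affine, 16);
	if (*kernel_offset == 0) {
		free(stream.data); /* compile failed: discard the stream */
		return NULL;
	}

	return sna_static_stream_fini(sna, &stream);
}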