diff --git a/drivers/video/drm/radeon/r600_reg_r7xx.h b/drivers/video/drm/radeon/r600_reg_r7xx.h new file mode 100644 index 0000000000..f46f86be37 --- /dev/null +++ b/drivers/video/drm/radeon/r600_reg_r7xx.h @@ -0,0 +1,149 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _R600_REG_R7xx_H_ +#define _R600_REG_R7xx_H_ + +/* + * Register update for R7xx chips + */ + +enum { + + R7XX_MC_VM_FB_LOCATION = 0x00002024, + +// GRBM_STATUS = 0x00008010, + R7XX_TA_BUSY_bit = 1 << 14, + + R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c, + RING0_OFFSET_mask = 0xff << 0, + RING0_OFFSET_shift = 0, + ISOLATE_ES_ENABLE_bit = 1 << 12, + ISOLATE_GS_ENABLE_bit = 1 << 13, + VS_PC_LIMIT_ENABLE_bit = 1 << 14, + +// SQ_ALU_WORD0 = 0x00008dfc, +// SRC0_SEL_mask = 0x1ff << 0, +// SRC1_SEL_mask = 0x1ff << 13, + R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, + R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, + R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, + R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// INDEX_MODE_mask = 0x07 << 26, + R7xx_SQ_INDEX_GLOBAL = 0x05, + R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06, + R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc, + R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, + R6xx_FOG_MERGE_bit = 1 << 5, + R6xx_OMOD_mask = 0x03 << 6, + R7xx_OMOD_mask = 0x03 << 5, + R6xx_OMOD_shift = 6, + R7xx_OMOD_shift = 5, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, + R7xx_SQ_OP2_INST_FREXP_64 = 0x07, + R7xx_SQ_OP2_INST_ADD_64 = 0x17, + R7xx_SQ_OP2_INST_MUL_64 = 0x1b, + R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c, + R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d, + R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a, + R7xx_SQ_OP2_INST_FRACT_64 = 0x7b, + R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c, + R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d, + R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e, +// SQ_ALU_WORD1_OP3 = 0x00008dfc, +// SRC2_SEL_mask = 0x1ff << 0, +// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, +// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, +// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, +// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + R7xx_SQ_OP3_INST_MULADD_64 = 0x08, + R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09, + R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a, + R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b, +// SQ_CF_ALU_WORD1 = 0x00008dfc, + R6xx_USES_WATERFALL_bit = 1 << 25, + R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, +// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, +// ARRAY_BASE_mask = 0x1fff << 0, +// TYPE_mask = 0x03 << 13, +// SQ_EXPORT_PARAM = 0x02, +// X_UNUSED_FOR_SX_EXPORTS = 0x03, +// ELEM_SIZE_mask = 0x03 << 30, +// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, +// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a, +// SQ_CF_WORD1 = 0x00008dfc, +// SQ_CF_WORD1__COUNT_mask = 0x07 << 10, + R7xx_COUNT_3_bit = 1 << 19, +// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_END_PROGRAM = 0x19, + R7xx_SQ_CF_INST_WAIT_ACK = 0x1a, + R7xx_SQ_CF_INST_TEX_ACK = 0x1b, + R7xx_SQ_CF_INST_VTX_ACK = 0x1c, + R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d, +// SQ_VTX_WORD0 = 0x00008dfc, +// VTX_INST_mask = 0x1f << 0, + R7xx_SQ_VTX_INST_MEM = 0x02, +// SQ_VTX_WORD2 = 0x00008dfc, + R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + +// SQ_TEX_WORD0 = 0x00008dfc, +// TEX_INST_mask = 0x1f << 0, + R7xx_X_MEMORY_READ = 0x02, + R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, + R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f, + R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + + R7xx_PA_SC_EDGERULE = 0x00028230, + R7xx_SPI_THREAD_GROUPING = 0x000286c8, + PS_GROUPING_mask = 0x1f << 0, + PS_GROUPING_shift = 0, + VS_GROUPING_mask = 0x1f << 8, + VS_GROUPING_shift = 8, + GS_GROUPING_mask = 0x1f << 16, + GS_GROUPING_shift = 16, + ES_GROUPING_mask = 0x1f << 24, + ES_GROUPING_shift = 24, + R7xx_CB_SHADER_CONTROL = 0x000287a0, + RT0_ENABLE_bit = 1 << 0, + RT1_ENABLE_bit = 1 << 1, + RT2_ENABLE_bit = 1 << 2, + RT3_ENABLE_bit = 1 << 3, + RT4_ENABLE_bit = 1 << 4, + RT5_ENABLE_bit = 1 << 5, + RT6_ENABLE_bit = 1 << 6, + RT7_ENABLE_bit = 1 << 7, +// DB_ALPHA_TO_MASK = 0x00028d44, + R7xx_OFFSET_ROUND_bit = 1 << 16, +// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, + R7xx_TRUNCATE_COORD_bit = 1 << 9, + R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, + +} ; + +#endif /* _R600_REG_R7xx_H_ */ diff --git a/drivers/video/drm/radeon/r600_shader.h b/drivers/video/drm/radeon/r600_shader.h new file mode 100644 index 0000000000..5494ad4427 --- /dev/null +++ b/drivers/video/drm/radeon/r600_shader.h @@ -0,0 +1,350 @@ +/* + * RadeonHD R6xx, R7xx DRI driver + * + * Copyright (C) 2008-2009 Alexander Deucher + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Shader macros + */ + +#ifndef __SHADER_H__ +#define __SHADER_H__ + +//#include "radeon.h" + +/* Restrictions of ALU instructions + * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1. + * max of 3 different src GPRs per instr. + * max of 4 different cfile constant components per instr. + * max of 2 (different) constants (any type) for t. + * bank swizzle (see below). + * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to + * different indices (gpr,loop,nothing). + * may use constant registers or constant cache, but not both. + */ + +/* Bank_swizzle: (pp. 297ff) + * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2). + * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.: + * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2 + * 1.x 2.x 012 1.x 2.x - + * 3.x 1.y 201 1.y - 3.x + * 2.x 1.y 102 (1.y) (2.x) - + * If data is read in a cycle, multiple scalar instructions can reference it. + * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1. + * No restrictions for constants or PV/PS. + * t can load multiple components in a single cycle slot, but has to share cycles with xyzw. + * t with single constant may not load GPRs or PV/PS in cycle 0 (carefull with ALU_TRANS_210). + * t with two constants may only load GPRs or PV/PS in cycle 2. + */ + + +/* Oder of instructions: All CF, All ALU, All Tex/Vtx fetches */ + + +// CF insts +// addr +#define ADDR(x) (x) +// pc +#define POP_COUNT(x) (x) +// const +#define CF_CONST(x) (x) +// cond +#define COND(x) (x) // SQ_COND_* +// count +#define I_COUNT(x) ((x) ? ((x) - 1) : 0) +//r7xx +#define COUNT_3(x) (x) +// call count +#define CALL_COUNT(x) (x) +// eop +#define END_OF_PROGRAM(x) (x) +// vpm +#define VALID_PIXEL_MODE(x) (x) +// cf inst +#define CF_INST(x) (x) // SQ_CF_INST_* + +// wqm +#define WHOLE_QUAD_MODE(x) (x) +// barrier +#define BARRIER(x) (x) +//kb0 +#define KCACHE_BANK0(x) (x) +//kb1 +#define KCACHE_BANK1(x) (x) +// km0/1 +#define KCACHE_MODE0(x) (x) +#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* +// +#define KCACHE_ADDR0(x) (x) +#define KCACHE_ADDR1(x) (x) +// uw +#define USES_WATERFALL(x) (x) + +#define ARRAY_BASE(x) (x) +// export pixel +#define CF_PIXEL_MRT0 0 +#define CF_PIXEL_MRT1 1 +#define CF_PIXEL_MRT2 2 +#define CF_PIXEL_MRT3 3 +#define CF_PIXEL_MRT4 4 +#define CF_PIXEL_MRT5 5 +#define CF_PIXEL_MRT6 6 +#define CF_PIXEL_MRT7 7 +// *_FOG: r6xx only +#define CF_PIXEL_MRT0_FOG 16 +#define CF_PIXEL_MRT1_FOG 17 +#define CF_PIXEL_MRT2_FOG 18 +#define CF_PIXEL_MRT3_FOG 19 +#define CF_PIXEL_MRT4_FOG 20 +#define CF_PIXEL_MRT5_FOG 21 +#define CF_PIXEL_MRT6_FOG 22 +#define CF_PIXEL_MRT7_FOG 23 +#define CF_PIXEL_Z 61 +// export pos +#define CF_POS0 60 +#define CF_POS1 61 +#define CF_POS2 62 +#define CF_POS3 63 +// export param +// 0...31 +#define TYPE(x) (x) // SQ_EXPORT_* +#if 0 +// type export +#define SQ_EXPORT_PIXEL 0 +#define SQ_EXPORT_POS 1 +#define SQ_EXPORT_PARAM 2 +// reserved 3 +// type mem +#define SQ_EXPORT_WRITE 0 +#define SQ_EXPORT_WRITE_IND 1 +#define SQ_EXPORT_WRITE_ACK 2 +#define SQ_EXPORT_WRITE_IND_ACK 3 +#endif + +#define RW_GPR(x) (x) +#define RW_REL(x) (x) +#define ABSOLUTE 0 +#define RELATIVE 1 +#define INDEX_GPR(x) (x) +#define ELEM_SIZE(x) (x ? (x - 1) : 0) +#define COMP_MASK(x) (x) +#define R6xx_ELEM_LOOP(x) (x) +#define BURST_COUNT(x) (x ? (x - 1) : 0) + +// swiz +#define SRC_SEL_X(x) (x) // SQ_SEL_* each +#define SRC_SEL_Y(x) (x) +#define SRC_SEL_Z(x) (x) +#define SRC_SEL_W(x) (x) + +#define CF_DWORD0(addr) cpu_to_le32((addr)) +// R7xx has another entry (COUNT3), but that is only used for adding a bit to count. +// We allow one more bit for count in the argument of the macro on R7xx instead. +// R6xx: [0,7] R7xx: [1,16] +#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \ + cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ + ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))) + +#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))) +#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \ + cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ + ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))) + +#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ + cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ + ((es) << 30))) +// R7xx apparently doesn't have the ELEM_LOOP entry any more +// We still expose it, but ELEM_LOOP is explicitely R6xx now. +// TODO: is this just forgotten in the docs, or really not available any more? +#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \ + cpu_to_le32((((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ + ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))) +#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \ + cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ + ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ + ((wqm) << 30) | ((b) << 31))) + +// ALU clause insts +#define SRC0_SEL(x) (x) +#define SRC1_SEL(x) (x) +#define SRC2_SEL(x) (x) +// src[0-2]_sel +// 0-127 GPR +// 128-159 kcache constants bank 0 +// 160-191 kcache constants bank 1 +// 248-255 special SQ_ALU_SRC_* (0, 1, etc.) +#define ALU_SRC_GPR_BASE 0 +#define ALU_SRC_KCACHE0_BASE 128 +#define ALU_SRC_KCACHE1_BASE 160 +#define ALU_SRC_CFILE_BASE 256 + +#define SRC0_REL(x) (x) +#define SRC1_REL(x) (x) +#define SRC2_REL(x) (x) +// elem +#define SRC0_ELEM(x) (x) +#define SRC1_ELEM(x) (x) +#define SRC2_ELEM(x) (x) +#define ELEM_X 0 +#define ELEM_Y 1 +#define ELEM_Z 2 +#define ELEM_W 3 +// neg +#define SRC0_NEG(x) (x) +#define SRC1_NEG(x) (x) +#define SRC2_NEG(x) (x) +// im +#define INDEX_MODE(x) (x) // SQ_INDEX_* +// ps +#define PRED_SEL(x) (x) // SQ_PRED_SEL_* +// last +#define LAST(x) (x) +// abs +#define SRC0_ABS(x) (x) +#define SRC1_ABS(x) (x) +// uem +#define UPDATE_EXECUTE_MASK(x) (x) +// up +#define UPDATE_PRED(x) (x) +// wm +#define WRITE_MASK(x) (x) +// fm +#define FOG_MERGE(x) (x) +// omod +#define OMOD(x) (x) // SQ_ALU_OMOD_* +// alu inst +#define ALU_INST(x) (x) // SQ_ALU_INST_* +//bs +#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* +#define DST_GPR(x) (x) +#define DST_REL(x) (x) +#define DST_ELEM(x) (x) +#define CLAMP(x) (x) + +#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ + cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ + ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ + ((im) << 26) | ((ps) << 29) | ((last) << 31))) +// R7xx has alu_inst at a different slot, and no fog merge any more (no fix function fog any more) +#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31))) +#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31))) +// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays +// Fog is NOT USED on R7xx, even if specified. +#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + ((chipfamily) < CHIP_FAMILY_RV770 ? \ + R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \ + R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp)) +#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ + cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ + ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ + ((de) << 29) | ((clamp) << 31))) + +// VTX clause insts +// vxt insts +#define VTX_INST(x) (x) // SQ_VTX_INST_* + +// fetch type +#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* + +#define FETCH_WHOLE_QUAD(x) (x) +#define BUFFER_ID(x) (x) +#define SRC_GPR(x) (x) +#define SRC_REL(x) (x) +#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0) + +#define SEMANTIC_ID(x) (x) +#define DST_SEL_X(x) (x) +#define DST_SEL_Y(x) (x) +#define DST_SEL_Z(x) (x) +#define DST_SEL_W(x) (x) +#define USE_CONST_FIELDS(x) (x) +#define DATA_FORMAT(x) (x) +// num format +#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* +// format comp +#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* +// sma +#define SRF_MODE_ALL(x) (x) +#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 +#define SRF_MODE_NO_ZERO 1 +#define OFFSET(x) (x) +// endian swap +#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* +#define CONST_BUF_NO_STRIDE(x) (x) +// mf +#define MEGA_FETCH(x) (x) + +#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ + cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))) +#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + cpu_to_le32((((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))) +#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))) +#define VTX_DWORD2(offset, es, cbns, mf) \ + cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19))) +#define VTX_DWORD_PAD cpu_to_le32(0x00000000) + +// TEX clause insts +// tex insts +#define TEX_INST(x) (x) // SQ_TEX_INST_* + +#define BC_FRAC_MODE(x) (x) +#define FETCH_WHOLE_QUAD(x) (x) +#define RESOURCE_ID(x) (x) +#define R7xx_ALT_CONST(x) (x) + +#define LOD_BIAS(x) (x) +//ct +#define COORD_TYPE_X(x) (x) +#define COORD_TYPE_Y(x) (x) +#define COORD_TYPE_Z(x) (x) +#define COORD_TYPE_W(x) (x) +#define TEX_UNNORMALIZED 0 +#define TEX_NORMALIZED 1 +#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */ +#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f) +#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f) +#define SAMPLER_ID(x) (x) + +// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only +#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \ + cpu_to_le32((((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24))) +#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))) +#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ + cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ + ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))) +#define TEX_DWORD_PAD cpu_to_le32(0x00000000) + +#endif diff --git a/drivers/video/drm/radeon/r600_video.c b/drivers/video/drm/radeon/r600_video.c new file mode 100644 index 0000000000..5007f3e7d1 --- /dev/null +++ b/drivers/video/drm/radeon/r600_video.c @@ -0,0 +1,784 @@ +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "r600d.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +//#define CP_PACKET2 0x80000000 +//#define PACKET2_PAD_SHIFT 0 +//#define PACKET2_PAD_MASK (0x3fffffff << 0) + +//#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +extern const u32 r7xx_default_state[]; +extern const u32 r6xx_default_state[]; +extern const u32 r6xx_default_size, r7xx_default_size; + +extern const u32 R600_video_ps[]; +extern const u32 R600_video_vs[]; + +extern const u32 r600_video_ps_size; +extern const u32 r600_video_vs_size; + +extern struct radeon_device *main_device; + +int r600_video_init(struct radeon_device *rdev) +{ + u32 obj_size; + int i, r, dwords; + void *ptr; + u32 packet2s[16]; + int num_packet2s = 0; + + /* pin copy shader into vram if already initialized */ + if (rdev->r600_video.shader_obj) + goto done; + + mutex_init(&rdev->r600_video.mutex); + rdev->r600_video.state_offset = 0; + + if (rdev->family >= CHIP_RV770) + rdev->r600_video.state_len = r7xx_default_size; + else + rdev->r600_video.state_len = r6xx_default_size; + + dwords = rdev->r600_video.state_len; + while (dwords & 0xf) { + packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); + dwords++; + } + + obj_size = dwords * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_video.vs_offset = obj_size; + obj_size += r600_video_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_video.ps_offset = obj_size; + obj_size += r600_video_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_video.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate video shader\n"); + return r; + } + + DRM_DEBUG("r6xx video blit allocated bo %08x vs %08x ps %08x\n", + obj_size, + rdev->r600_video.vs_offset, rdev->r600_video.ps_offset); + + r = radeon_bo_reserve(rdev->r600_video.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_kmap(rdev->r600_video.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + if (rdev->family >= CHIP_RV770) + memcpy(ptr + rdev->r600_video.state_offset, + r7xx_default_state, rdev->r600_video.state_len * 4); + else + memcpy(ptr + rdev->r600_video.state_offset, + r6xx_default_state, rdev->r600_video.state_len * 4); + if (num_packet2s) + memcpy(ptr + rdev->r600_video.state_offset + (rdev->r600_video.state_len * 4), + packet2s, num_packet2s * 4); + for (i = 0; i < r600_video_vs_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_video.vs_offset + i * 4) = cpu_to_le32(R600_video_vs[i]); + for (i = 0; i < r600_video_ps_size; i++) + *(u32 *)((unsigned long)ptr + rdev->r600_video.ps_offset + i * 4) = cpu_to_le32(R600_video_ps[i]); + radeon_bo_kunmap(rdev->r600_video.shader_obj); + radeon_bo_unreserve(rdev->r600_video.shader_obj); + +done: + r = radeon_bo_reserve(rdev->r600_video.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_video.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_video.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_video.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } + + return 0; +} + +/* emits 21 on rv770+, 23 on r600 */ +static void +set_render_target(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + + h = ALIGN(h, 8); + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 27) | (1 << 8)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) { + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0)); + radeon_ring_write(rdev, 2 << 0); + } + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (pitch << 0) | (slice << 10)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, cb_color_info); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); +} + +/* emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* emits 21dw + 1 surface sync = 26dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + u32 sq_pgm_resources; + + /* setup shader regs */ + sq_pgm_resources = (1 << 0); + + /* VS */ + gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + /* PS */ + gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_pgm_resources | (1 << 28)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 2); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, 0); + + gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.vs_offset; + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + +/* emits 9 + 1 sync (5) = 14*/ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2; + + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); +#ifdef __BIG_ENDIAN + sq_vtx_constant_word2 |= (2 << 30); +#endif + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0x460); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, 1 << 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); +} + +/* emits 9 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0) | (1 << 3); + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | + ((w - 1) << 19)); + + sq_tex_resource_word1 = (format << 26); + sq_tex_resource_word1 |= ((h - 1) << 0); + + sq_tex_resource_word4 = ((1 << 14) | + (0 << 16) | + (1 << 19) | + (2 << 22) | + (3 << 25)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30); +} + +/* emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + + +/* emits 14 */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + u64 gpu_addr; + int dwords; + + switch (rdev->family) { + case CHIP_R600: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 40; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV670: + num_ps_gprs = 144; + num_vs_gprs = 40; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 136; + num_vs_threads = 48; + num_gs_threads = 4; + num_es_threads = 4; + num_ps_stack_entries = 40; + num_vs_stack_entries = 40; + num_gs_stack_entries = 32; + num_es_stack_entries = 16; + break; + case CHIP_RV770: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 256; + num_vs_stack_entries = 256; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + num_ps_gprs = 84; + num_vs_gprs = 36; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 188; + num_vs_threads = 60; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + case CHIP_RV710: + num_ps_gprs = 192; + num_vs_gprs = 56; + num_temp_gprs = 4; + num_gs_gprs = 0; + num_es_gprs = 0; + num_ps_threads = 144; + num_vs_threads = 48; + num_gs_threads = 0; + num_es_threads = 0; + num_ps_stack_entries = 128; + num_vs_stack_entries = 128; + num_gs_stack_entries = 0; + num_es_stack_entries = 0; + break; + } + + if ((rdev->family == CHIP_RV610) || + (rdev->family == CHIP_RV620) || + (rdev->family == CHIP_RS780) || + (rdev->family == CHIP_RS880) || + (rdev->family == CHIP_RV710)) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (DX9_CONSTS | + ALU_INST_PREFER_VECTOR | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + + /* emit an IB pointing at default state */ + dwords = ALIGN(rdev->r600_video.state_len, 0x10); + gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.state_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, (gpu_addr & 0xFFFFFFFC)); + radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(rdev, dwords); + + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + +static int r600_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_video.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_video.vb_total = 64*1024; + rdev->r600_video.vb_used = 0; + return 0; +} + +static void r600_vb_ib_put(struct radeon_device *rdev) +{ + radeon_fence_emit(rdev, rdev->r600_video.vb_ib->fence); + radeon_ib_free(rdev, &rdev->r600_video.vb_ib); +} + + +int r600_video_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size, line_size; + int max_size; + /* loops of emits 64 + fence emit possible */ + int dwords_per_loop = 76, num_loops; + + r = r600_vb_ib_get(rdev); + if (r) + return r; + + /* set_render_target emits 2 extra dwords on rv6xx */ + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) + dwords_per_loop += 2; + + /* 8 bpp vs 32 bpp for xfer unit */ + if (size_bytes & 3) + line_size = 8192; + else + line_size = 8192*4; + + max_size = 8192 * line_size; + + /* major loops cover the max size transfer */ + num_loops = ((size_bytes + max_size) / max_size); + /* minor loops cover the extra non aligned bits */ + num_loops += ((size_bytes % line_size) ? 1 : 0); + /* calculate number of loops correctly */ + ring_size = num_loops * dwords_per_loop; + /* set default + shaders */ + ring_size += 40; /* shaders + def state */ + ring_size += 10; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 10; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + if (r) + return r; + + set_default_state(rdev); /* 14 */ + set_shaders(rdev); /* 26 */ + return 0; +} + +void r600_video_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; + + if (rdev->r600_video.vb_ib) + r600_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + +void r600_kms_video_blit(struct radeon_device *rdev, + u64 src_gpu_addr, int dstx, int dsty, int w, int h, int pitch) +{ + u64 vb_gpu_addr; + u32 *vb; + +// DRM_DEBUG("emitting video copy\n"); + vb = (u32 *)(rdev->r600_video.vb_ib->ptr + rdev->r600_video.vb_used); + + if ((rdev->r600_video.vb_used + 48) > rdev->r600_video.vb_total) { + // WARN_ON(1); + } + + vb[0] = i2f(dstx); + vb[1] = i2f(dsty); + vb[2] = 0; + vb[3] = 0; + + vb[4] = i2f(dstx); + vb[5] = i2f(dsty+h); + vb[6] = 0; + vb[7] = i2f(h); + + vb[8] = i2f(dstx + w); + vb[9] = i2f(dsty + h); + vb[10] = i2f(w); + vb[11] = i2f(h); + + /* src 9 */ + set_tex_resource(rdev, FMT_8_8_8_8, + w, h, pitch/4, src_gpu_addr); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, pitch * h, src_gpu_addr); + + /* dst 23 */ + set_render_target(rdev, COLOR_8_8_8_8, + 1024, 768, rdev->mc.vram_start); + + /* scissors 12 */ + set_scissors(rdev, 0, 0, 1024, 768); + + /* Vertex buffer setup 14 */ + vb_gpu_addr = rdev->r600_video.vb_ib->gpu_addr + rdev->r600_video.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + 1024*4*768, rdev->mc.vram_start); + + /* 78 ring dwords per loop */ + vb += 12; + rdev->r600_video.vb_used += 12 * 4; + +} + + + +int r600_video_blit(uint64_t src_offset, int x, int y, + int w, int h, int pitch) +{ + struct radeon_device *rdev = main_device; + static struct radeon_fence *fence; + unsigned long irq_flags; + + int r; + + if(fence == NULL) + { + r = radeon_fence_create(rdev, &fence); + if (r) { + printf("%s epic fail", __FUNCTION__); + return r; + } + }; + + fence->evnt = CreateEvent(NULL, 0); + + mutex_lock(&rdev->r600_video.mutex); + rdev->r600_video.vb_ib = NULL; + r = r600_video_prepare_copy(rdev, h*pitch); + if (r) { +// if (rdev->r600_blit.vb_ib) +// radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); + mutex_unlock(&rdev->r600_video.mutex); + return r; + } + + r600_kms_video_blit(rdev, src_offset,x,y,w,h,pitch); + r600_video_done_copy(rdev, fence); + mutex_unlock(&rdev->r600_video.mutex); + + r = radeon_fence_wait(fence, false); + + write_lock_irqsave(&rdev->fence_drv.lock, irq_flags); + list_del(&fence->list); + fence->emited = false; + fence->signaled = false; + write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); + + return r; +}; + + +int r600_create_video(int w, int h, u32_t *outp) +{ + int r; + struct radeon_device *rdev = main_device; + struct radeon_bo *sobj = NULL; + uint64_t saddr; + void *uaddr; + + size_t size; + size_t pitch; + + pitch = radeon_align_pitch(rdev, w, 32, false) * 4; + + size = pitch * h; + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_GTT, &sobj); + if (r) { + goto fail; + } + r = radeon_bo_reserve(sobj, false); + if (unlikely(r != 0)) + goto fail; + r = radeon_bo_pin(sobj, RADEON_GEM_DOMAIN_GTT, &saddr); +// radeon_bo_unreserve(sobj); + if (r) { + goto fail; + } + + r = radeon_bo_user_map(sobj, &uaddr); + if (r) { + goto fail; + } + + ((uint64_t*)outp)[0] = saddr; + outp[2] = uaddr; + outp[3] = pitch; + +// dbgprintf("Create video surface %x, mapped at %x pitch %d\n", +// (uint32_t)saddr, uaddr, pitch); + return 0; + +fail: + return -1; +}; + + + + + diff --git a/drivers/video/drm/radeon/r700_vs.c b/drivers/video/drm/radeon/r700_vs.c new file mode 100644 index 0000000000..3758888c34 --- /dev/null +++ b/drivers/video/drm/radeon/r700_vs.c @@ -0,0 +1,280 @@ + +//typedef unsigned int uint32_t; + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "r600_shader.h" +#include "r600_reg.h" +#include "r600_reg_auto_r6xx.h" +#include "r600_reg_r6xx.h" +#include "r600_reg_r7xx.h" + + + +/* +vertex format + +struct vertex +{ + float x, y; + float s, t; +}; + +vertex shader + +VFETCH: ADDR(4) CNT(1) VALID_PIX + FETCH R0 +EXP_DONE POS0, R0.XY01 +EXT_DONE PARAM0, R0.ZW01 +*/ + +uint32_t R600_video_vs[] = +{ + + /* 0 */ + CF_DWORD0(ADDR(4)), + CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + /* I_COUNT(1),*/ 0, + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + /* 1 */ + CF_ALLOC_IMP_EXP_DWORD0( + ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ( + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + /* 2 */ + CF_ALLOC_IMP_EXP_DWORD0( + ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ( + SRC_SEL_X(SQ_SEL_Z), + SRC_SEL_Y(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)), + /* 3 */ + 0x00000000, + 0x00000000, + /* 4/5 */ + VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)), + VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)), + VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(SQ_ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)), + VTX_DWORD_PAD +}; + + +/* +pixel shader + +00 TEX: ADDR(2) CNT(1) VALID_PIX + 0 SAMPLE R0, v0.xy01, t0, s0 + 1 SAMPLE R1, v0.xy01, t0, s1 + +01 EXP_DONE: PIX0, R0 + +02 ALU: ADDR() CNT(1) + 2 KILLNE ____, R1.x___, c0.x___ + +END_OF_PROGRAM + + + + */ +uint32_t R600_video_ps[]= +{ + /* CF INST 0 */ + CF_DWORD0(ADDR(2)), + CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + /* I_COUNT(1), */ 0, + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + +#if 0 + /* CF INST 1 */ + CF_ALU_DWORD0(ADDR( ), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(1), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); +#endif + + /* CF INST 1 */ + CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + +#if 0 + /* KILLNE c0.x, r1.x */ + ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)), + R7xx_ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_KILLNE), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)), + +#endif + + /* TEX INST 0 */ + TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)), + TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), /* R */ + DST_SEL_Y(SQ_SEL_Y), /* G */ + DST_SEL_Z(SQ_SEL_Z), /* B */ + DST_SEL_W(SQ_SEL_W), /* A */ + LOD_BIAS(0), + COORD_TYPE_X(TEX_UNNORMALIZED), + COORD_TYPE_Y(TEX_UNNORMALIZED), + COORD_TYPE_Z(TEX_UNNORMALIZED), + COORD_TYPE_W(TEX_UNNORMALIZED)), + TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)), + TEX_DWORD_PAD + +#if 0 + TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)), + TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), /* R */ + DST_SEL_Y(SQ_SEL_MASK), /* G */ + DST_SEL_Z(SQ_SEL_MASK), /* B */ + DST_SEL_W(SQ_SEL_MASK), /* A */ + LOD_BIAS(0), + COORD_TYPE_X(TEX_UNNORMALIZED), + COORD_TYPE_Y(TEX_UNNORMALIZED), + COORD_TYPE_Z(TEX_UNNORMALIZED), + COORD_TYPE_W(TEX_UNNORMALIZED)), + TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)), + TEX_DWORD_PAD + +#endif +}; + +const u32 r600_video_ps_size = ARRAY_SIZE(R600_video_ps); +const u32 r600_video_vs_size = ARRAY_SIZE(R600_video_vs); + + diff --git a/drivers/video/drm/radeon/tracker/bitmap.c b/drivers/video/drm/radeon/tracker/bitmap.c new file mode 100644 index 0000000000..8dedfb64b3 --- /dev/null +++ b/drivers/video/drm/radeon/tracker/bitmap.c @@ -0,0 +1,79 @@ + +#include +#include +#include "radeon_drm.h" +#include "../radeon.h" +#include "../display.h" + +extern struct radeon_device *main_device; + +typedef struct +{ + kobj_t header; + + int width; + int height; + int stride; + uint64_t gaddr; + void *uaddr; + struct radeon_bo *robj; +}bitmap_t; + +int create_bitmap(bitmap_t **pbitmap, int width, int height) +{ + size_t size; + size_t pitch; + bitmap_t *bitmap; + uint64_t gaddr; + void *uaddr; + + struct radeon_device *rdev = main_device; + struct radeon_bo *sobj = NULL; + + int r; + + bitmap = CreateObject(GetPid(), sizeof(bitmap_t)); + if( bitmap == NULL) + { + *pbitmap = NULL; + return -1; + } + + pitch = radeon_align_pitch(rdev, width, 32, false) * 4; + + size = pitch * height; + + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_GTT, &sobj); + if (r) { + goto fail; + } + r = radeon_bo_reserve(sobj, false); + if (unlikely(r != 0)) + goto fail; + r = radeon_bo_pin(sobj, RADEON_GEM_DOMAIN_GTT, &gaddr); + if (r) { + goto fail; + } + + r = radeon_bo_user_map(sobj, &uaddr); + if (r) { + goto fail; + } + + bitmap->width = width; + bitmap->height = height; + bitmap->stride = pitch; + bitmap->gaddr = gaddr; + bitmap->uaddr = uaddr; + bitmap->robj = sobj; + + *pbitmap = bitmap; + return 0; + +fail: + + DestroyObject(bitmap); + return -1; + +};