forked from KolibriOS/kolibrios
atikms: missing files :(
git-svn-id: svn://kolibrios.org@2176 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
parent
37f3ab9eca
commit
e7b0ce1747
149
drivers/video/drm/radeon/r600_reg_r7xx.h
Normal file
149
drivers/video/drm/radeon/r600_reg_r7xx.h
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
* RadeonHD R6xx, R7xx Register documentation
|
||||
*
|
||||
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2008-2009 Matthias Hopf
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _R600_REG_R7xx_H_
|
||||
#define _R600_REG_R7xx_H_
|
||||
|
||||
/*
|
||||
* Register update for R7xx chips
|
||||
*/
|
||||
|
||||
/*
 * R7xx register offsets, bit flags, field masks/shifts and instruction
 * encodings.  Entries shown only inside comments are disabled in this
 * header and are kept as context for the constants that follow them.
 */
enum {
	R7XX_MC_VM_FB_LOCATION				= 0x00002024,

	/* GRBM_STATUS					= 0x00008010, */
	R7XX_TA_BUSY_bit				= 1 << 14,

	R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ		= 0x00008d8c,
	RING0_OFFSET_mask				= 0xff << 0,
	RING0_OFFSET_shift				= 0,
	ISOLATE_ES_ENABLE_bit				= 1 << 12,
	ISOLATE_GS_ENABLE_bit				= 1 << 13,
	VS_PC_LIMIT_ENABLE_bit				= 1 << 14,

	/* SQ_ALU_WORD0					= 0x00008dfc, */
	/* SRC0_SEL_mask				= 0x1ff << 0, */
	/* SRC1_SEL_mask				= 0x1ff << 13, */
	R7xx_SQ_ALU_SRC_1_DBL_L				= 0xf4,
	R7xx_SQ_ALU_SRC_1_DBL_M				= 0xf5,
	R7xx_SQ_ALU_SRC_0_5_DBL_L			= 0xf6,
	R7xx_SQ_ALU_SRC_0_5_DBL_M			= 0xf7,
	/* INDEX_MODE_mask				= 0x07 << 26, */
	R7xx_SQ_INDEX_GLOBAL				= 0x05,
	R7xx_SQ_INDEX_GLOBAL_AR_X			= 0x06,

	/* The R6xx and R7xx variants share the offset but lay out fields differently. */
	R6xx_SQ_ALU_WORD1_OP2				= 0x00008dfc,
	R7xx_SQ_ALU_WORD1_OP2_V2			= 0x00008dfc,
	R6xx_FOG_MERGE_bit				= 1 << 5,
	R6xx_OMOD_mask					= 0x03 << 6,
	R7xx_OMOD_mask					= 0x03 << 5,
	R6xx_OMOD_shift					= 6,
	R7xx_OMOD_shift					= 5,
	R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask		= 0x3ff << 8,
	R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask		= 0x7ff << 7,
	R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift		= 8,
	R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift	= 7,
	R7xx_SQ_OP2_INST_FREXP_64			= 0x07,
	R7xx_SQ_OP2_INST_ADD_64				= 0x17,
	R7xx_SQ_OP2_INST_MUL_64				= 0x1b,
	R7xx_SQ_OP2_INST_FLT64_TO_FLT32			= 0x1c,
	R7xx_SQ_OP2_INST_FLT32_TO_FLT64			= 0x1d,
	R7xx_SQ_OP2_INST_LDEXP_64			= 0x7a,
	R7xx_SQ_OP2_INST_FRACT_64			= 0x7b,
	R7xx_SQ_OP2_INST_PRED_SETGT_64			= 0x7c,
	R7xx_SQ_OP2_INST_PRED_SETE_64			= 0x7d,
	R7xx_SQ_OP2_INST_PRED_SETGE_64			= 0x7e,

	/* SQ_ALU_WORD1_OP3				= 0x00008dfc, */
	/* SRC2_SEL_mask				= 0x1ff << 0, */
	/* R7xx_SQ_ALU_SRC_1_DBL_L			= 0xf4, */
	/* R7xx_SQ_ALU_SRC_1_DBL_M			= 0xf5, */
	/* R7xx_SQ_ALU_SRC_0_5_DBL_L			= 0xf6, */
	/* R7xx_SQ_ALU_SRC_0_5_DBL_M			= 0xf7, */
	/* SQ_ALU_WORD1_OP3__ALU_INST_mask		= 0x1f << 13, */
	R7xx_SQ_OP3_INST_MULADD_64			= 0x08,
	R7xx_SQ_OP3_INST_MULADD_64_M2			= 0x09,
	R7xx_SQ_OP3_INST_MULADD_64_M4			= 0x0a,
	R7xx_SQ_OP3_INST_MULADD_64_D2			= 0x0b,

	/* SQ_CF_ALU_WORD1				= 0x00008dfc, */
	R6xx_USES_WATERFALL_bit				= 1 << 25,
	R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit		= 1 << 25,

	/* SQ_CF_ALLOC_EXPORT_WORD0			= 0x00008dfc, */
	/* ARRAY_BASE_mask				= 0x1fff << 0, */
	/* TYPE_mask					= 0x03 << 13, */
	/* SQ_EXPORT_PARAM				= 0x02, */
	/* X_UNUSED_FOR_SX_EXPORTS			= 0x03, */
	/* ELEM_SIZE_mask				= 0x03 << 30, */

	/* SQ_CF_ALLOC_EXPORT_WORD1			= 0x00008dfc, */
	/* SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask	= 0x7f << 23, */
	R7xx_SQ_CF_INST_MEM_EXPORT			= 0x3a,

	/* SQ_CF_WORD1					= 0x00008dfc, */
	/* SQ_CF_WORD1__COUNT_mask			= 0x07 << 10, */
	R7xx_COUNT_3_bit				= 1 << 19,
	/* SQ_CF_WORD1__CF_INST_mask			= 0x7f << 23, */
	R7xx_SQ_CF_INST_END_PROGRAM			= 0x19,
	R7xx_SQ_CF_INST_WAIT_ACK			= 0x1a,
	R7xx_SQ_CF_INST_TEX_ACK				= 0x1b,
	R7xx_SQ_CF_INST_VTX_ACK				= 0x1c,
	R7xx_SQ_CF_INST_VTX_TC_ACK			= 0x1d,

	/* SQ_VTX_WORD0					= 0x00008dfc, */
	/* VTX_INST_mask				= 0x1f << 0, */
	R7xx_SQ_VTX_INST_MEM				= 0x02,
	/* SQ_VTX_WORD2					= 0x00008dfc, */
	R7xx_SQ_VTX_WORD2__ALT_CONST_bit		= 1 << 20,

	/* SQ_TEX_WORD0					= 0x00008dfc, */
	/* TEX_INST_mask				= 0x1f << 0, */
	R7xx_X_MEMORY_READ				= 0x02,
	R7xx_SQ_TEX_INST_KEEP_GRADIENTS			= 0x0a,
	R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1	= 0x0f,
	R7xx_SQ_TEX_WORD0__ALT_CONST_bit		= 1 << 24,

	R7xx_PA_SC_EDGERULE				= 0x00028230,

	R7xx_SPI_THREAD_GROUPING			= 0x000286c8,
	PS_GROUPING_mask				= 0x1f << 0,
	PS_GROUPING_shift				= 0,
	VS_GROUPING_mask				= 0x1f << 8,
	VS_GROUPING_shift				= 8,
	GS_GROUPING_mask				= 0x1f << 16,
	GS_GROUPING_shift				= 16,
	ES_GROUPING_mask				= 0x1f << 24,
	ES_GROUPING_shift				= 24,

	R7xx_CB_SHADER_CONTROL				= 0x000287a0,
	RT0_ENABLE_bit					= 1 << 0,
	RT1_ENABLE_bit					= 1 << 1,
	RT2_ENABLE_bit					= 1 << 2,
	RT3_ENABLE_bit					= 1 << 3,
	RT4_ENABLE_bit					= 1 << 4,
	RT5_ENABLE_bit					= 1 << 5,
	RT6_ENABLE_bit					= 1 << 6,
	RT7_ENABLE_bit					= 1 << 7,

	/* DB_ALPHA_TO_MASK				= 0x00028d44, */
	R7xx_OFFSET_ROUND_bit				= 1 << 16,

	/* SQ_TEX_SAMPLER_MISC_0			= 0x0003d03c, */
	R7xx_TRUNCATE_COORD_bit				= 1 << 9,
	R7xx_DISABLE_CUBE_WRAP_bit			= 1 << 10,
};
|
||||
|
||||
#endif /* _R600_REG_R7xx_H_ */
|
350
drivers/video/drm/radeon/r600_shader.h
Normal file
350
drivers/video/drm/radeon/r600_shader.h
Normal file
@ -0,0 +1,350 @@
|
||||
/*
|
||||
* RadeonHD R6xx, R7xx DRI driver
|
||||
*
|
||||
* Copyright (C) 2008-2009 Alexander Deucher
|
||||
* Copyright (C) 2008-2009 Matthias Hopf
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Shader macros
|
||||
*/
|
||||
|
||||
#ifndef __SHADER_H__
|
||||
#define __SHADER_H__
|
||||
|
||||
//#include "radeon.h"
|
||||
|
||||
/* Restrictions of ALU instructions
|
||||
* order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
|
||||
* max of 3 different src GPRs per instr.
|
||||
* max of 4 different cfile constant components per instr.
|
||||
* max of 2 (different) constants (any type) for t.
|
||||
* bank swizzle (see below).
|
||||
* GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to
|
||||
* different indices (gpr,loop,nothing).
|
||||
* may use constant registers or constant cache, but not both.
|
||||
*/
|
||||
|
||||
/* Bank_swizzle: (pp. 297ff)
|
||||
* Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2).
|
||||
* per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.:
|
||||
* SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2
|
||||
* 1.x 2.x 012 1.x 2.x -
|
||||
* 3.x 1.y 201 1.y - 3.x
|
||||
* 2.x 1.y 102 (1.y) (2.x) -
|
||||
* If data is read in a cycle, multiple scalar instructions can reference it.
|
||||
* Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1.
|
||||
* No restrictions for constants or PV/PS.
|
||||
* t can load multiple components in a single cycle slot, but has to share cycles with xyzw.
|
||||
* t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210).
|
||||
* t with two constants may only load GPRs or PV/PS in cycle 2.
|
||||
*/
|
||||
|
||||
|
||||
/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
|
||||
|
||||
|
||||
// CF (control flow) instruction helpers.
//
// The one-argument macros below only label a field value at the call site;
// unless noted they expand to their argument unchanged.  The *_DWORDn macros
// pack the labelled fields into one little-endian 32-bit instruction word.
// Every field is converted to unsigned int before shifting so that values
// reaching bit 31 (e.g. BARRIER(1), ELEM_SIZE(4)) never overflow a signed int.

// addr
#define ADDR(x)			(x)
// pc
#define POP_COUNT(x)		(x)
// const
#define CF_CONST(x)		(x)
// cond
#define COND(x)			(x)	// SQ_COND_*
// count: callers pass the real count, the field holds count - 1 (0 stays 0)
#define I_COUNT(x)		((x) ? ((x) - 1) : 0)
// r7xx
#define COUNT_3(x)		(x)
// call count
#define CALL_COUNT(x)		(x)
// eop
#define END_OF_PROGRAM(x)	(x)
// vpm
#define VALID_PIXEL_MODE(x)	(x)
// cf inst
#define CF_INST(x)		(x)	// SQ_CF_INST_*

// wqm
#define WHOLE_QUAD_MODE(x)	(x)
// barrier
#define BARRIER(x)		(x)
// kb0
#define KCACHE_BANK0(x)		(x)
// kb1
#define KCACHE_BANK1(x)		(x)
// km0/1
#define KCACHE_MODE0(x)		(x)
#define KCACHE_MODE1(x)		(x)	// SQ_CF_KCACHE_*
// kcache addresses
#define KCACHE_ADDR0(x)		(x)
#define KCACHE_ADDR1(x)		(x)
// uw
#define USES_WATERFALL(x)	(x)

#define ARRAY_BASE(x)		(x)
// export pixel
#define CF_PIXEL_MRT0		0
#define CF_PIXEL_MRT1		1
#define CF_PIXEL_MRT2		2
#define CF_PIXEL_MRT3		3
#define CF_PIXEL_MRT4		4
#define CF_PIXEL_MRT5		5
#define CF_PIXEL_MRT6		6
#define CF_PIXEL_MRT7		7
// *_FOG: r6xx only
#define CF_PIXEL_MRT0_FOG	16
#define CF_PIXEL_MRT1_FOG	17
#define CF_PIXEL_MRT2_FOG	18
#define CF_PIXEL_MRT3_FOG	19
#define CF_PIXEL_MRT4_FOG	20
#define CF_PIXEL_MRT5_FOG	21
#define CF_PIXEL_MRT6_FOG	22
#define CF_PIXEL_MRT7_FOG	23
#define CF_PIXEL_Z		61
// export pos
#define CF_POS0			60
#define CF_POS1			61
#define CF_POS2			62
#define CF_POS3			63
// export param
// 0...31
#define TYPE(x)			(x)	// SQ_EXPORT_*
#if 0
// type export
#define SQ_EXPORT_PIXEL		0
#define SQ_EXPORT_POS		1
#define SQ_EXPORT_PARAM		2
// reserved 3
// type mem
#define SQ_EXPORT_WRITE		0
#define SQ_EXPORT_WRITE_IND	1
#define SQ_EXPORT_WRITE_ACK	2
#define SQ_EXPORT_WRITE_IND_ACK	3
#endif

#define RW_GPR(x)		(x)
#define RW_REL(x)		(x)
#define ABSOLUTE		0
#define RELATIVE		1
#define INDEX_GPR(x)		(x)
// Like I_COUNT: field holds value - 1, 0 stays 0.  The argument is fully
// parenthesized so that expressions such as (a | b) expand correctly.
#define ELEM_SIZE(x)		((x) ? ((x) - 1) : 0)
#define COMP_MASK(x)		(x)
#define R6xx_ELEM_LOOP(x)	(x)
#define BURST_COUNT(x)		((x) ? ((x) - 1) : 0)

// swiz
#define SRC_SEL_X(x)		(x)	// SQ_SEL_* each
#define SRC_SEL_Y(x)		(x)
#define SRC_SEL_Z(x)		(x)
#define SRC_SEL_W(x)		(x)

#define CF_DWORD0(addr)		cpu_to_le32((addr))
// R7xx has another entry (COUNT3), but that is only used for adding a bit to count.
// We allow one more bit for count in the argument of the macro on R7xx instead:
// the low three bits of count go to bits 10..12, the remaining bits to bit 19 upward.
// R6xx: [0,7] R7xx: [1,16]
#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
	cpu_to_le32((((unsigned int)(pc) << 0) | ((unsigned int)(cf_const) << 3) | \
		     ((unsigned int)(cond) << 8) | (((unsigned int)(count) & 7) << 10) | \
		     (((unsigned int)(count) >> 3) << 19) | ((unsigned int)(call_count) << 13) | \
		     ((unsigned int)(eop) << 21) | ((unsigned int)(vpm) << 22) | \
		     ((unsigned int)(cf_inst) << 23) | ((unsigned int)(wqm) << 30) | \
		     ((unsigned int)(b) << 31)))

#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
	cpu_to_le32((((unsigned int)(addr) << 0) | ((unsigned int)(kb0) << 22) | \
		     ((unsigned int)(kb1) << 26) | ((unsigned int)(km0) << 30)))
#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
	cpu_to_le32((((unsigned int)(km1) << 0) | ((unsigned int)(kcache_addr0) << 2) | \
		     ((unsigned int)(kcache_addr1) << 10) | ((unsigned int)(count) << 18) | \
		     ((unsigned int)(uw) << 25) | ((unsigned int)(cf_inst) << 26) | \
		     ((unsigned int)(wqm) << 30) | ((unsigned int)(b) << 31)))

#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
	cpu_to_le32((((unsigned int)(array_base) << 0) | ((unsigned int)(type) << 13) | \
		     ((unsigned int)(rw_gpr) << 15) | ((unsigned int)(rr) << 22) | \
		     ((unsigned int)(index_gpr) << 23) | ((unsigned int)(es) << 30)))
// R7xx apparently doesn't have the ELEM_LOOP entry any more
// We still expose it, but ELEM_LOOP is explicitly R6xx now.
// TODO: is this just forgotten in the docs, or really not available any more?
#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
	cpu_to_le32((((unsigned int)(array_size) << 0) | ((unsigned int)(comp_mask) << 12) | \
		     ((unsigned int)(el) << 16) | ((unsigned int)(bc) << 17) | \
		     ((unsigned int)(eop) << 21) | ((unsigned int)(vpm) << 22) | \
		     ((unsigned int)(cf_inst) << 23) | ((unsigned int)(wqm) << 30) | \
		     ((unsigned int)(b) << 31)))
#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
	cpu_to_le32((((unsigned int)(sel_x) << 0) | ((unsigned int)(sel_y) << 3) | \
		     ((unsigned int)(sel_z) << 6) | ((unsigned int)(sel_w) << 9) | \
		     ((unsigned int)(el) << 16) | ((unsigned int)(bc) << 17) | \
		     ((unsigned int)(eop) << 21) | ((unsigned int)(vpm) << 22) | \
		     ((unsigned int)(cf_inst) << 23) | ((unsigned int)(wqm) << 30) | \
		     ((unsigned int)(b) << 31)))
|
||||
|
||||
// ALU clause instruction helpers.
//
// As with the CF helpers, the one-argument macros only label a field value
// and expand to their argument.  The ALU_DWORD* macros pack the fields into a
// little-endian 32-bit word; each field is converted to unsigned int before
// shifting so that LAST(1) / CLAMP(1) at bit 31 do not overflow a signed int.
#define SRC0_SEL(x)		(x)
#define SRC1_SEL(x)		(x)
#define SRC2_SEL(x)		(x)
// src[0-2]_sel
//   0-127   GPR
// 128-159   kcache constants bank 0
// 160-191   kcache constants bank 1
// 248-255   special SQ_ALU_SRC_* (0, 1, etc.)
#define ALU_SRC_GPR_BASE	0
#define ALU_SRC_KCACHE0_BASE	128
#define ALU_SRC_KCACHE1_BASE	160
#define ALU_SRC_CFILE_BASE	256

#define SRC0_REL(x)		(x)
#define SRC1_REL(x)		(x)
#define SRC2_REL(x)		(x)
// elem
#define SRC0_ELEM(x)		(x)
#define SRC1_ELEM(x)		(x)
#define SRC2_ELEM(x)		(x)
#define ELEM_X			0
#define ELEM_Y			1
#define ELEM_Z			2
#define ELEM_W			3
// neg
#define SRC0_NEG(x)		(x)
#define SRC1_NEG(x)		(x)
#define SRC2_NEG(x)		(x)
// im
#define INDEX_MODE(x)		(x)	// SQ_INDEX_*
// ps
#define PRED_SEL(x)		(x)	// SQ_PRED_SEL_*
// last
#define LAST(x)			(x)
// abs
#define SRC0_ABS(x)		(x)
#define SRC1_ABS(x)		(x)
// uem
#define UPDATE_EXECUTE_MASK(x)	(x)
// up
#define UPDATE_PRED(x)		(x)
// wm
#define WRITE_MASK(x)		(x)
// fm
#define FOG_MERGE(x)		(x)
// omod
#define OMOD(x)			(x)	// SQ_ALU_OMOD_*
// alu inst
#define ALU_INST(x)		(x)	// SQ_ALU_INST_*
// bs
#define BANK_SWIZZLE(x)		(x)	// SQ_ALU_VEC_*
#define DST_GPR(x)		(x)
#define DST_REL(x)		(x)
#define DST_ELEM(x)		(x)
#define CLAMP(x)		(x)

#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
	cpu_to_le32((((unsigned int)(src0_sel) << 0) | ((unsigned int)(s0r) << 9) | \
		     ((unsigned int)(s0e) << 10) | ((unsigned int)(s0n) << 12) | \
		     ((unsigned int)(src1_sel) << 13) | ((unsigned int)(s1r) << 22) | \
		     ((unsigned int)(s1e) << 23) | ((unsigned int)(s1n) << 25) | \
		     ((unsigned int)(im) << 26) | ((unsigned int)(ps) << 29) | \
		     ((unsigned int)(last) << 31)))
// R7xx has alu_inst at a different slot, and no fog merge any more (no fixed-function fog any more)
#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
	cpu_to_le32((((unsigned int)(s0a) << 0) | ((unsigned int)(s1a) << 1) | \
		     ((unsigned int)(uem) << 2) | ((unsigned int)(up) << 3) | \
		     ((unsigned int)(wm) << 4) | ((unsigned int)(fm) << 5) | \
		     ((unsigned int)(omod) << 6) | ((unsigned int)(alu_inst) << 8) | \
		     ((unsigned int)(bs) << 18) | ((unsigned int)(dst_gpr) << 21) | \
		     ((unsigned int)(dr) << 28) | ((unsigned int)(de) << 29) | \
		     ((unsigned int)(clamp) << 31)))
#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
	cpu_to_le32((((unsigned int)(s0a) << 0) | ((unsigned int)(s1a) << 1) | \
		     ((unsigned int)(uem) << 2) | ((unsigned int)(up) << 3) | \
		     ((unsigned int)(wm) << 4) | ((unsigned int)(omod) << 5) | \
		     ((unsigned int)(alu_inst) << 7) | ((unsigned int)(bs) << 18) | \
		     ((unsigned int)(dst_gpr) << 21) | ((unsigned int)(dr) << 28) | \
		     ((unsigned int)(de) << 29) | ((unsigned int)(clamp) << 31)))
// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays
// Fog is NOT USED on R7xx, even if specified.
// NOTE(review): this compares against CHIP_FAMILY_RV770, which is not defined in
// this header; confirm the including driver provides that name.
#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
	((chipfamily) < CHIP_FAMILY_RV770 ? \
	 R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
	 R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))
#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
	cpu_to_le32((((unsigned int)(src2_sel) << 0) | ((unsigned int)(s2r) << 9) | \
		     ((unsigned int)(s2e) << 10) | ((unsigned int)(s2n) << 12) | \
		     ((unsigned int)(alu_inst) << 13) | ((unsigned int)(bs) << 18) | \
		     ((unsigned int)(dst_gpr) << 21) | ((unsigned int)(dr) << 28) | \
		     ((unsigned int)(de) << 29) | ((unsigned int)(clamp) << 31)))
|
||||
|
||||
// VTX clause instruction helpers.
//
// One-argument macros label a field value and expand to their argument
// (MEGA_FETCH_COUNT stores count - 1, with 0 left as 0).  The VTX_DWORD*
// macros pack fields into a little-endian 32-bit word; fields are converted
// to unsigned int before shifting so that values reaching bit 31
// (e.g. MEGA_FETCH_COUNT(64) at bit 26, SRF_MODE_ALL(1) at bit 31) do not
// overflow a signed int.

// vtx insts
#define VTX_INST(x)		(x)	// SQ_VTX_INST_*

// fetch type
#define FETCH_TYPE(x)		(x)	// SQ_VTX_FETCH_*

#define FETCH_WHOLE_QUAD(x)	(x)
#define BUFFER_ID(x)		(x)
#define SRC_GPR(x)		(x)
#define SRC_REL(x)		(x)
#define MEGA_FETCH_COUNT(x)	((x) ? ((x) - 1) : 0)

#define SEMANTIC_ID(x)		(x)
#define DST_SEL_X(x)		(x)
#define DST_SEL_Y(x)		(x)
#define DST_SEL_Z(x)		(x)
#define DST_SEL_W(x)		(x)
#define USE_CONST_FIELDS(x)	(x)
#define DATA_FORMAT(x)		(x)
// num format
#define NUM_FORMAT_ALL(x)	(x)	// SQ_NUM_FORMAT_*
// format comp
#define FORMAT_COMP_ALL(x)	(x)	// SQ_FORMAT_COMP_*
// sma
#define SRF_MODE_ALL(x)		(x)
#define SRF_MODE_ZERO_CLAMP_MINUS_ONE	0
#define SRF_MODE_NO_ZERO		1
#define OFFSET(x)		(x)
// endian swap
#define ENDIAN_SWAP(x)		(x)	// SQ_ENDIAN_*
#define CONST_BUF_NO_STRIDE(x)	(x)
// mf
#define MEGA_FETCH(x)		(x)

#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
	cpu_to_le32((((unsigned int)(vtx_inst) << 0) | ((unsigned int)(ft) << 5) | \
		     ((unsigned int)(fwq) << 7) | ((unsigned int)(buffer_id) << 8) | \
		     ((unsigned int)(src_gpr) << 16) | ((unsigned int)(sr) << 23) | \
		     ((unsigned int)(ssx) << 24) | ((unsigned int)(mfc) << 26)))
#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
	cpu_to_le32((((unsigned int)(semantic_id) << 0) | ((unsigned int)(dsx) << 9) | \
		     ((unsigned int)(dsy) << 12) | ((unsigned int)(dsz) << 15) | \
		     ((unsigned int)(dsw) << 18) | ((unsigned int)(ucf) << 21) | \
		     ((unsigned int)(data_format) << 22) | ((unsigned int)(nfa) << 28) | \
		     ((unsigned int)(fca) << 30) | ((unsigned int)(sma) << 31)))
#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
	cpu_to_le32((((unsigned int)(dst_gpr) << 0) | ((unsigned int)(dr) << 7) | \
		     ((unsigned int)(dsx) << 9) | ((unsigned int)(dsy) << 12) | \
		     ((unsigned int)(dsz) << 15) | ((unsigned int)(dsw) << 18) | \
		     ((unsigned int)(ucf) << 21) | ((unsigned int)(data_format) << 22) | \
		     ((unsigned int)(nfa) << 28) | ((unsigned int)(fca) << 30) | \
		     ((unsigned int)(sma) << 31)))
#define VTX_DWORD2(offset, es, cbns, mf) \
	cpu_to_le32((((unsigned int)(offset) << 0) | ((unsigned int)(es) << 16) | \
		     ((unsigned int)(cbns) << 18) | ((unsigned int)(mf) << 19)))
#define VTX_DWORD_PAD		cpu_to_le32(0x00000000)
|
||||
|
||||
// TEX clause instruction helpers.
//
// One-argument macros label a field value and expand to their argument,
// except OFFSET_X/Y/Z which encode a texel offset (see below).  The
// TEX_DWORD* macros pack fields into a little-endian 32-bit word; fields are
// converted to unsigned int before shifting so that COORD_TYPE_W(1) at bit 31
// and a 3-bit selector at bit 29 do not overflow a signed int.

// tex insts
#define TEX_INST(x)		(x)	// SQ_TEX_INST_*

#define BC_FRAC_MODE(x)		(x)
// Identical to the definition in the VTX section; a repeated identical
// #define is permitted by C.
#define FETCH_WHOLE_QUAD(x)	(x)
#define RESOURCE_ID(x)		(x)
#define R7xx_ALT_CONST(x)	(x)

#define LOD_BIAS(x)		(x)
// ct
#define COORD_TYPE_X(x)		(x)
#define COORD_TYPE_Y(x)		(x)
#define COORD_TYPE_Z(x)		(x)
#define COORD_TYPE_W(x)		(x)
#define TEX_UNNORMALIZED	0
#define TEX_NORMALIZED		1
// 4:1-bits 2's-complement fixed-point: [-8.0..7.5].
// The value is doubled BEFORE the conversion to int so that half steps
// (e.g. 7.5 -> 15) survive; integer arguments give the same result as before.
#define OFFSET_X(x)		((int)((x) * 2) & 0x1f)
#define OFFSET_Y(x)		((int)((x) * 2) & 0x1f)
#define OFFSET_Z(x)		((int)((x) * 2) & 0x1f)
#define SAMPLER_ID(x)		(x)

// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only
#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
	cpu_to_le32((((unsigned int)(tex_inst) << 0) | ((unsigned int)(bfm) << 5) | \
		     ((unsigned int)(fwq) << 7) | ((unsigned int)(resource_id) << 8) | \
		     ((unsigned int)(src_gpr) << 16) | ((unsigned int)(sr) << 23) | \
		     ((unsigned int)(ac) << 24)))
#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
	cpu_to_le32((((unsigned int)(dst_gpr) << 0) | ((unsigned int)(dr) << 7) | \
		     ((unsigned int)(dsx) << 9) | ((unsigned int)(dsy) << 12) | \
		     ((unsigned int)(dsz) << 15) | ((unsigned int)(dsw) << 18) | \
		     ((unsigned int)(lod_bias) << 21) | ((unsigned int)(ctx) << 28) | \
		     ((unsigned int)(cty) << 29) | ((unsigned int)(ctz) << 30) | \
		     ((unsigned int)(ctw) << 31)))
#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
	cpu_to_le32((((unsigned int)(offset_x) << 0) | ((unsigned int)(offset_y) << 5) | \
		     ((unsigned int)(offset_z) << 10) | ((unsigned int)(sampler_id) << 15) | \
		     ((unsigned int)(ssx) << 20) | ((unsigned int)(ssy) << 23) | \
		     ((unsigned int)(ssz) << 26) | ((unsigned int)(ssw) << 29)))
#define TEX_DWORD_PAD		cpu_to_le32(0x00000000)
|
||||
|
||||
#endif
|
784
drivers/video/drm/radeon/r600_video.c
Normal file
784
drivers/video/drm/radeon/r600_video.c
Normal file
@ -0,0 +1,784 @@
|
||||
#include "drmP.h"
|
||||
#include "drm.h"
|
||||
#include "radeon_drm.h"
|
||||
#include "radeon.h"
|
||||
|
||||
#include "r600d.h"
|
||||
|
||||
#define DI_PT_RECTLIST 0x11
|
||||
#define DI_INDEX_SIZE_16_BIT 0x0
|
||||
#define DI_SRC_SEL_AUTO_INDEX 0x2
|
||||
|
||||
#define FMT_8 0x1
|
||||
#define FMT_5_6_5 0x8
|
||||
#define FMT_8_8_8_8 0x1a
|
||||
#define COLOR_8 0x1
|
||||
#define COLOR_5_6_5 0x8
|
||||
#define COLOR_8_8_8_8 0x1a
|
||||
|
||||
//#define CP_PACKET2 0x80000000
|
||||
//#define PACKET2_PAD_SHIFT 0
|
||||
//#define PACKET2_PAD_MASK (0x3fffffff << 0)
|
||||
|
||||
//#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
|
||||
|
||||
extern const u32 r7xx_default_state[];
|
||||
extern const u32 r6xx_default_state[];
|
||||
extern const u32 r6xx_default_size, r7xx_default_size;
|
||||
|
||||
extern const u32 R600_video_ps[];
|
||||
extern const u32 R600_video_vs[];
|
||||
|
||||
extern const u32 r600_video_ps_size;
|
||||
extern const u32 r600_video_vs_size;
|
||||
|
||||
extern struct radeon_device *main_device;
|
||||
|
||||
int r600_video_init(struct radeon_device *rdev)
|
||||
{
|
||||
u32 obj_size;
|
||||
int i, r, dwords;
|
||||
void *ptr;
|
||||
u32 packet2s[16];
|
||||
int num_packet2s = 0;
|
||||
|
||||
/* pin copy shader into vram if already initialized */
|
||||
if (rdev->r600_video.shader_obj)
|
||||
goto done;
|
||||
|
||||
mutex_init(&rdev->r600_video.mutex);
|
||||
rdev->r600_video.state_offset = 0;
|
||||
|
||||
if (rdev->family >= CHIP_RV770)
|
||||
rdev->r600_video.state_len = r7xx_default_size;
|
||||
else
|
||||
rdev->r600_video.state_len = r6xx_default_size;
|
||||
|
||||
dwords = rdev->r600_video.state_len;
|
||||
while (dwords & 0xf) {
|
||||
packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
|
||||
dwords++;
|
||||
}
|
||||
|
||||
obj_size = dwords * 4;
|
||||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
rdev->r600_video.vs_offset = obj_size;
|
||||
obj_size += r600_video_vs_size * 4;
|
||||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
rdev->r600_video.ps_offset = obj_size;
|
||||
obj_size += r600_video_ps_size * 4;
|
||||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
|
||||
&rdev->r600_video.shader_obj);
|
||||
if (r) {
|
||||
DRM_ERROR("r600 failed to allocate video shader\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
DRM_DEBUG("r6xx video blit allocated bo %08x vs %08x ps %08x\n",
|
||||
obj_size,
|
||||
rdev->r600_video.vs_offset, rdev->r600_video.ps_offset);
|
||||
|
||||
r = radeon_bo_reserve(rdev->r600_video.shader_obj, false);
|
||||
if (unlikely(r != 0))
|
||||
return r;
|
||||
r = radeon_bo_kmap(rdev->r600_video.shader_obj, &ptr);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to map blit object %d\n", r);
|
||||
return r;
|
||||
}
|
||||
if (rdev->family >= CHIP_RV770)
|
||||
memcpy(ptr + rdev->r600_video.state_offset,
|
||||
r7xx_default_state, rdev->r600_video.state_len * 4);
|
||||
else
|
||||
memcpy(ptr + rdev->r600_video.state_offset,
|
||||
r6xx_default_state, rdev->r600_video.state_len * 4);
|
||||
if (num_packet2s)
|
||||
memcpy(ptr + rdev->r600_video.state_offset + (rdev->r600_video.state_len * 4),
|
||||
packet2s, num_packet2s * 4);
|
||||
for (i = 0; i < r600_video_vs_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_video.vs_offset + i * 4) = cpu_to_le32(R600_video_vs[i]);
|
||||
for (i = 0; i < r600_video_ps_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_video.ps_offset + i * 4) = cpu_to_le32(R600_video_ps[i]);
|
||||
radeon_bo_kunmap(rdev->r600_video.shader_obj);
|
||||
radeon_bo_unreserve(rdev->r600_video.shader_obj);
|
||||
|
||||
done:
|
||||
r = radeon_bo_reserve(rdev->r600_video.shader_obj, false);
|
||||
if (unlikely(r != 0))
|
||||
return r;
|
||||
r = radeon_bo_pin(rdev->r600_video.shader_obj, RADEON_GEM_DOMAIN_VRAM,
|
||||
&rdev->r600_video.shader_gpu_addr);
|
||||
radeon_bo_unreserve(rdev->r600_video.shader_obj);
|
||||
if (r) {
|
||||
dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* emits 21 on rv770+, 23 on r600 */
|
||||
static void
|
||||
set_render_target(struct radeon_device *rdev, int format,
|
||||
int w, int h, u64 gpu_addr)
|
||||
{
|
||||
u32 cb_color_info;
|
||||
int pitch, slice;
|
||||
|
||||
h = ALIGN(h, 8);
|
||||
if (h < 8)
|
||||
h = 8;
|
||||
|
||||
cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
|
||||
pitch = (w / 8) - 1;
|
||||
slice = ((w * h) / 64) - 1;
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, gpu_addr >> 8);
|
||||
|
||||
if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
|
||||
radeon_ring_write(rdev, 2 << 0);
|
||||
}
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, (pitch << 0) | (slice << 10));
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, cb_color_info);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
}
|
||||
|
||||
/* emits 5dw */
|
||||
static void
|
||||
cp_set_surface_sync(struct radeon_device *rdev,
|
||||
u32 sync_type, u32 size,
|
||||
u64 mc_addr)
|
||||
{
|
||||
u32 cp_coher_size;
|
||||
|
||||
if (size == 0xffffffff)
|
||||
cp_coher_size = 0xffffffff;
|
||||
else
|
||||
cp_coher_size = ((size + 255) >> 8);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
|
||||
radeon_ring_write(rdev, sync_type);
|
||||
radeon_ring_write(rdev, cp_coher_size);
|
||||
radeon_ring_write(rdev, mc_addr >> 8);
|
||||
radeon_ring_write(rdev, 10); /* poll interval */
|
||||
}
|
||||
|
||||
/* emits 21dw + 1 surface sync = 26dw */
|
||||
static void
|
||||
set_shaders(struct radeon_device *rdev)
|
||||
{
|
||||
u64 gpu_addr;
|
||||
u32 sq_pgm_resources;
|
||||
|
||||
/* setup shader regs */
|
||||
sq_pgm_resources = (1 << 0);
|
||||
|
||||
/* VS */
|
||||
gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.vs_offset;
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, gpu_addr >> 8);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, sq_pgm_resources);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
/* PS */
|
||||
gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.ps_offset;
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, gpu_addr >> 8);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 2);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.vs_offset;
|
||||
cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
|
||||
}
|
||||
|
||||
/* emits 9 + 1 sync (5) = 14*/
|
||||
static void
|
||||
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
|
||||
{
|
||||
u32 sq_vtx_constant_word2;
|
||||
|
||||
sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
|
||||
#ifdef __BIG_ENDIAN
|
||||
sq_vtx_constant_word2 |= (2 << 30);
|
||||
#endif
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
|
||||
radeon_ring_write(rdev, 0x460);
|
||||
radeon_ring_write(rdev, gpu_addr & 0xffffffff);
|
||||
radeon_ring_write(rdev, 48 - 1);
|
||||
radeon_ring_write(rdev, sq_vtx_constant_word2);
|
||||
radeon_ring_write(rdev, 1 << 0);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
|
||||
|
||||
if ((rdev->family == CHIP_RV610) ||
|
||||
(rdev->family == CHIP_RV620) ||
|
||||
(rdev->family == CHIP_RS780) ||
|
||||
(rdev->family == CHIP_RS880) ||
|
||||
(rdev->family == CHIP_RV710))
|
||||
cp_set_surface_sync(rdev,
|
||||
PACKET3_TC_ACTION_ENA, 48, gpu_addr);
|
||||
else
|
||||
cp_set_surface_sync(rdev,
|
||||
PACKET3_VC_ACTION_ENA, 48, gpu_addr);
|
||||
}
|
||||
|
||||
/* emits 9 */
|
||||
static void
|
||||
set_tex_resource(struct radeon_device *rdev,
|
||||
int format, int w, int h, int pitch,
|
||||
u64 gpu_addr)
|
||||
{
|
||||
uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
|
||||
|
||||
if (h < 1)
|
||||
h = 1;
|
||||
|
||||
sq_tex_resource_word0 = (1 << 0) | (1 << 3);
|
||||
sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
|
||||
((w - 1) << 19));
|
||||
|
||||
sq_tex_resource_word1 = (format << 26);
|
||||
sq_tex_resource_word1 |= ((h - 1) << 0);
|
||||
|
||||
sq_tex_resource_word4 = ((1 << 14) |
|
||||
(0 << 16) |
|
||||
(1 << 19) |
|
||||
(2 << 22) |
|
||||
(3 << 25));
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, sq_tex_resource_word0);
|
||||
radeon_ring_write(rdev, sq_tex_resource_word1);
|
||||
radeon_ring_write(rdev, gpu_addr >> 8);
|
||||
radeon_ring_write(rdev, gpu_addr >> 8);
|
||||
radeon_ring_write(rdev, sq_tex_resource_word4);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
|
||||
}
|
||||
|
||||
/* emits 12 */
|
||||
static void
|
||||
set_scissors(struct radeon_device *rdev, int x1, int y1,
|
||||
int x2, int y2)
|
||||
{
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
|
||||
radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
|
||||
radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
|
||||
radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
|
||||
radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
|
||||
radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
|
||||
radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
|
||||
}
|
||||
|
||||
/* emits 10 */
|
||||
static void
|
||||
draw_auto(struct radeon_device *rdev)
|
||||
{
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, DI_PT_RECTLIST);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
|
||||
radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
|
||||
radeon_ring_write(rdev, 1);
|
||||
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
|
||||
radeon_ring_write(rdev, 3);
|
||||
radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* emits 14 */
|
||||
static void
|
||||
set_default_state(struct radeon_device *rdev)
|
||||
{
|
||||
u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
|
||||
u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
|
||||
int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
|
||||
int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
|
||||
int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
|
||||
u64 gpu_addr;
|
||||
int dwords;
|
||||
|
||||
switch (rdev->family) {
|
||||
case CHIP_R600:
|
||||
num_ps_gprs = 192;
|
||||
num_vs_gprs = 56;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 136;
|
||||
num_vs_threads = 48;
|
||||
num_gs_threads = 4;
|
||||
num_es_threads = 4;
|
||||
num_ps_stack_entries = 128;
|
||||
num_vs_stack_entries = 128;
|
||||
num_gs_stack_entries = 0;
|
||||
num_es_stack_entries = 0;
|
||||
break;
|
||||
case CHIP_RV630:
|
||||
case CHIP_RV635:
|
||||
num_ps_gprs = 84;
|
||||
num_vs_gprs = 36;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 144;
|
||||
num_vs_threads = 40;
|
||||
num_gs_threads = 4;
|
||||
num_es_threads = 4;
|
||||
num_ps_stack_entries = 40;
|
||||
num_vs_stack_entries = 40;
|
||||
num_gs_stack_entries = 32;
|
||||
num_es_stack_entries = 16;
|
||||
break;
|
||||
case CHIP_RV610:
|
||||
case CHIP_RV620:
|
||||
case CHIP_RS780:
|
||||
case CHIP_RS880:
|
||||
default:
|
||||
num_ps_gprs = 84;
|
||||
num_vs_gprs = 36;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 136;
|
||||
num_vs_threads = 48;
|
||||
num_gs_threads = 4;
|
||||
num_es_threads = 4;
|
||||
num_ps_stack_entries = 40;
|
||||
num_vs_stack_entries = 40;
|
||||
num_gs_stack_entries = 32;
|
||||
num_es_stack_entries = 16;
|
||||
break;
|
||||
case CHIP_RV670:
|
||||
num_ps_gprs = 144;
|
||||
num_vs_gprs = 40;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 136;
|
||||
num_vs_threads = 48;
|
||||
num_gs_threads = 4;
|
||||
num_es_threads = 4;
|
||||
num_ps_stack_entries = 40;
|
||||
num_vs_stack_entries = 40;
|
||||
num_gs_stack_entries = 32;
|
||||
num_es_stack_entries = 16;
|
||||
break;
|
||||
case CHIP_RV770:
|
||||
num_ps_gprs = 192;
|
||||
num_vs_gprs = 56;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 188;
|
||||
num_vs_threads = 60;
|
||||
num_gs_threads = 0;
|
||||
num_es_threads = 0;
|
||||
num_ps_stack_entries = 256;
|
||||
num_vs_stack_entries = 256;
|
||||
num_gs_stack_entries = 0;
|
||||
num_es_stack_entries = 0;
|
||||
break;
|
||||
case CHIP_RV730:
|
||||
case CHIP_RV740:
|
||||
num_ps_gprs = 84;
|
||||
num_vs_gprs = 36;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 188;
|
||||
num_vs_threads = 60;
|
||||
num_gs_threads = 0;
|
||||
num_es_threads = 0;
|
||||
num_ps_stack_entries = 128;
|
||||
num_vs_stack_entries = 128;
|
||||
num_gs_stack_entries = 0;
|
||||
num_es_stack_entries = 0;
|
||||
break;
|
||||
case CHIP_RV710:
|
||||
num_ps_gprs = 192;
|
||||
num_vs_gprs = 56;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 144;
|
||||
num_vs_threads = 48;
|
||||
num_gs_threads = 0;
|
||||
num_es_threads = 0;
|
||||
num_ps_stack_entries = 128;
|
||||
num_vs_stack_entries = 128;
|
||||
num_gs_stack_entries = 0;
|
||||
num_es_stack_entries = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((rdev->family == CHIP_RV610) ||
|
||||
(rdev->family == CHIP_RV620) ||
|
||||
(rdev->family == CHIP_RS780) ||
|
||||
(rdev->family == CHIP_RS880) ||
|
||||
(rdev->family == CHIP_RV710))
|
||||
sq_config = 0;
|
||||
else
|
||||
sq_config = VC_ENABLE;
|
||||
|
||||
sq_config |= (DX9_CONSTS |
|
||||
ALU_INST_PREFER_VECTOR |
|
||||
PS_PRIO(0) |
|
||||
VS_PRIO(1) |
|
||||
GS_PRIO(2) |
|
||||
ES_PRIO(3));
|
||||
|
||||
sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
|
||||
NUM_VS_GPRS(num_vs_gprs) |
|
||||
NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
|
||||
sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
|
||||
NUM_ES_GPRS(num_es_gprs));
|
||||
sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
|
||||
NUM_VS_THREADS(num_vs_threads) |
|
||||
NUM_GS_THREADS(num_gs_threads) |
|
||||
NUM_ES_THREADS(num_es_threads));
|
||||
sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
|
||||
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
|
||||
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
|
||||
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
|
||||
|
||||
/* emit an IB pointing at default state */
|
||||
dwords = ALIGN(rdev->r600_video.state_len, 0x10);
|
||||
gpu_addr = rdev->r600_video.shader_gpu_addr + rdev->r600_video.state_offset;
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
|
||||
radeon_ring_write(rdev, (gpu_addr & 0xFFFFFFFC));
|
||||
radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
|
||||
radeon_ring_write(rdev, dwords);
|
||||
|
||||
/* SQ config */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
|
||||
radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
|
||||
radeon_ring_write(rdev, sq_config);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_thread_resource_mgmt);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
|
||||
}
|
||||
|
||||
/*
 * Convert an unsigned integer to the bit pattern of the equivalent IEEE 754
 * single-precision float, without using floating-point instructions.
 *
 * The result is exact for inputs up to 2^24; larger inputs are rounded
 * towards zero because the low bits do not fit in the 23-bit fraction.
 * (The previous version only looked at the low 14 bits, so e.g. 16384
 * silently converted to 0.0; results for inputs below 2^14 are unchanged.)
 *
 * @input: value to convert
 *
 * Returns the 32-bit float encoding of @input.
 */
static inline uint32_t i2f(uint32_t input)
{
	uint32_t msb = 31;
	uint32_t fraction;

	/* 0 is a special case: no leading one bit to normalise on */
	if (input == 0)
		return 0;

	/* locate the most significant set bit; it becomes the implicit 1 */
	while (!(input & ((uint32_t)1 << msb)))
		msb--;

	/* align the leading one to bit 23, dropping low bits if needed */
	if (msb > 23)
		fraction = input >> (msb - 23);
	else
		fraction = input << (23 - msb);

	/* exponent is biased by 127; mask off the (assumed) top bit */
	return ((127 + msb) << 23) | (fraction & 0x7fffff);
}
|
||||
|
||||
static int r600_vb_ib_get(struct radeon_device *rdev)
|
||||
{
|
||||
int r;
|
||||
r = radeon_ib_get(rdev, &rdev->r600_video.vb_ib);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to get IB for vertex buffer\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
rdev->r600_video.vb_total = 64*1024;
|
||||
rdev->r600_video.vb_used = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void r600_vb_ib_put(struct radeon_device *rdev)
|
||||
{
|
||||
radeon_fence_emit(rdev, rdev->r600_video.vb_ib->fence);
|
||||
radeon_ib_free(rdev, &rdev->r600_video.vb_ib);
|
||||
}
|
||||
|
||||
|
||||
int r600_video_prepare_copy(struct radeon_device *rdev, int size_bytes)
|
||||
{
|
||||
int r;
|
||||
int ring_size, line_size;
|
||||
int max_size;
|
||||
/* loops of emits 64 + fence emit possible */
|
||||
int dwords_per_loop = 76, num_loops;
|
||||
|
||||
r = r600_vb_ib_get(rdev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* set_render_target emits 2 extra dwords on rv6xx */
|
||||
if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
|
||||
dwords_per_loop += 2;
|
||||
|
||||
/* 8 bpp vs 32 bpp for xfer unit */
|
||||
if (size_bytes & 3)
|
||||
line_size = 8192;
|
||||
else
|
||||
line_size = 8192*4;
|
||||
|
||||
max_size = 8192 * line_size;
|
||||
|
||||
/* major loops cover the max size transfer */
|
||||
num_loops = ((size_bytes + max_size) / max_size);
|
||||
/* minor loops cover the extra non aligned bits */
|
||||
num_loops += ((size_bytes % line_size) ? 1 : 0);
|
||||
/* calculate number of loops correctly */
|
||||
ring_size = num_loops * dwords_per_loop;
|
||||
/* set default + shaders */
|
||||
ring_size += 40; /* shaders + def state */
|
||||
ring_size += 10; /* fence emit for VB IB */
|
||||
ring_size += 5; /* done copy */
|
||||
ring_size += 10; /* fence emit for done copy */
|
||||
r = radeon_ring_lock(rdev, ring_size);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
set_default_state(rdev); /* 14 */
|
||||
set_shaders(rdev); /* 26 */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void r600_video_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (rdev->r600_video.vb_ib)
|
||||
r600_vb_ib_put(rdev);
|
||||
|
||||
if (fence)
|
||||
r = radeon_fence_emit(rdev, fence);
|
||||
|
||||
radeon_ring_unlock_commit(rdev);
|
||||
}
|
||||
|
||||
void r600_kms_video_blit(struct radeon_device *rdev,
|
||||
u64 src_gpu_addr, int dstx, int dsty, int w, int h, int pitch)
|
||||
{
|
||||
u64 vb_gpu_addr;
|
||||
u32 *vb;
|
||||
|
||||
// DRM_DEBUG("emitting video copy\n");
|
||||
vb = (u32 *)(rdev->r600_video.vb_ib->ptr + rdev->r600_video.vb_used);
|
||||
|
||||
if ((rdev->r600_video.vb_used + 48) > rdev->r600_video.vb_total) {
|
||||
// WARN_ON(1);
|
||||
}
|
||||
|
||||
vb[0] = i2f(dstx);
|
||||
vb[1] = i2f(dsty);
|
||||
vb[2] = 0;
|
||||
vb[3] = 0;
|
||||
|
||||
vb[4] = i2f(dstx);
|
||||
vb[5] = i2f(dsty+h);
|
||||
vb[6] = 0;
|
||||
vb[7] = i2f(h);
|
||||
|
||||
vb[8] = i2f(dstx + w);
|
||||
vb[9] = i2f(dsty + h);
|
||||
vb[10] = i2f(w);
|
||||
vb[11] = i2f(h);
|
||||
|
||||
/* src 9 */
|
||||
set_tex_resource(rdev, FMT_8_8_8_8,
|
||||
w, h, pitch/4, src_gpu_addr);
|
||||
/* 5 */
|
||||
cp_set_surface_sync(rdev,
|
||||
PACKET3_TC_ACTION_ENA, pitch * h, src_gpu_addr);
|
||||
|
||||
/* dst 23 */
|
||||
set_render_target(rdev, COLOR_8_8_8_8,
|
||||
1024, 768, rdev->mc.vram_start);
|
||||
|
||||
/* scissors 12 */
|
||||
set_scissors(rdev, 0, 0, 1024, 768);
|
||||
|
||||
/* Vertex buffer setup 14 */
|
||||
vb_gpu_addr = rdev->r600_video.vb_ib->gpu_addr + rdev->r600_video.vb_used;
|
||||
set_vtx_resource(rdev, vb_gpu_addr);
|
||||
|
||||
/* draw 10 */
|
||||
draw_auto(rdev);
|
||||
|
||||
/* 5 */
|
||||
cp_set_surface_sync(rdev,
|
||||
PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
|
||||
1024*4*768, rdev->mc.vram_start);
|
||||
|
||||
/* 78 ring dwords per loop */
|
||||
vb += 12;
|
||||
rdev->r600_video.vb_used += 12 * 4;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Blit a video surface to the screen and wait for the GPU to finish.
 *
 * @src_offset: GPU address of the source surface
 * @x, @y:      destination position
 * @w, @h:      size of the rectangle
 * @pitch:      source pitch; h * pitch sizes the ring reservation
 *
 * A single fence is created on first use and recycled for every call;
 * after the wait it is unlinked from the fence list and its state flags
 * are reset so it can be emitted again.
 *
 * Returns 0 on success, or the error from fence creation, copy
 * preparation or the fence wait.
 */
int r600_video_blit(uint64_t src_offset, int x, int y,
		    int w, int h, int pitch)
{
	struct radeon_device *rdev = main_device;
	static struct radeon_fence *fence;
	unsigned long irq_flags;
	int r;

	if (fence == NULL) {
		r = radeon_fence_create(rdev, &fence);
		if (r) {
			printf("%s epic fail", __FUNCTION__);
			return r;
		}
	}

	/*
	 * NOTE(review): a new event is created for every blit; presumably
	 * the wait below consumes it - confirm, otherwise this leaks.
	 */
	fence->evnt = CreateEvent(NULL, 0);

	mutex_lock(&rdev->r600_video.mutex);
	rdev->r600_video.vb_ib = NULL;
	r = r600_video_prepare_copy(rdev, h*pitch);
	if (r) {
		/*
		 * prepare_copy can fail after it has acquired the vertex IB
		 * (ring lock failure); give the IB back instead of leaking it.
		 */
		if (rdev->r600_video.vb_ib)
			radeon_ib_free(rdev, &rdev->r600_video.vb_ib);
		mutex_unlock(&rdev->r600_video.mutex);
		return r;
	}

	r600_kms_video_blit(rdev, src_offset,x,y,w,h,pitch);
	r600_video_done_copy(rdev, fence);
	mutex_unlock(&rdev->r600_video.mutex);

	r = radeon_fence_wait(fence, false);

	/* recycle the fence for the next call */
	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
	list_del(&fence->list);
	fence->emited = false;
	fence->signaled = false;
	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);

	return r;
}
|
||||
|
||||
|
||||
/*
 * Create a GTT-resident video surface and map it for the caller.
 *
 * @w, @h: surface size in pixels; both must be positive
 * @outp:  output array of at least four 32-bit slots:
 *           outp[0..1]  64-bit GPU address of the surface
 *           outp[2]     address of the user mapping
 *           outp[3]     pitch (aligned width * 4)
 *
 * Returns 0 on success and -1 on any failure (invalid arguments or a
 * failing buffer-object call).
 *
 * NOTE(review): on the failure paths after radeon_bo_create() succeeded
 * the buffer object is not released (nor unpinned / unreserved), and the
 * object stays reserved on success.  The matching release calls are not
 * visible from here; confirm which ones this port provides and add the
 * cleanup.
 */
int r600_create_video(int w, int h, u32_t *outp)
{
	int r;
	struct radeon_device *rdev = main_device;
	struct radeon_bo *sobj = NULL;
	uint64_t saddr;
	void *uaddr;

	size_t size;
	size_t pitch;

	/* a non-positive height would wrap the unsigned size computed below */
	if (outp == NULL || w <= 0 || h <= 0)
		return -1;

	pitch = radeon_align_pitch(rdev, w, 32, false) * 4;

	size = pitch * h;
	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_GTT, &sobj);
	if (r)
		goto fail;

	r = radeon_bo_reserve(sobj, false);
	if (unlikely(r != 0))
		goto fail;

	r = radeon_bo_pin(sobj, RADEON_GEM_DOMAIN_GTT, &saddr);
	if (r)
		goto fail;

	r = radeon_bo_user_map(sobj, &uaddr);
	if (r)
		goto fail;

	((uint64_t*)outp)[0] = saddr;
	/* explicit conversion: the slot is 32 bits wide by interface */
	outp[2] = (u32_t)(unsigned long)uaddr;
	outp[3] = pitch;

	return 0;

fail:
	return -1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
280
drivers/video/drm/radeon/r700_vs.c
Normal file
280
drivers/video/drm/radeon/r700_vs.c
Normal file
@ -0,0 +1,280 @@
|
||||
|
||||
//typedef unsigned int uint32_t;
|
||||
|
||||
#include "drmP.h"
|
||||
#include "drm.h"
|
||||
#include "radeon_drm.h"
|
||||
#include "radeon.h"
|
||||
|
||||
#include "r600_shader.h"
|
||||
#include "r600_reg.h"
|
||||
#include "r600_reg_auto_r6xx.h"
|
||||
#include "r600_reg_r6xx.h"
|
||||
#include "r600_reg_r7xx.h"
|
||||
|
||||
|
||||
|
||||
/*
|
||||
vertex format
|
||||
|
||||
struct vertex
|
||||
{
|
||||
float x, y;
|
||||
float s, t;
|
||||
};
|
||||
|
||||
vertex shader
|
||||
|
||||
VFETCH: ADDR(4) CNT(1) VALID_PIX
|
||||
FETCH R0
|
||||
EXP_DONE POS0, R0.XY01
|
||||
EXT_DONE PARAM0, R0.ZW01
|
||||
*/
|
||||
|
||||
uint32_t R600_video_vs[] =
|
||||
{
|
||||
|
||||
/* 0 */
|
||||
CF_DWORD0(ADDR(4)),
|
||||
CF_DWORD1(POP_COUNT(0),
|
||||
CF_CONST(0),
|
||||
COND(SQ_CF_COND_ACTIVE),
|
||||
/* I_COUNT(1),*/ 0,
|
||||
CALL_COUNT(0),
|
||||
END_OF_PROGRAM(0),
|
||||
VALID_PIXEL_MODE(0),
|
||||
CF_INST(SQ_CF_INST_VTX),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(1)),
|
||||
/* 1 */
|
||||
CF_ALLOC_IMP_EXP_DWORD0(
|
||||
ARRAY_BASE(CF_POS0),
|
||||
TYPE(SQ_EXPORT_POS),
|
||||
RW_GPR(0),
|
||||
RW_REL(ABSOLUTE),
|
||||
INDEX_GPR(0),
|
||||
ELEM_SIZE(0)),
|
||||
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
|
||||
SRC_SEL_X(SQ_SEL_X),
|
||||
SRC_SEL_Y(SQ_SEL_Y),
|
||||
SRC_SEL_Z(SQ_SEL_0),
|
||||
SRC_SEL_W(SQ_SEL_1),
|
||||
R6xx_ELEM_LOOP(0),
|
||||
BURST_COUNT(0),
|
||||
END_OF_PROGRAM(0),
|
||||
VALID_PIXEL_MODE(0),
|
||||
CF_INST(SQ_CF_INST_EXPORT_DONE),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(1)),
|
||||
/* 2 */
|
||||
CF_ALLOC_IMP_EXP_DWORD0(
|
||||
ARRAY_BASE(0),
|
||||
TYPE(SQ_EXPORT_PARAM),
|
||||
RW_GPR(0),
|
||||
RW_REL(ABSOLUTE),
|
||||
INDEX_GPR(0),
|
||||
ELEM_SIZE(0)),
|
||||
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
|
||||
SRC_SEL_X(SQ_SEL_Z),
|
||||
SRC_SEL_Y(SQ_SEL_W),
|
||||
SRC_SEL_Z(SQ_SEL_0),
|
||||
SRC_SEL_W(SQ_SEL_1),
|
||||
R6xx_ELEM_LOOP(0),
|
||||
BURST_COUNT(0),
|
||||
END_OF_PROGRAM(1),
|
||||
VALID_PIXEL_MODE(0),
|
||||
CF_INST(SQ_CF_INST_EXPORT_DONE),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(0)),
|
||||
/* 3 */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
/* 4/5 */
|
||||
VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
|
||||
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
|
||||
FETCH_WHOLE_QUAD(0),
|
||||
BUFFER_ID(0),
|
||||
SRC_GPR(0),
|
||||
SRC_REL(ABSOLUTE),
|
||||
SRC_SEL_X(SQ_SEL_X),
|
||||
MEGA_FETCH_COUNT(16)),
|
||||
VTX_DWORD1_GPR(DST_GPR(0),
|
||||
DST_REL(0),
|
||||
DST_SEL_X(SQ_SEL_X),
|
||||
DST_SEL_Y(SQ_SEL_Y),
|
||||
DST_SEL_Z(SQ_SEL_Z),
|
||||
DST_SEL_W(SQ_SEL_W),
|
||||
USE_CONST_FIELDS(0),
|
||||
DATA_FORMAT(FMT_32_32_32_32_FLOAT),
|
||||
NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
|
||||
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
|
||||
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)),
|
||||
VTX_DWORD2(OFFSET(0),
|
||||
ENDIAN_SWAP(SQ_ENDIAN_NONE),
|
||||
CONST_BUF_NO_STRIDE(0),
|
||||
MEGA_FETCH(1)),
|
||||
VTX_DWORD_PAD
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
pixel shader
|
||||
|
||||
00 TEX: ADDR(2) CNT(1) VALID_PIX
|
||||
0 SAMPLE R0, v0.xy01, t0, s0
|
||||
1 SAMPLE R1, v0.xy01, t0, s1
|
||||
|
||||
01 EXP_DONE: PIX0, R0
|
||||
|
||||
02 ALU: ADDR() CNT(1)
|
||||
2 KILLNE ____, R1.x___, c0.x___
|
||||
|
||||
END_OF_PROGRAM
|
||||
|
||||
|
||||
|
||||
*/
|
||||
uint32_t R600_video_ps[]=
|
||||
{
|
||||
/* CF INST 0 */
|
||||
CF_DWORD0(ADDR(2)),
|
||||
CF_DWORD1(POP_COUNT(0),
|
||||
CF_CONST(0),
|
||||
COND(SQ_CF_COND_ACTIVE),
|
||||
/* I_COUNT(1), */ 0,
|
||||
CALL_COUNT(0),
|
||||
END_OF_PROGRAM(0),
|
||||
VALID_PIXEL_MODE(0),
|
||||
CF_INST(SQ_CF_INST_TEX),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(1)),
|
||||
|
||||
#if 0
|
||||
/* CF INST 1 */
|
||||
CF_ALU_DWORD0(ADDR( ),
|
||||
KCACHE_BANK0(0),
|
||||
KCACHE_BANK1(0),
|
||||
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
|
||||
CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
|
||||
KCACHE_ADDR0(0),
|
||||
KCACHE_ADDR1(0),
|
||||
I_COUNT(1),
|
||||
USES_WATERFALL(0),
|
||||
CF_INST(SQ_CF_INST_ALU),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(1));
|
||||
#endif
|
||||
|
||||
/* CF INST 1 */
|
||||
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
|
||||
TYPE(SQ_EXPORT_PIXEL),
|
||||
RW_GPR(0),
|
||||
RW_REL(ABSOLUTE),
|
||||
INDEX_GPR(0),
|
||||
ELEM_SIZE(1)),
|
||||
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
|
||||
SRC_SEL_Y(SQ_SEL_Y),
|
||||
SRC_SEL_Z(SQ_SEL_Z),
|
||||
SRC_SEL_W(SQ_SEL_W),
|
||||
R6xx_ELEM_LOOP(0),
|
||||
BURST_COUNT(1),
|
||||
END_OF_PROGRAM(1),
|
||||
VALID_PIXEL_MODE(0),
|
||||
CF_INST(SQ_CF_INST_EXPORT_DONE),
|
||||
WHOLE_QUAD_MODE(0),
|
||||
BARRIER(1)),
|
||||
|
||||
#if 0
|
||||
/* KILLNE c0.x, r1.x */
|
||||
ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
|
||||
SRC0_REL(ABSOLUTE),
|
||||
SRC0_ELEM(ELEM_X),
|
||||
SRC0_NEG(0),
|
||||
SRC1_SEL(ALU_SRC_GPR_BASE + 1),
|
||||
SRC1_REL(ABSOLUTE),
|
||||
SRC1_ELEM(ELEM_X),
|
||||
SRC1_NEG(0),
|
||||
INDEX_MODE(SQ_INDEX_LOOP),
|
||||
PRED_SEL(SQ_PRED_SEL_OFF),
|
||||
LAST(1)),
|
||||
R7xx_ALU_DWORD1_OP2(SRC0_ABS(0),
|
||||
SRC1_ABS(0),
|
||||
UPDATE_EXECUTE_MASK(0),
|
||||
UPDATE_PRED(0),
|
||||
WRITE_MASK(0),
|
||||
FOG_MERGE(0),
|
||||
OMOD(SQ_ALU_OMOD_OFF),
|
||||
ALU_INST(SQ_OP2_INST_KILLNE),
|
||||
BANK_SWIZZLE(SQ_ALU_VEC_012),
|
||||
DST_GPR(0),
|
||||
DST_REL(ABSOLUTE),
|
||||
DST_ELEM(ELEM_X),
|
||||
CLAMP(0)),
|
||||
|
||||
#endif
|
||||
|
||||
/* TEX INST 0 */
|
||||
TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
|
||||
BC_FRAC_MODE(0),
|
||||
FETCH_WHOLE_QUAD(0),
|
||||
RESOURCE_ID(0),
|
||||
SRC_GPR(0),
|
||||
SRC_REL(ABSOLUTE),
|
||||
R7xx_ALT_CONST(0)),
|
||||
TEX_DWORD1(DST_GPR(0),
|
||||
DST_REL(ABSOLUTE),
|
||||
DST_SEL_X(SQ_SEL_X), /* R */
|
||||
DST_SEL_Y(SQ_SEL_Y), /* G */
|
||||
DST_SEL_Z(SQ_SEL_Z), /* B */
|
||||
DST_SEL_W(SQ_SEL_W), /* A */
|
||||
LOD_BIAS(0),
|
||||
COORD_TYPE_X(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_Y(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_Z(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_W(TEX_UNNORMALIZED)),
|
||||
TEX_DWORD2(OFFSET_X(0),
|
||||
OFFSET_Y(0),
|
||||
OFFSET_Z(0),
|
||||
SAMPLER_ID(0),
|
||||
SRC_SEL_X(SQ_SEL_X),
|
||||
SRC_SEL_Y(SQ_SEL_Y),
|
||||
SRC_SEL_Z(SQ_SEL_0),
|
||||
SRC_SEL_W(SQ_SEL_1)),
|
||||
TEX_DWORD_PAD
|
||||
|
||||
#if 0
|
||||
TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
|
||||
BC_FRAC_MODE(0),
|
||||
FETCH_WHOLE_QUAD(0),
|
||||
RESOURCE_ID(1),
|
||||
SRC_GPR(0),
|
||||
SRC_REL(ABSOLUTE),
|
||||
R7xx_ALT_CONST(0)),
|
||||
TEX_DWORD1(DST_GPR(1),
|
||||
DST_REL(ABSOLUTE),
|
||||
DST_SEL_X(SQ_SEL_X), /* R */
|
||||
DST_SEL_Y(SQ_SEL_MASK), /* G */
|
||||
DST_SEL_Z(SQ_SEL_MASK), /* B */
|
||||
DST_SEL_W(SQ_SEL_MASK), /* A */
|
||||
LOD_BIAS(0),
|
||||
COORD_TYPE_X(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_Y(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_Z(TEX_UNNORMALIZED),
|
||||
COORD_TYPE_W(TEX_UNNORMALIZED)),
|
||||
TEX_DWORD2(OFFSET_X(0),
|
||||
OFFSET_Y(0),
|
||||
OFFSET_Z(0),
|
||||
SAMPLER_ID(0),
|
||||
SRC_SEL_X(SQ_SEL_X),
|
||||
SRC_SEL_Y(SQ_SEL_Y),
|
||||
SRC_SEL_Z(SQ_SEL_0),
|
||||
SRC_SEL_W(SQ_SEL_1)),
|
||||
TEX_DWORD_PAD
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
const u32 r600_video_ps_size = ARRAY_SIZE(R600_video_ps);
|
||||
const u32 r600_video_vs_size = ARRAY_SIZE(R600_video_vs);
|
||||
|
||||
|
79
drivers/video/drm/radeon/tracker/bitmap.c
Normal file
79
drivers/video/drm/radeon/tracker/bitmap.c
Normal file
@ -0,0 +1,79 @@
|
||||
|
||||
#include <drmP.h>
|
||||
#include <drm.h>
|
||||
#include "radeon_drm.h"
|
||||
#include "../radeon.h"
|
||||
#include "../display.h"
|
||||
|
||||
extern struct radeon_device *main_device;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
kobj_t header;
|
||||
|
||||
int width;
|
||||
int height;
|
||||
int stride;
|
||||
uint64_t gaddr;
|
||||
void *uaddr;
|
||||
struct radeon_bo *robj;
|
||||
}bitmap_t;
|
||||
|
||||
/*
 * Create a bitmap object backed by a pinned, user-mapped GTT buffer.
 *
 * @pbitmap: receives the new bitmap on success and NULL on every failure
 * @width:   bitmap width in pixels, must be positive
 * @height:  bitmap height in pixels, must be positive
 *
 * Returns 0 on success and -1 on failure (invalid arguments, object
 * allocation failure or a failing buffer-object call).
 *
 * NOTE(review): on the failure paths after radeon_bo_create() succeeded
 * the buffer object itself is not released (nor unpinned / unreserved),
 * and it stays reserved on success.  The matching release calls are not
 * visible from here; confirm which ones this port provides and add the
 * cleanup.
 */
int create_bitmap(bitmap_t **pbitmap, int width, int height)
{
	size_t size;
	size_t pitch;
	bitmap_t *bitmap;
	uint64_t gaddr;
	void *uaddr;

	struct radeon_device *rdev = main_device;
	struct radeon_bo *sobj = NULL;

	int r;

	if (pbitmap == NULL)
		return -1;

	/* give the caller a defined value on every failure path */
	*pbitmap = NULL;

	/* a non-positive height would wrap the unsigned size computed below */
	if (width <= 0 || height <= 0)
		return -1;

	bitmap = CreateObject(GetPid(), sizeof(bitmap_t));
	if (bitmap == NULL)
		return -1;

	pitch = radeon_align_pitch(rdev, width, 32, false) * 4;

	size = pitch * height;

	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_GTT, &sobj);
	if (r)
		goto fail;

	r = radeon_bo_reserve(sobj, false);
	if (unlikely(r != 0))
		goto fail;

	r = radeon_bo_pin(sobj, RADEON_GEM_DOMAIN_GTT, &gaddr);
	if (r)
		goto fail;

	r = radeon_bo_user_map(sobj, &uaddr);
	if (r)
		goto fail;

	bitmap->width  = width;
	bitmap->height = height;
	bitmap->stride = pitch;
	bitmap->gaddr  = gaddr;
	bitmap->uaddr  = uaddr;
	bitmap->robj   = sobj;

	*pbitmap = bitmap;
	return 0;

fail:
	DestroyObject(bitmap);
	return -1;
}
|
Loading…
Reference in New Issue
Block a user