763 lines
19 KiB
PHP
763 lines
19 KiB
PHP
|
; Bilinear filtering, real Phong shading and glass like parallel.
; Thanks to the authors of the 3dica tutorial.
; Implemented in FASM by Maciej Guba.
; http://macgub.j.pl
|
||
|
|
||
|
; Number of fractional bits of the fixed-point x coordinates walked along
; the triangle edges: x values are shifted left by ROUND2 before the
; per-scanline step is computed and shifted right again (sar) before a
; scanline is drawn.
ROUND2 equ 10
|
||
|
|
||
|
glass_tex_tri:
;--------------------------------------------------------------------------
; Renders one Phong shaded, bilinear-filtered textured triangle with z
; interpolation (Catmull algorithm).  The triangle is split at its middle
; vertex and every scanline is drawn by glass_tex_line, which normalizes
; the interpolated normal vector at every pixel.
;
; in:  eax  - x1 shl 16 + y1
;      ebx  - x2 shl 16 + y2
;      ecx  - x3 shl 16 + y3
;      esi  - pointer to stencil buffer filled with dd float values
;      edi  - pointer to screen buffer
;      edx  - pointer to texture
;      xmm0 - 1st normal vector
;      xmm1 - 2nd normal vector
;      xmm2 - 3rd normal vector
;      xmm3 - normalized light vector
;      xmm4 - lo -> hi: z1, z2, z3 as dword floats
;      xmm5 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;      xmm6 - lo -> hi: tx1, ty1, tx2, ty2, tx3, ty3 as words,
;                       x_res as dword integer
;      stack - no parameters
;
; The procedure does not preserve registers (only ebp is restored).
;--------------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,512
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame for movaps/movdqa

; ---- local frame (offsets from the aligned ebp) ----
.1_nv   equ [ebp-16]                    ; vertex normals
.2_nv   equ [ebp-32]
.3_nv   equ [ebp-48]
.l_v    equ [ebp-64]                    ; light vector
.z3     equ [ebp-72]                    ; z1..z3 are stored with one movaps to .z1
.z2     equ [ebp-76]
.z1     equ [ebp-80]
.x1     equ [ebp-82]                    ; each x:y pair is stored as one dword
.y1     equ [ebp-84]
.x2     equ [ebp-86]
.y2     equ [ebp-88]
.x3     equ [ebp-90]
.y3     equ [ebp-92]
.Zbuf   equ [ebp-96]
.x_max  equ [ebp-100]                   ; clip rectangle, stored with one movdqa to .y_min
.x_min  equ [ebp-104]
.y_max  equ [ebp-108]
.y_min  equ [ebp-112]
.screen equ [ebp-116]
.dx12   equ [ebp-120]                   ; fixed-point x step per scanline on each edge
.dx13   equ [ebp-124]
.dx23   equ [ebp-128]
.dn12   equ [ebp-144]                   ; normal vector step per scanline on each edge
.dn13   equ [ebp-160]
.dn23   equ [ebp-176]

.cnv1   equ [ebp-192]                   ; current normal vectors
.cnv2   equ [ebp-208]
.x_res  equ [ebp-212]                   ; tx1..x_res are stored with one movaps to .tx1
.ty3    equ [ebp-214]
.tx3    equ [ebp-216]
.ty2    equ [ebp-218]
.tx2    equ [ebp-220]
.ty1    equ [ebp-222]
.tx1    equ [ebp-224]
.dz12   equ [ebp-232]                   ; (dtx, dty, dz) steps per scanline, one
.dty12  equ [ebp-236]                   ; 16-byte slot per edge
.dtx12  equ [ebp-240]
.dz13   equ [ebp-248]
.dty13  equ [ebp-252]
.dtx13  equ [ebp-256]
.dz23   equ [ebp-264]
.dty23  equ [ebp-268]
.dtx23  equ [ebp-272]
.cz1    equ [ebp-280]                   ; current (tx, ty, z) on the 1-3 edge
.cty1   equ [ebp-284]
.ctx1   equ [ebp-288]
.cz2    equ [ebp-296]                   ; current (tx, ty, z) on the 1-2 / 2-3 edge
.cty2   equ [ebp-300]
.ctx2   equ [ebp-304]
.tx_ptr equ [ebp-308]

        emms

; ---- sort the vertices so that y1 <= y2 <= y3 ----
; Every swap of packed coordinates is mirrored on z (xmm4), on the texture
; coordinates (xmm6, one dword = one tx:ty pair) and on the normals.
.sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap elements 0 and 1
        shufps  xmm6,xmm6,11100001b
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7

.sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap elements 1 and 2
        shufps  xmm6,xmm6,11011000b
        movaps  xmm7,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm7

        jmp     .sort3

.sort2:
        movaps  .tx1,xmm6               ; tx1,ty1,tx2,ty2,tx3,ty3 (words) + x_res
        movaps  .z1,xmm4                ; z1,z2,z3
        mov     .y1,eax                 ; low word -> .y1, high word -> .x1
        mov     .y2,ebx
        mov     .y3,ecx

        movdqa  .y_min,xmm5             ; y_min,y_max,x_min,x_max

if 1
; ---- early out: check whether at least a fragment of the triangle can be
; ---- in the visible area.  The triangle is dropped when none of its six
; ---- coordinates c satisfies min < c <= max on its own axis.
        packssdw xmm5,xmm5              ; words: y_min,y_max,x_min,x_max
        pshuflw xmm5,xmm5,11011000b     ; words: y_min,x_min,y_max,x_max
        movdqu  xmm7,.y3                ; words: y3,x3,y2,x2,y1,x1,<z1 lo>,<z1 hi>
        movdqa  xmm6,xmm5
        pshufd  xmm5,xmm5,0             ; broadcast y_min,x_min
        pshufd  xmm6,xmm6,01010101b     ; broadcast y_max,x_max
        movdqa  xmm4,xmm7
        pcmpgtw xmm7,xmm5               ; coord > min
        pcmpgtw xmm4,xmm6               ; coord > max
        pxor    xmm7,xmm4               ; min < coord <= max
        pmovmskb eax,xmm7
        ; One mask bit per coordinate word, for words 0..5 only.  Words 6-7
        ; of the load are the bits of the float z1, not coordinates, so they
        ; must not take part in the decision (previous mask: 0x00aaaaaa).
        and     eax,0x0aaa              ; sets ZF when no coordinate is in range
        jz      .rpt_loop2_end
end if
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        movaps  .l_v,xmm3
        mov     .Zbuf,esi
        mov     .screen,edi
        mov     .tx_ptr,edx

; ---- per-scanline steps along edge 1-2 ----
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7               ; edge is horizontal: all steps are zero
        mov     dword .dx12,0
        movaps  .dtx12,xmm7
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

.rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx                  ; ebx = y2 - y1
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx12,eax               ; ((x2 - x1) shl ROUND2) / (y2 - y1)

        cvtsi2ss xmm6,ebx
        shufps  xmm6,xmm6,0             ; xmm6 = broadcast float(y2 - y1)
        movss   xmm5,.z2
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz12,xmm5

        movd    xmm0,.tx1               ; words tx1,ty1
        movd    xmm2,.tx2               ; words tx2,ty2
        pxor    xmm1,xmm1
        punpcklwd xmm0,xmm1             ; zero-extend words to dwords
        punpcklwd xmm2,xmm1
        psubd   xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps   xmm2,xmm6
        movlps  .dtx12,xmm2             ; dtx12, dty12

        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        divps   xmm0,xmm6
        movaps  .dn12,xmm0

.rpt_dx12_done:

; ---- per-scanline steps along edge 1-3 ----
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        movaps  .dtx13,xmm7
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

.rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx                  ; ebx = y3 - y1
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx
        shufps  xmm6,xmm6,0

        movss   xmm5,.z3
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz13,xmm5

        movd    xmm0,.tx1
        movd    xmm2,.tx3
        pxor    xmm1,xmm1
        punpcklwd xmm0,xmm1
        punpcklwd xmm2,xmm1
        psubd   xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps   xmm2,xmm6
        movlps  .dtx13,xmm2

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        divps   xmm0,xmm6
        movaps  .dn13,xmm0

.rpt_dx13_done:

; ---- per-scanline steps along edge 2-3 ----
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        movaps  .dtx23,xmm7
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

.rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx                  ; ebx = y3 - y2
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx
        shufps  xmm6,xmm6,0
        movss   xmm5,.z3
        subss   xmm5,.z2
        divss   xmm5,xmm6
        movss   .dz23,xmm5

        movd    xmm0,.tx2
        movd    xmm2,.tx3
        pxor    xmm1,xmm1
        punpcklwd xmm0,xmm1
        punpcklwd xmm2,xmm1
        psubd   xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps   xmm2,xmm6
        movlps  .dtx23,xmm2

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        divps   xmm0,xmm6
        movaps  .dn23,xmm0

.rpt_dx23_done:

; ---- both walkers start at vertex 1 ----
; eax = current x on edge 1-3, ebx = current x on edge 1-2 (later 2-3),
; both in fixed point with ROUND2 fractional bits.
        movsx   eax,word .x1
        shl     eax,ROUND2
        mov     ebx,eax
        mov     edx,.z1
        movd    xmm1,.tx1
        pxor    xmm2,xmm2
        punpcklwd xmm1,xmm2
        cvtdq2ps xmm1,xmm1              ; float tx1, ty1

        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movlps  .ctx1,xmm1
        movlps  .ctx2,xmm1
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0

        movsx   ecx,word .y1            ; ecx = current scanline
        cmp     cx,.y2

        jge     .rpt_loop1_end          ; upper part is empty when y1 >= y2

; ---- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ----
.rpt_loop1:
        pushad                          ; glass_tex_line clobbers the integer registers

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movaps  xmm3,.ctx1
        movaps  xmm5,.ctx2
        movaps  xmm4,.l_v
        movd    xmm6,.x_res
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        mov     edx,.tx_ptr
        mov     edi,.screen
        mov     esi,.Zbuf

        call    glass_tex_line

        popad
        movaps  xmm0,.cnv1              ; advance both walkers by one scanline
        movaps  xmm1,.cnv2
        movaps  xmm2,.ctx1
        movaps  xmm3,.ctx2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addps   xmm2,.dtx13
        addps   xmm3,.dtx12
        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movaps  .ctx1,xmm2
        movaps  .ctx2,xmm3
        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1

.rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end          ; lower part is empty when y2 >= y3

; The second walker restarts exactly at vertex 2; the first one (eax,
; .cnv1, .ctx1) simply continues along edge 1-3.
        movsx   ebx,word .x2
        shl     ebx,ROUND2
        push    dword .z2
        pop     dword .cz2
        movd    xmm1,.tx2
        pxor    xmm2,xmm2
        punpcklwd xmm1,xmm2
        cvtdq2ps xmm1,xmm1
        movlps  .ctx2,xmm1
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0

; ---- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ----
.rpt_loop2:
        pushad

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movaps  xmm3,.ctx1
        movaps  xmm5,.ctx2
        movaps  xmm4,.l_v
        sar     ebx,ROUND2
        sar     eax,ROUND2
        mov     edx,.tx_ptr
        mov     edi,.screen
        mov     esi,.Zbuf
        movd    xmm6,.x_res
        call    glass_tex_line

        popad
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movaps  xmm2,.ctx1
        movaps  xmm3,.ctx2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addps   xmm2,.dtx13
        addps   xmm3,.dtx23

        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movaps  .ctx1,xmm2
        movaps  .ctx2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

.second_cause:

.rpt_loop2_end:

        add     esp,512
        pop     ebp

        ret
|
||
|
align 16
|
||
|
glass_tex_line:
;--------------------------------------------------------------------------
; Draws one horizontal span of a Phong shaded, bilinear-filtered textured
; triangle.  The span is clipped to the given rectangle, the normal vector
; is normalized at every pixel, the brightest result over all lights in
; lights_aligned .. lights_aligned_end is taken, and the lit texel is
; added with saturation to the pixel already in the screen buffer.
;
; in:  xmm0 - normal vector 1
;      xmm1 - normal vector 2
;      xmm3 - lo -> hi: tx1, ty1, z1 as dword floats
;      xmm5 - lo -> hi: tx2, ty2, z2 as dword floats
;      xmm2 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;      xmm4 - normalized light vector
;      eax  - x1
;      ebx  - x2
;      ecx  - y
;      edi  - screen buffer
;      esi  - stencil buffer filled with dd floats
;      edx  - texture pointer (handle)
;      xmm6 - lowest dword: x_res as integer
;
; Uses the globals tex_m2, lights_aligned, lights_aligned_end, aprox,
; the_zero, the_one, mask_255f and the constants TEX_SHIFT, TEX_X, which
; are defined outside this routine.
; Clobbers eax, ebx, ecx, edx, esi, edi and xmm0-xmm7; ebp is restored.
;--------------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,352                 ; multiple of 4 keeps esp dword aligned (was 350)
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame for movaps

; ---- local frame (offsets from the aligned ebp) ----
.n1     equ [ebp-16]                    ; current normal (starts as left normal)
.n2     equ [ebp-32]
.lv     equ [ebp-48]
.lx1    equ [ebp-52]
.lx2    equ [ebp-56]
.screen equ [ebp-68]
.zbuff  equ [ebp-72]
.x_max  equ [ebp-74]                    ; clip rectangle as words, stored with one
.x_min  equ [ebp-76]                    ; movq to .y_min
.y_max  equ [ebp-78]
.y_min  equ [ebp-80]
.dn     equ [ebp-96]                    ; normal step per pixel
.x_res  equ [ebp-100]
.y      equ [ebp-104]
.cnv    equ [ebp-128]                   ; normalized current normal
.z1     equ [ebp-136]
.ty1    equ [ebp-140]
.tx1    equ [ebp-144]
.z2     equ [ebp-152]
.ty2    equ [ebp-156]
.tx2    equ [ebp-160]
.cz     equ [ebp-168]
.cty    equ [ebp-172]
.ctx    equ [ebp-176]
.dz     equ [ebp-184]
.dty    equ [ebp-188]
.dtx    equ [ebp-192]                   ; (tx, ty, z) step per pixel
.yd     equ [ebp-196]
.xd     equ [ebp-200]
.yf     equ [ebp-204]
.xf     equ [ebp-208]                   ; fractional parts of tx, ty
.w4     equ [ebp-212]
.w3     equ [ebp-216]
.w2     equ [ebp-220]
.w1     equ [ebp-224]
.p4     equ [ebp-228]
.p3     equ [ebp-232]
.p2     equ [ebp-236]
.p1     equ [ebp-240]

.tx_ptr equ [ebp-244]

; ---- reject scanlines outside [y_min, y_max) ----
        mov     .y,ecx
        packssdw xmm2,xmm2              ; dwords -> words y_min,y_max,x_min,x_max
        movq    .y_min,xmm2
        cmp     cx,.y_min
        jl      .end_line
        cmp     cx,.y_max
        jge     .end_line

; ---- order the span left to right ----
        cmp     eax,ebx
        je      .end_line               ; zero-length span
        jl      @f
        xchg    eax,ebx
        movaps  xmm7,xmm0               ; swap normals
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        movaps  xmm7,xmm3               ; swap (tx, ty, z)
        movaps  xmm3,xmm5
        movaps  xmm5,xmm7
      @@:

        cmp     ax,.x_max
        jge     .end_line               ; span starts right of the clip rectangle
        cmp     bx,.x_min
        jle     .end_line               ; span ends left of the clip rectangle
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        movaps  .tx1,xmm3
        movaps  .tx2,xmm5
        movd    .x_res,xmm6
        mov     .tx_ptr,edx

; ---- per-pixel steps ----
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0             ; broadcast float(x2 - x1)
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        subps   xmm5,xmm3
        divps   xmm5,xmm7
        movaps  .dtx,xmm5

; ---- clip on the left: move the start values forward to x_min ----
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     @f
        movzx   eax,word .x_min
        sub     eax,ebx                 ; number of skipped pixels
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulps   xmm5,xmm7
        mulps   xmm1,xmm7
        addps   xmm5,.tx1
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movaps  .tx1,xmm5
        movaps  .n1,xmm1
        mov     dword .lx1,eax

      @@:
; ---- clip on the right ----
        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
      @@:
; ---- byte offset of the first pixel: (x_res * y + lx1) * 4 ----
        mov     eax,.x_res
        mul     dword .y                ; clobbers edx (texture pointer is in .tx_ptr)
        add     eax,.lx1
        shl     eax,2
        add     edi,eax
        add     esi,eax

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx = pixel count
        movaps  xmm2,.tx1               ; xmm2 = current (tx, ty, z) from here on
align 16
.ddraw:
        xorps   xmm5,xmm5               ; xmm5 = brightest light result so far
        movaps  xmm7,.n1
        mulps   xmm7,xmm7               ; normalize the current normal
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7               ; sum of squares of all four lanes
        rsqrtps xmm7,xmm7
        mulps   xmm7,.n1
        movaps  .cnv,xmm7

        movaps  xmm6,xmm2
        minps   xmm6,[tex_m2]           ; float TEX_X-2,TEX_Y-2
        cvttps2dq xmm7,xmm6             ; xmm7 = integer texel coordinates
        cvtdq2ps xmm4,xmm7
        subps   xmm6,xmm4
        movlps  .xf,xmm6                ; fractional parts xf, yf

if 1
; stencil: ebx = -1 when exactly one of (z + aprox), (z - aprox) is not
; less than the stencil value at this pixel.  The test does not depend on
; the light, so it is done once per pixel instead of once per light.
        movhlps xmm6,xmm2               ; low dword = z
        movhlps xmm4,xmm2
        addss   xmm6,[aprox]
        subss   xmm4,[aprox]
        cmpnltss xmm6,dword[esi]
        cmpnltss xmm4,dword[esi]
        xorps   xmm6,xmm4
        movd    ebx,xmm6
end if
        mov     eax,lights_aligned      ; global
align 16
.again_col:
; Light record layout as used below: [eax] is dotted with the normal,
; [eax+16] scales the clamped dot product, [eax+32] is added unconditionally
; and [eax+48] scales dot^16 (presumably direction, diffuse, ambient and
; specular colours - confirm against the definition of lights_aligned).
        movaps  xmm0,[eax]              ; calc multiple lights
        mulps   xmm0,.cnv               ; last dword should be zeroed
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0               ; dot product broadcast to all lanes
if 1
        xorps   xmm4,xmm4               ; no dot^16 term unless the stencil test passed
        cmp     ebx,-1
        jne     .no_reflective
end if
        movaps  xmm4,xmm0
        mulps   xmm4,xmm4
        mulps   xmm4,xmm4
        mulps   xmm4,xmm4
        mulps   xmm4,xmm4               ; dot^16
        mulps   xmm4,[eax+48]

.no_reflective:
        maxps   xmm0,[the_zero]
        mulps   xmm0,[eax+16]
        addps   xmm4,xmm0
        addps   xmm4,[eax+32]
        maxps   xmm5,xmm4
        add     eax,64
        cmp     eax,lights_aligned_end
        jnz     .again_col
        minps   xmm5,[mask_255f]

; ---- texture coords work: fetch the 2x2 texel block (3 bytes per texel) ----
        movd    eax,xmm7                ; integer tx
        psrldq  xmm7,4
        movd    ebx,xmm7                ; integer ty
        shl     ebx,TEX_SHIFT
        add     eax,ebx
        lea     eax,[eax*3]
        add     eax,.tx_ptr
        mov     ebx,eax
        add     ebx,TEX_X*3             ; same column, next texture row
        movd    xmm7,[eax]
        movd    xmm6,[eax+3]
        movd    xmm4,[ebx]
        movd    xmm3,[ebx+3]
        ; r/g/b below name the 1st/2nd/3rd byte of a texel, following the
        ; original comments.
        punpcklbw xmm7,xmm6             ; xmm7 r1 r2 g1 g2 b1 b2
        punpcklbw xmm4,xmm3             ; xmm4 r3 r4 g3 g4 b3 b4
        punpcklwd xmm7,xmm4             ; xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ...
        movdqa  xmm6,xmm7
        movdqa  xmm4,xmm7
        psrldq  xmm6,4
        psrldq  xmm4,8

        punpcklbw xmm7,[the_zero]       ; broadcasted 0: widen bytes to dwords
        punpcklbw xmm6,[the_zero]
        punpcklbw xmm4,[the_zero]
        punpcklwd xmm7,[the_zero]
        punpcklwd xmm6,[the_zero]
        punpcklwd xmm4,[the_zero]

; ---- bilinear weights ----
        movlps  xmm3,[the_one]          ; broadcasted dword 1.0
        cvtdq2ps xmm7,xmm7
        subps   xmm3,.xf
        cvtdq2ps xmm6,xmm6
        movhps  xmm3,.xf
        cvtdq2ps xmm4,xmm4
        movaps  xmm1,xmm3               ; 1-xf, 1-yf, xf, yf
        shufps  xmm3,xmm3,10001000b     ; 1-xf, xf, 1-xf, xf
        shufps  xmm1,xmm1,11110101b     ; 1-yf, 1-yf, yf, yf
        mulps   xmm3,xmm1               ; one weight per texel of the block

        mulps   xmm7,xmm3
        mulps   xmm6,xmm3
        mulps   xmm4,xmm3
        haddps  xmm7,xmm7               ; r
        haddps  xmm6,xmm6               ; g
        haddps  xmm4,xmm4               ; b
        haddps  xmm7,xmm7               ; r
        haddps  xmm6,xmm6               ; g
        haddps  xmm4,xmm4               ; b
        movlhps xmm7,xmm6
        shufps  xmm7,xmm7,11101000b
        movlhps xmm7,xmm4               ; xmm7 = r, g, b, b

; ---- light the texel and add it to the screen pixel with saturation ----
        mulps   xmm5,xmm7
        cvtps2dq xmm5,xmm5
        psrld   xmm5,8
        movd    xmm6,[edi]
        packssdw xmm5,xmm5
        packuswb xmm5,xmm5
        paddusb xmm5,xmm6
        movd    [edi],xmm5
.skip:
        add     edi,4
        add     esi,4
        movaps  xmm0,.n1                ; cur normal
        addps   xmm0,.dn
        addps   xmm2,.dtx
        movaps  .n1,xmm0
        sub     ecx,1
        jnz     .ddraw

.end_line:
        add     esp,352
        pop     ebp

        ret
|