529 lines
12 KiB
PHP
529 lines
12 KiB
PHP
|
; Real Phong shading implemented in flat assembler
|
||
|
; by Maciej Guba.
|
||
|
; http://macgub.vxm.pl
|
||
|
|
||
|
ROUND2 equ 10          ; number of fractional bits in the fixed-point x coordinates stepped along triangle edges
|
||
|
real_phong_tri_z:
;------------------------------------------------------------------------
; Renders one Phong-shaded triangle with z-coordinate interpolation
; (Catmull algorithm).  The normal vector is interpolated along the edges
; here and re-normalized for every pixel inside real_phong_line_z.
;
; Steps: sort the vertices by ascending y, optionally reject the triangle
; early, compute per-scanline steps (x, z, normal) for the three edges and
; call real_phong_line_z once per scanline.
;
; in:
;   eax  - x1 shl 16 + y1
;   ebx  - x2 shl 16 + y2
;   ecx  - x3 shl 16 + y3
;   esi  - pointer to Z-buffer filled with dd float values
;   edi  - pointer to screen buffer
;   xmm0 - 1st normal vector
;   xmm1 - 2nd normal vector
;   xmm2 - 3rd normal vector
;   xmm3 - normalized light vector
;   xmm4 - lo -> hi: z1, z2, z3 as dword floats
;   xmm5 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;   stack - no parameters
;
; The procedure does NOT preserve registers.
;------------------------------------------------------------------------

        push    ebp
        mov     ebp, esp
        sub     esp, 512                ; room for the locals below
        sub     ebp, 16
        and     ebp, 0xfffffff0         ; 16-byte aligned frame base so that
                                        ; movaps/movdqa on locals are legal;
                                        ; esp is released with add esp,512

; ---- frame layout (symbolic constants) ----
.1_nv   equ [ebp-16]                    ; vertex normals after sorting
.2_nv   equ [ebp-32]
.3_nv   equ [ebp-48]
.l_v    equ [ebp-64]                    ; light vector
.z3     equ [ebp-72]
.z2     equ [ebp-76]
.z1     equ [ebp-80]
.x1     equ [ebp-82]                    ; each (y,x) word pair is stored as
.y1     equ [ebp-84]                    ; one dword: y low word, x high word
.x2     equ [ebp-86]
.y2     equ [ebp-88]
.x3     equ [ebp-90]
.y3     equ [ebp-92]
.Zbuf   equ [ebp-96]
.x_max  equ [ebp-100]
.x_min  equ [ebp-104]
.y_max  equ [ebp-108]
.y_min  equ [ebp-112]
.screen equ [ebp-116]
.dx12   equ [ebp-120]                   ; per-scanline x steps (fixed point)
.dx13   equ [ebp-124]
.dx23   equ [ebp-128]
.dn12   equ [ebp-144]                   ; per-scanline normal steps
.dn13   equ [ebp-160]
.dn23   equ [ebp-176]
.dz12   equ [ebp-180]                   ; per-scanline z steps
.dz13   equ [ebp-184]
.dz23   equ [ebp-188]

.cnv1   equ [ebp-208]                   ; current normal vectors
.cnv2   equ [ebp-224]
.cz2    equ [ebp-228]                   ; current z values; cz1/cz2 are
.cz1    equ [ebp-232]                   ; adjacent so movlps loads both

; ---- sort vertices so that y1 <= y2 <= y3 ----
; The y coordinate is the low word of each register; z values and normals
; are swapped together with the packed coordinates.
.sort3:
        cmp     ax, bx
        jle     .sort1
        xchg    eax, ebx
        shufps  xmm4, xmm4, 11100001b   ; swap z1 <-> z2
        movaps  xmm6, xmm0
        movaps  xmm0, xmm1
        movaps  xmm1, xmm6

.sort1:
        cmp     bx, cx
        jle     .sort2
        xchg    ebx, ecx
        shufps  xmm4, xmm4, 11011000b   ; swap z2 <-> z3
        movaps  xmm6, xmm1
        movaps  xmm1, xmm2
        movaps  xmm2, xmm6

        jmp     .sort3                  ; re-check the first pair

.sort2:
        movaps  .z1, xmm4               ; stores z1, z2, z3 (+ unused dword)
        mov     .y1, eax
        mov     .y2, ebx
        mov     .y3, ecx

        movdqa  .y_min, xmm5            ; y_min, y_max, x_min, x_max

if 1    ; check whether at least a fragment of the triangle is in the
        ; visible area
        packssdw xmm5, xmm5             ; clip bounds as signed words
        pshuflw xmm5, xmm5, 11011000b   ; low dword = (y_min, x_min),
                                        ; next dword = (y_max, x_max)
        movdqu  xmm7, .y3               ; y3,x3,y2,x2,y1,x1 + 2 words of z1
        movdqa  xmm6, xmm5
        pshufd  xmm5, xmm5, 0           ; xmm5 -> broadcast (y_min, x_min)
        pshufd  xmm6, xmm6, 01010101b   ; xmm6 -> broadcast (y_max, x_max)
        movdqa  xmm4, xmm7
        pcmpgtw xmm7, xmm5              ; coord > min
        pcmpgtw xmm4, xmm6              ; coord > max
        pxor    xmm7, xmm4              ; set where exactly one test holds,
                                        ; i.e. min < coord <= max when
                                        ; min <= max
        pmovmskb eax, xmm7              ; 16 bits, one per byte
        ; Only the six coordinate words (bytes 0..11) are vertex data; the
        ; two upper words loaded above belong to the float z1 and must not
        ; take part in the decision.  One bit per word (its high byte):
        and     eax, 0x0aaa             ; AND sets ZF for the branch below
        jz      .rpt_loop2_end          ; no coordinate in range -> skip
end if

        movaps  .1_nv, xmm0
        movaps  .2_nv, xmm1
        movaps  .3_nv, xmm2
        movaps  .l_v, xmm3
        mov     .Zbuf, esi
        mov     .screen, edi

; ---- edge 1 -> 2: steps per scanline ----
        mov     bx, .y2
        sub     bx, .y1
        jnz     .rpt_dx12_make

        xorps   xmm7, xmm7              ; horizontal edge: all steps are 0
        mov     dword .dx12, 0
        mov     dword .dz12, 0
        movaps  .dn12, xmm7
        jmp     .rpt_dx12_done

.rpt_dx12_make:
        mov     ax, .x2
        sub     ax, .x1
        cwde
        movsx   ebx, bx                 ; ebx = y2 - y1
        shl     eax, ROUND2             ; to fixed point
        cdq
        idiv    ebx
        mov     .dx12, eax              ; dx12 = ((x2-x1) shl ROUND2)/(y2-y1)

        cvtsi2ss xmm6, ebx
        movss   xmm5, .z2
        subss   xmm5, .z1
        divss   xmm5, xmm6
        movss   .dz12, xmm5             ; dz12 = (z2-z1)/(y2-y1)

        movaps  xmm0, .2_nv
        subps   xmm0, .1_nv
        shufps  xmm6, xmm6, 0           ; broadcast (y2-y1)
        divps   xmm0, xmm6
        movaps  .dn12, xmm0             ; dn12 = (n2-n1)/(y2-y1)

.rpt_dx12_done:

; ---- edge 1 -> 3: steps per scanline ----
        mov     bx, .y3
        sub     bx, .y1
        jnz     .rpt_dx13_make

        xorps   xmm7, xmm7
        mov     dword .dx13, 0
        mov     dword .dz13, 0
        movaps  .dn13, xmm7
        jmp     .rpt_dx13_done

.rpt_dx13_make:
        mov     ax, .x3
        sub     ax, .x1
        cwde
        movsx   ebx, bx                 ; ebx = y3 - y1
        shl     eax, ROUND2
        cdq
        idiv    ebx
        mov     .dx13, eax

        cvtsi2ss xmm6, ebx
        movss   xmm5, .z3
        subss   xmm5, .z1
        divss   xmm5, xmm6
        movss   .dz13, xmm5

        movaps  xmm0, .3_nv
        subps   xmm0, .1_nv
        shufps  xmm6, xmm6, 0
        divps   xmm0, xmm6
        movaps  .dn13, xmm0

.rpt_dx13_done:

; ---- edge 2 -> 3: steps per scanline ----
        mov     bx, .y3
        sub     bx, .y2
        jnz     .rpt_dx23_make

        xorps   xmm7, xmm7
        mov     dword .dx23, 0
        mov     dword .dz23, 0
        movaps  .dn23, xmm7
        jmp     .rpt_dx23_done

.rpt_dx23_make:
        mov     ax, .x3
        sub     ax, .x2
        cwde
        movsx   ebx, bx                 ; ebx = y3 - y2
        shl     eax, ROUND2
        cdq
        idiv    ebx
        mov     .dx23, eax

        cvtsi2ss xmm6, ebx
        movss   xmm5, .z3
        subss   xmm5, .z2
        divss   xmm5, xmm6
        movss   .dz23, xmm5

        movaps  xmm0, .3_nv
        subps   xmm0, .2_nv
        shufps  xmm6, xmm6, 0
        divps   xmm0, xmm6
        movaps  .dn23, xmm0

.rpt_dx23_done:

; ---- start values: both edge walkers begin at vertex 1 ----
        movsx   eax, word .x1
        shl     eax, ROUND2             ; eax - cur x on edge 1->3 (fixed pt)
        mov     ebx, eax                ; ebx - cur x on edge 1->2 (fixed pt)
        mov     edx, .z1
        mov     .cz1, edx
        mov     .cz2, edx
        movaps  xmm0, .1_nv
        movaps  .cnv1, xmm0
        movaps  .cnv2, xmm0

        movsx   ecx, word .y1           ; ecx - current scanline
        cmp     cx, .y2
        jge     .rpt_loop1_end          ; upper part is empty

; ---- upper part: scanlines y1 .. y2-1, edges 1->3 and 1->2 ----
.rpt_loop1:
        pushad                          ; callee does not preserve registers

        movaps  xmm2, .y_min            ; clip rectangle
        movaps  xmm0, .cnv1
        movaps  xmm1, .cnv2
        movlps  xmm3, .cz1              ; lo -> hi: cz1, cz2
        movaps  xmm4, .l_v
        sar     ebx, ROUND2             ; fixed point -> integer x
        sar     eax, ROUND2
        mov     edi, .screen
        mov     esi, .Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0, .cnv1             ; advance both edge walkers by one
        movaps  xmm1, .cnv2             ; scanline
        movss   xmm2, .cz1
        movss   xmm3, .cz2
        addps   xmm0, .dn13
        addps   xmm1, .dn12
        addss   xmm2, .dz13
        addss   xmm3, .dz12
        add     eax, .dx13
        add     ebx, .dx12

        movaps  .cnv1, xmm0
        movaps  .cnv2, xmm1
        movss   .cz1, xmm2
        movss   .cz2, xmm3

        add     ecx, 1
        cmp     cx, .y2
        jl      .rpt_loop1

.rpt_loop1_end:
        movsx   ecx, word .y2
        cmp     cx, .y3
        jge     .rpt_loop2_end          ; lower part is empty

        ; restart the second walker at vertex 2; the first one (eax, cnv1,
        ; cz1) keeps running along edge 1->3
        movsx   ebx, word .x2           ; eax - cur x1
        shl     ebx, ROUND2             ; ebx - cur x2
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0, .2_nv
        movaps  .cnv2, xmm0

; ---- lower part: scanlines y2 .. y3-1, edges 1->3 and 2->3 ----
.rpt_loop2:
        pushad

        movaps  xmm2, .y_min
        movaps  xmm0, .cnv1
        movaps  xmm1, .cnv2
        movlps  xmm3, .cz1
        movaps  xmm4, .l_v
        sar     ebx, ROUND2
        sar     eax, ROUND2
        mov     edi, .screen
        mov     esi, .Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0, .cnv1
        movaps  xmm1, .cnv2
        movss   xmm2, .cz1
        movss   xmm3, .cz2
        addps   xmm0, .dn13
        addps   xmm1, .dn23
        addss   xmm2, .dz13
        addss   xmm3, .dz23
        add     eax, .dx13
        add     ebx, .dx23

        movaps  .cnv1, xmm0
        movaps  .cnv2, xmm1
        movss   .cz1, xmm2
        movss   .cz2, xmm3

        add     ecx, 1
        cmp     cx, .y3
        jl      .rpt_loop2

.rpt_loop2_end:

        add     esp, 512
        pop     ebp

        ret
|
||
|
align 16
|
||
|
real_phong_line_z:
;------------------------------------------------------------------------
; Draws one horizontal, Z-buffered span of a Phong-shaded triangle.
; The normal vector and z are interpolated linearly between the two span
; ends; the normal is re-normalized for every pixel that passes the depth
; test and the colour is evaluated against the global light table
; lights_aligned .. lights_aligned_end.
;
; in:
;   xmm0 - normal vector 1 (at x1)
;   xmm1 - normal vector 2 (at x2)
;   xmm3 - lo -> hi: z1, z2 as dword floats
;   xmm2 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;          (y_max and x_max are exclusive, see the comparisons below)
;   xmm4 - normalized light vector (stored in .lv, not used by the loop)
;   eax  - x1
;   ebx  - x2
;   ecx  - y
;   edi  - screen buffer
;   esi  - z buffer filled with dd floats
;
; Uses the globals size_x_var, lights_aligned, lights_aligned_end,
; zero_hgst_dd and mask_255f.  Registers are not preserved.
;------------------------------------------------------------------------

        push    ebp
        mov     ebp, esp
        sub     esp, 160
        sub     ebp, 16
        and     ebp, 0xfffffff0         ; 16-byte aligned base for movaps

; ---- frame layout (symbolic constants) ----
.n1     equ [ebp-16]
.n2     equ [ebp-32]
.lv     equ [ebp-48]
.lx1    equ [ebp-52]
.lx2    equ [ebp-56]
.z2     equ [ebp-60]
.z1     equ [ebp-64]
.screen equ [ebp-68]
.zbuff  equ [ebp-72]
.x_max  equ [ebp-74]                    ; clip bounds as signed words
.x_min  equ [ebp-76]
.y_max  equ [ebp-78]
.y_min  equ [ebp-80]
.dn     equ [ebp-96]                    ; per-pixel normal step
.dz     equ [ebp-100]                   ; per-pixel z step
.y      equ [ebp-104]
.cnv    equ [ebp-128]                   ; normalized normal of current pixel

        mov     .y, ecx
        packssdw xmm2, xmm2             ; clip bounds -> saturated words
        movq    .y_min, xmm2            ; y_min, y_max, x_min, x_max
        cmp     cx, .y_min
        jl      .end_rp_line            ; above the clip rectangle
        cmp     cx, .y_max
        jge     .end_rp_line            ; on/below the bottom bound

        cmp     eax, ebx
        je      .end_rp_line            ; empty span
        jl      @f
        xchg    eax, ebx                ; make eax the left end and swap the
        movaps  xmm7, xmm0              ; attributes accordingly
        movaps  xmm0, xmm1
        movaps  xmm1, xmm7
        shufps  xmm3, xmm3, 11100001b   ; swap z1 <-> z2
  @@:

        cmp     ax, .x_max
        jge     .end_rp_line            ; entirely right of the rectangle
        cmp     bx, .x_min
        jle     .end_rp_line            ; entirely left of the rectangle
        movaps  .lv, xmm4
        movaps  .n1, xmm0
        movaps  .n2, xmm1
        mov     .lx1, eax
        mov     .lx2, ebx
        movlps  .z1, xmm3               ; stores z1 and z2

        ; per-pixel steps: (n2 - n1)/(x2 - x1) and (z2 - z1)/(x2 - x1)
        sub     ebx, eax
        cvtsi2ss xmm7, ebx
        shufps  xmm7, xmm7, 0
        subps   xmm1, xmm0
        divps   xmm1, xmm7
        movaps  .dn, xmm1
        psrldq  xmm3, 4                 ; bring z2 to the low dword
        subss   xmm3, .z1
        divss   xmm3, xmm7
        movss   .dz, xmm3

        ; ---- clip the left end against x_min ----
        ; The clip bounds are signed words (packssdw) and are compared with
        ; signed branches, so they are sign-extended wherever they are
        ; widened to 32 bits.
        mov     ebx, .lx1
        cmp     bx, .x_min
        jge     @f
        movsx   eax, word .x_min
        sub     eax, ebx                ; number of pixels skipped
        cvtsi2ss xmm7, eax
        shufps  xmm7, xmm7, 0
        mulss   xmm3, xmm7              ; advance z and normal to x_min
        mulps   xmm1, xmm7
        addss   xmm3, .z1
        addps   xmm1, .n1
        movsx   eax, word .x_min
        movss   .z1, xmm3
        movaps  .n1, xmm1
        mov     dword .lx1, eax

  @@:
        ; ---- clip the right end against x_max ----
        movsx   eax, word .x_max
        cmp     .lx2, eax
        jl      @f
        mov     .lx2, eax
  @@:
        ; byte offset of the first pixel: (size_x_var * y + lx1) * 4,
        ; applied to both the screen and the z buffer
        movzx   eax, word[size_x_var]
        mul     dword .y
      ; mov     edx,.x1
        add     eax, .lx1
        shl     eax, 2
        add     edi, eax
        add     esi, eax

        mov     ecx, .lx2
        sub     ecx, .lx1               ; ecx = pixel count
        movaps  xmm0, .n1               ; xmm0 = running normal
        movss   xmm2, .z1               ; xmm2 = running z
align 16
.ddraw:
        movss   xmm7, xmm2
        cmpnltss xmm7, dword[esi]       ; all-ones when NOT (z < stored z)
        movd    eax, xmm7
        test    eax, eax
        jnz     .skip                   ; pixel is hidden
        movss   [esi], xmm2             ; update the z buffer
        movaps  xmm7, xmm0
        mulps   xmm7, xmm7              ; normalize
        haddps  xmm7, xmm7
        haddps  xmm7, xmm7              ; squared length in every lane
        rsqrtps xmm7, xmm7
        mulps   xmm7, xmm0
        movaps  .cnv, xmm7

        mov     edx, lights_aligned     ; lights - global variable
        xorps   xmm1, xmm1              ; instead global can be used .lv - light vect.
                                        ; xmm1 accumulates the per-channel
                                        ; maximum over all lights
  @@:
        ; one light record is 64 bytes; [edx] is the light vector, the
        ; vectors at +16 and +48 scale the two terms below (presumably the
        ; diffuse and specular colours - confirm against the light table)
        movaps  xmm6, [edx+16]
        movaps  xmm5, [edx]
        movaps  xmm3, [edx+48]
        andps   xmm5, [zero_hgst_dd]    ; global

        mulps   xmm5, .cnv   ;.lv       ; last dword should be zeroed
        haddps  xmm5, xmm5
        haddps  xmm5, xmm5              ; dot(light, normal) in every lane
      ; mulps   xmm5,[env_const2]
      ; maxps   xmm5,[dot_min]
      ; minps   xmm5,[dot_max]
        movaps  xmm7, xmm5
      ; mulps   xmm7,[env_const2]
      ; mulps   xmm7,[env_const2]
      ; maxps   xmm7,[dot_min]
      ; minps   xmm7,[dot_max]

        mulps   xmm7, xmm7              ; dot^2
        mulps   xmm7, xmm7              ; dot^4
        mulps   xmm5, xmm6              ; dot * [edx+16]
        mulps   xmm7, xmm7              ; dot^8
        mulps   xmm7, xmm3              ; dot^8 * [edx+48]

        addps   xmm5, xmm7
        minps   xmm5, [mask_255f]       ; global
        maxps   xmm1, xmm5
      ; movq    xmm3,[edx+20]           ; minimal color
      ; punpcklwd xmm3,[minimum0]
      ; cvtdq2ps xmm3,xmm3
      ; maxps   xmm1,xmm3
        add     edx, 64
        cmp     edx, lights_aligned_end ; global
        jnz     @b

        cvtps2dq xmm1, xmm1             ; float channels -> 4 packed bytes
        packssdw xmm1, xmm1
        packuswb xmm1, xmm1
        movd    [edi], xmm1
.skip:
        add     edi, 4
        add     esi, 4
        addps   xmm0, .dn
        addss   xmm2, .dz
        sub     ecx, 1
        jnz     .ddraw

.end_rp_line:
        add     esp, 160
        pop     ebp

        ret
|