View3ds v0.7 by Maciej Guba

git-svn-id: svn://kolibrios.org@8047 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Kirill Lipatov (Leency) 2020-07-13 11:09:09 +00:00
parent 45f7cb904e
commit 431e55f6b3
8 changed files with 1680 additions and 746 deletions

View File

@ -0,0 +1,550 @@
; Glass like rendering triangle by Maciej Guba.
; http://macgub.hekko.pl, macgub3@wp.pl
ROUND2 equ 10
glass_tri:
;--------------------------------------------------------
;----procedure renders a glass like triangle with z coord
;----interpolation ( Catmull algorithm ) ----------------
;----The normal vector is interpolated along the edges --
;----here and normalized in every pixel by glass_line ---
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- edx - ptr to stencil_buff -------
;---------------------- esi - pointer to Z-buffer filled-
;----------------------       with dd float variables ---
;----------------------       (not read by this code) ---
;---------------------- edi - pointer to screen buffer---
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;----------------------       as dword floats -----------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;----------------------  x_min, x_max as dword integers -
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
        push      ebp
        mov       ebp,esp
        sub       esp,512
        sub       ebp,16
        and       ebp,0xfffffff0     ; 16 byte aligned frame base for movaps/movdqa

; Locals, addressed from the aligned ebp.
 .1_nv  equ [ebp-16]
 .2_nv  equ [ebp-32]
 .3_nv  equ [ebp-48]
 .l_v   equ [ebp-64]
 .z3    equ [ebp-72]
 .z2    equ [ebp-76]
 .z1    equ [ebp-80]                 ; whole xmm4 is stored here (z1, z2, z3, unused)
 .x1    equ [ebp-82]
 .y1    equ [ebp-84]                 ; packed dword x shl 16 + y is stored at the y slot
 .x2    equ [ebp-86]
 .y2    equ [ebp-88]
 .x3    equ [ebp-90]
 .y3    equ [ebp-92]
 .Zbuf  equ [ebp-96]
 .x_max equ [ebp-100]
 .x_min equ [ebp-104]
 .y_max equ [ebp-108]
 .y_min equ [ebp-112]                ; whole xmm5 is stored here (y_min .. x_max)
 .screen equ [ebp-116]
 .dx12  equ [ebp-120]
 .dx13  equ [ebp-124]
 .dx23  equ [ebp-128]
 .dn12  equ [ebp-144]
 .dn13  equ [ebp-160]
 .dn23  equ [ebp-176]
 .dz12  equ [ebp-180]
 .dz13  equ [ebp-184]
 .dz23  equ [ebp-188]
 .cnv1  equ [ebp-208]                ; cur normal vectors
 .cnv2  equ [ebp-224]
 .cz2   equ [ebp-228]
 .cz1   equ [ebp-232]                ; cz1, cz2 are adjacent - loaded together by movlps
 .stencil_buff equ [ebp-236]

; Sort the vertices so that y1 <= y2 <= y3. The z lanes of xmm4 and
; the normals follow every coordinate swap.
 .sort3:
        cmp       ax,bx
        jle       .sort1
        xchg      eax,ebx
        shufps    xmm4,xmm4,11100001b   ; swap z1 <-> z2
        movaps    xmm6,xmm0
        movaps    xmm0,xmm1
        movaps    xmm1,xmm6
 .sort1:
        cmp       bx,cx
        jle       .sort2
        xchg      ebx,ecx
        shufps    xmm4,xmm4,11011000b   ; swap z2 <-> z3
        movaps    xmm6,xmm1
        movaps    xmm1,xmm2
        movaps    xmm2,xmm6
        jmp       .sort3                ; vertex 2 changed - recheck against vertex 1
 .sort2:
        movaps    .z1,xmm4
        mov       .y1,eax
        mov       .y2,ebx
        mov       .y3,ecx
        mov       .stencil_buff, edx
        movdqa    .y_min,xmm5
if 1
; Check whether at least a fragment of the triangle is in the visible
; area: skip the triangle when none of its six coordinates lies in
; ( min, max ] of its own axis.
        packssdw  xmm5,xmm5             ; words: y_min, y_max, x_min, x_max (twice)
        pshuflw   xmm5,xmm5,11011000b   ; low words: y_min, x_min, y_max, x_max
        movdqu    xmm7,.y3              ; words: y3,x3,y2,x2,y1,x1 + the 4 bytes of .z1
        movdqa    xmm6,xmm5
        pshufd    xmm5,xmm5,0           ; xmm5 lo-hi -> broadcasted y_min, x_min
        pshufd    xmm6,xmm6,01010101b   ; xmm6 -> broadcasted y_max, x_max
        movdqa    xmm4,xmm7
        pcmpgtw   xmm7,xmm5             ; coord > min
        pcmpgtw   xmm4,xmm6             ; coord > max
        pxor      xmm7,xmm4             ; min < coord <= max
        pmovmskb  eax,xmm7              ; one bit per byte - 16 bits in total
        ; Only bytes 0..11 hold coordinates; bytes 12..15 are the raw
        ; bits of z1 and must not take part in the test. 0x0aaa picks
        ; the high byte of each of the six coordinate words.
        and       eax,0x0aaa
        or        eax,eax
        jz        .rpt_loop2_end
end if
        movaps    .1_nv,xmm0
        movaps    .2_nv,xmm1
        movaps    .3_nv,xmm2
        movaps    .l_v,xmm3
;       mov       .Zbuf,esi
        mov       .screen,edi

; ---- per scanline deltas along edge 1-2 ----
; dx is a fixed point value with ROUND2 fractional bits, dz and dn
; are floats. A horizontal edge ( dy = 0 ) gets zero deltas.
        mov       bx,.y2
        sub       bx,.y1
        jnz       .rpt_dx12_make
        xorps     xmm7,xmm7
        mov       dword .dx12,0
        mov       dword .dz12,0
        movaps    .dn12,xmm7
        jmp       .rpt_dx12_done
 .rpt_dx12_make:
        mov       ax,.x2
        sub       ax,.x1
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx12,eax             ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z2
        subss     xmm5,.z1
        divss     xmm5,xmm6
        movss     .dz12,xmm5            ; dz12 = (z2-z1) / (y2-y1)
        movaps    xmm0,.2_nv
        subps     xmm0,.1_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn12,xmm0            ; dn12 = (n2-n1) / (y2-y1)
 .rpt_dx12_done:

; ---- per scanline deltas along edge 1-3 ----
        mov       bx,.y3
        sub       bx,.y1
        jnz       .rpt_dx13_make
        xorps     xmm7,xmm7
        mov       dword .dx13,0
        mov       dword .dz13,0
        movaps    .dn13,xmm7
        jmp       .rpt_dx13_done
 .rpt_dx13_make:
        mov       ax,.x3
        sub       ax,.x1
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx13,eax
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z3
        subss     xmm5,.z1
        divss     xmm5,xmm6
        movss     .dz13,xmm5
        movaps    xmm0,.3_nv
        subps     xmm0,.1_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn13,xmm0
 .rpt_dx13_done:

; ---- per scanline deltas along edge 2-3 ----
        mov       bx,.y3
        sub       bx,.y2
        jnz       .rpt_dx23_make
        xorps     xmm7,xmm7
        mov       dword .dx23,0
        mov       dword .dz23,0
        movaps    .dn23,xmm7
        jmp       .rpt_dx23_done
 .rpt_dx23_make:
        mov       ax,.x3
        sub       ax,.x2
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx23,eax
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z3
        subss     xmm5,.z2
        divss     xmm5,xmm6
        movss     .dz23,xmm5
        movaps    xmm0,.3_nv
        subps     xmm0,.2_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn23,xmm0
 .rpt_dx23_done:

; ---- upper part: scanlines y1 .. y2-1 ----
; eax - cur x on edge 1-3, ebx - cur x on edge 1-2 ( fixed point ),
; ecx - cur y. Both edges start in vertex 1.
        movsx     eax,word .x1
        shl       eax,ROUND2
        mov       ebx,eax
        mov       edx,.z1
        mov       .cz1,edx
        mov       .cz2,edx
        movaps    xmm0,.1_nv
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm0
        movsx     ecx,word .y1
        cmp       cx,.y2
        jge       .rpt_loop1_end
 .rpt_loop1:
        pushad                          ; keep fixed point x, y over the call
        movaps    xmm2,.y_min
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movlps    xmm3,.cz1             ; lo -> hi cz1, cz2
        movaps    xmm4,.l_v
        sar       ebx,ROUND2            ; glass_line takes integer x
        sar       eax,ROUND2
        mov       edx,.stencil_buff
        mov       edi,.screen
;       mov       esi,.Zbuf
        call      glass_line
        popad
        movaps    xmm0,.cnv1            ; step both edges to the next scanline
        movaps    xmm1,.cnv2
        movss     xmm2,.cz1
        movss     xmm3,.cz2
        addps     xmm0,.dn13
        addps     xmm1,.dn12
        addss     xmm2,.dz13
        addss     xmm3,.dz12
        add       eax,.dx13
        add       ebx,.dx12
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm1
        movss     .cz1,xmm2
        movss     .cz2,xmm3
        add       ecx,1
        cmp       cx,.y2
        jl        .rpt_loop1
 .rpt_loop1_end:

; ---- lower part: scanlines y2 .. y3-1 ----
; Edge 1-3 simply continues; the second edge restarts in vertex 2
; and follows edge 2-3.
        movsx     ecx,word .y2
        cmp       cx,.y3
        jge       .rpt_loop2_end
        movsx     ebx,word .x2          ; eax - cur x1
        shl       ebx,ROUND2            ; ebx - cur x2
        push      dword .z2
        pop       dword .cz2
        movaps    xmm0,.2_nv
        movaps    .cnv2,xmm0
 .rpt_loop2:
        pushad
        movaps    xmm2,.y_min
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movlps    xmm3,.cz1
        movaps    xmm4,.l_v
        sar       ebx,ROUND2
        sar       eax,ROUND2
        mov       edx,.stencil_buff
        mov       edi,.screen
;       mov       esi,.Zbuf
        call      glass_line
        popad
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movss     xmm2,.cz1
        movss     xmm3,.cz2
        addps     xmm0,.dn13
        addps     xmm1,.dn23
        addss     xmm2,.dz13
        addss     xmm3,.dz23
        add       eax,.dx13
        add       ebx,.dx23
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm1
        movss     .cz1,xmm2
        movss     .cz2,xmm3
        add       ecx,1
        cmp       cx,.y3
        jl        .rpt_loop2
 .rpt_loop2_end:
        add       esp,512
        pop       ebp
        ret
align 16
glass_line:
; Draws one horizontal span of a glass like triangle. The normal
; vector is interpolated along the span and normalized in every
; pixel; the computed colour is added ( with byte saturation ) to the
; pixel that is already in the screen buffer.
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vect 2
;    xmm3 - lo -> hi z1, z2 coords as dwords floats
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm4 - normalized light vector ( only stored in .lv - the
;           shading loop reads the global lights_aligned list )
;    eax  - x1
;    ebx  - x2
;    ecx  - y
;    edx  - stencil buff ptr
;    edi  - screen buffer
;    esi  - z buffer ===> not needed in glass rendering
; Registers are not preserved ( only ebp is restored ).
        push      ebp
        mov       ebp,esp
        sub       esp,256
        sub       ebp,16
        and       ebp,0xfffffff0     ; 16 byte aligned frame base for movaps

; Locals, addressed from the aligned ebp.
 .n1    equ [ebp-16]
 .n2    equ [ebp-32]
 .lv    equ [ebp-48]
 .lx1   equ [ebp-52]
 .lx2   equ [ebp-56]
 .z2    equ [ebp-60]
 .z1    equ [ebp-64]                 ; z1, z2 are adjacent - stored together by movlps
 .screen equ [ebp-68]
 .zbuff equ [ebp-72]
 .x_max equ [ebp-74]                 ; the four limits are kept as packed words
 .x_min equ [ebp-76]
 .y_max equ [ebp-78]
 .y_min equ [ebp-80]
 .dn    equ [ebp-96]
 .dz    equ [ebp-100]
 .y     equ [ebp-104]
 .cnv   equ [ebp-128]
 .col_sum_b equ [ebp-136]
 .col_sum_g equ [ebp-140]
 .col_sum_r equ [ebp-144]
 .cur_col equ [ebp-160]
 .stencil_buf equ [ebp-164]

        mov       .y,ecx
        packssdw  xmm2,xmm2             ; dword limits -> signed words
        movq      .y_min,xmm2           ; y_min, y_max, x_min, x_max
        cmp       cx,.y_min             ; reject scanlines outside [ y_min, y_max )
        jl        .end_rp_line
        cmp       cx,.y_max
        jge       .end_rp_line
        cmp       eax,ebx
        je        .end_rp_line          ; empty span
        jl        @f
        xchg      eax,ebx               ; make eax the left end; normals and
        movaps    xmm7,xmm0             ; z values follow the swap
        movaps    xmm0,xmm1
        movaps    xmm1,xmm7
        shufps    xmm3,xmm3,11100001b
      @@:
        cmp       ax,.x_max             ; span completely right of the window
        jge       .end_rp_line
        cmp       bx,.x_min             ; span completely left of the window
        jle       .end_rp_line
        movaps    .lv,xmm4
        movaps    .n1,xmm0
        movaps    .n2,xmm1
        mov       .lx1,eax
        mov       .lx2,ebx
        mov       .stencil_buf,edx      ; edx is destroyed by mul below
        movlps    .z1,xmm3

; per pixel deltas of the normal and of z
        sub       ebx,eax
        cvtsi2ss  xmm7,ebx
        shufps    xmm7,xmm7,0
        subps     xmm1,xmm0
        divps     xmm1,xmm7
        movaps    .dn,xmm1              ; dn = (n2-n1) / (x2-x1)
        psrldq    xmm3,4                ; bring z2 into the low lane
        subss     xmm3,.z1
        divss     xmm3,xmm7
        movss     .dz,xmm3              ; dz = (z2-z1) / (x2-x1)

; clip the left end to x_min - advance z and the normal by the
; number of skipped pixels
        mov       ebx,.lx1
        cmp       bx,.x_min
        jge       @f
        movzx     eax,word .x_min
        sub       eax,ebx               ; number of skipped pixels
        cvtsi2ss  xmm7,eax
        shufps    xmm7,xmm7,0
        mulss     xmm3,xmm7
        mulps     xmm1,xmm7
        addss     xmm3,.z1
        addps     xmm1,.n1
        movsx     eax,word .x_min
        movss     .z1,xmm3
        movaps    .n1,xmm1
        mov       dword .lx1,eax
      @@:
; clip the right end to x_max
        movzx     eax,word .x_max
        cmp       .lx2,eax
        jl        @f
        mov       .lx2,eax
      @@:
; pixel offset = ( xres * y + lx1 ) * 4
; NOTE(review): the sibling real_phong_line_z reads size_x_var for the
; same purpose - confirm xres_var is defined in the including file.
        movzx     eax,word[xres_var]
        mul       dword .y
        add       eax,.lx1
        shl       eax,2
        add       edi,eax               ; edi - first pixel of the span
        mov       ebx,eax
        add       ebx,.stencil_buf      ; ebx - matching stencil entry
        mov       ecx,.lx2
        sub       ecx,.lx1              ; ecx - pixel count
        movaps    xmm0,.n1              ; xmm0 - cur normal, xmm2 - cur z
        movss     xmm2,.z1
align 16
 .ddraw:
        movaps    xmm7,xmm0
        mulps     xmm7,xmm7             ; normalize
        haddps    xmm7,xmm7
        haddps    xmm7,xmm7             ; sum of squares in every lane
        rsqrtps   xmm7,xmm7
        mulps     xmm7,xmm0
        maxps     xmm7,[the_zero]       ; negative components are clamped to 0
        movups    .cnv,xmm7
        mov       edx,lights_aligned    ; lights_aligned - global variable
        xorps     xmm1,xmm1             ; instead global can be used .lv - light vect.
                                        ; xmm1 - per channel maximum over all lights
 .again_col:
; One light record takes 64 bytes; this loop reads four 16 byte
; vectors at offsets 0, 16, 32 and 48 of it.
        movups    xmm7,.cnv
        mulps     xmm7,[edx]
        haddps    xmm7,xmm7
        haddps    xmm7,xmm7             ; dot product of cnv and [edx] in every lane
if 0
        cmp       [bump_flag],1         ; on/off temporarily
                                        ; depends on bump button
        je        @f
; stencil
        movss     xmm5,xmm2
        movss     xmm6,xmm2
        addss     xmm5,[aprox]
        subss     xmm6,[aprox]
; The stencil buffer does not work as expected for now,
; moreover - it does not work at all.
        cmpnltss  xmm5,dword[ebx]
        cmpnltss  xmm6,dword[ebx]
        xorps     xmm5,xmm6
        xorps     xmm6,xmm6
        movd      eax,xmm5
        cmp       eax,-1
        jne       .no_reflective
end if
      @@:
        movaps    xmm6,xmm7
        mulps     xmm6,xmm6
        mulps     xmm6,xmm6
        mulps     xmm6,xmm6             ; dot ^ 8
        mulps     xmm6,[edx+48]
 .no_reflective:
        mulps     xmm7,[edx+16]
        addps     xmm7,xmm6
        addps     xmm7,[edx+32]
        minps     xmm7,[mask_255f]      ; global
        maxps     xmm1,xmm7
        add       edx,64                ; size of one light in aligned list
        cmp       edx,lights_aligned_end
        jb        .again_col            ; addresses are unsigned - compare them unsigned
        cvtps2dq  xmm1,xmm1
        movd      xmm6,[edi]            ; pixel already on the screen
        packssdw  xmm1,xmm1
        packuswb  xmm1,xmm1
        paddusb   xmm1,xmm6             ; additive blend with byte saturation
        movd      [edi],xmm1
 .skip:
        add       edi,4
        add       ebx,4                 ; stencil_buff
        addps     xmm0,.dn
        addss     xmm2,.dz
        sub       ecx,1
        jnz       .ddraw
 .end_rp_line:
        add       esp,256
        pop       ebp
        ret

View File

@ -0,0 +1,528 @@
; Real Phong's shading implemented if flat assembler
; by Maciej Guba.
; http://macgub.vxm.pl
ROUND2 equ 10
real_phong_tri_z:
;--------------------------------------------------------
;----procedure renders a Phong shaded triangle with z ---
;----coord interpolation ( Catmull algorithm ) ----------
;----The normal vector is interpolated along the edges --
;----here and normalized in every pixel by --------------
;----real_phong_line_z ----------------------------------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer filled-
;----------------------       with dd float variables ---
;---------------------- edi - pointer to screen buffer---
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;----------------------       as dword floats -----------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;----------------------  x_min, x_max as dword integers -
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
        push      ebp
        mov       ebp,esp
        sub       esp,512
        sub       ebp,16
        and       ebp,0xfffffff0     ; 16 byte aligned frame base for movaps/movdqa

; Locals, addressed from the aligned ebp.
 .1_nv  equ [ebp-16]
 .2_nv  equ [ebp-32]
 .3_nv  equ [ebp-48]
 .l_v   equ [ebp-64]
 .z3    equ [ebp-72]
 .z2    equ [ebp-76]
 .z1    equ [ebp-80]                 ; whole xmm4 is stored here (z1, z2, z3, unused)
 .x1    equ [ebp-82]
 .y1    equ [ebp-84]                 ; packed dword x shl 16 + y is stored at the y slot
 .x2    equ [ebp-86]
 .y2    equ [ebp-88]
 .x3    equ [ebp-90]
 .y3    equ [ebp-92]
 .Zbuf  equ [ebp-96]
 .x_max equ [ebp-100]
 .x_min equ [ebp-104]
 .y_max equ [ebp-108]
 .y_min equ [ebp-112]                ; whole xmm5 is stored here (y_min .. x_max)
 .screen equ [ebp-116]
 .dx12  equ [ebp-120]
 .dx13  equ [ebp-124]
 .dx23  equ [ebp-128]
 .dn12  equ [ebp-144]
 .dn13  equ [ebp-160]
 .dn23  equ [ebp-176]
 .dz12  equ [ebp-180]
 .dz13  equ [ebp-184]
 .dz23  equ [ebp-188]
 .cnv1  equ [ebp-208]                ; cur normal vectors
 .cnv2  equ [ebp-224]
 .cz2   equ [ebp-228]
 .cz1   equ [ebp-232]                ; cz1, cz2 are adjacent - loaded together by movlps

; Sort the vertices so that y1 <= y2 <= y3. The z lanes of xmm4 and
; the normals follow every coordinate swap.
 .sort3:
        cmp       ax,bx
        jle       .sort1
        xchg      eax,ebx
        shufps    xmm4,xmm4,11100001b   ; swap z1 <-> z2
        movaps    xmm6,xmm0
        movaps    xmm0,xmm1
        movaps    xmm1,xmm6
 .sort1:
        cmp       bx,cx
        jle       .sort2
        xchg      ebx,ecx
        shufps    xmm4,xmm4,11011000b   ; swap z2 <-> z3
        movaps    xmm6,xmm1
        movaps    xmm1,xmm2
        movaps    xmm2,xmm6
        jmp       .sort3                ; vertex 2 changed - recheck against vertex 1
 .sort2:
        movaps    .z1,xmm4
        mov       .y1,eax
        mov       .y2,ebx
        mov       .y3,ecx
        movdqa    .y_min,xmm5
if 1
; Check whether at least a fragment of the triangle is in the visible
; area: skip the triangle when none of its six coordinates lies in
; ( min, max ] of its own axis.
        packssdw  xmm5,xmm5             ; words: y_min, y_max, x_min, x_max (twice)
        pshuflw   xmm5,xmm5,11011000b   ; low words: y_min, x_min, y_max, x_max
        movdqu    xmm7,.y3              ; words: y3,x3,y2,x2,y1,x1 + the 4 bytes of .z1
        movdqa    xmm6,xmm5
        pshufd    xmm5,xmm5,0           ; xmm5 lo-hi -> broadcasted y_min, x_min
        pshufd    xmm6,xmm6,01010101b   ; xmm6 -> broadcasted y_max, x_max
        movdqa    xmm4,xmm7
        pcmpgtw   xmm7,xmm5             ; coord > min
        pcmpgtw   xmm4,xmm6             ; coord > max
        pxor      xmm7,xmm4             ; min < coord <= max
        pmovmskb  eax,xmm7              ; one bit per byte - 16 bits in total
        ; Only bytes 0..11 hold coordinates; bytes 12..15 are the raw
        ; bits of z1 and must not take part in the test. 0x0aaa picks
        ; the high byte of each of the six coordinate words.
        and       eax,0x0aaa
        or        eax,eax
        jz        .rpt_loop2_end
end if
        movaps    .1_nv,xmm0
        movaps    .2_nv,xmm1
        movaps    .3_nv,xmm2
        movaps    .l_v,xmm3
        mov       .Zbuf,esi
        mov       .screen,edi

; ---- per scanline deltas along edge 1-2 ----
; dx is a fixed point value with ROUND2 fractional bits, dz and dn
; are floats. A horizontal edge ( dy = 0 ) gets zero deltas.
        mov       bx,.y2
        sub       bx,.y1
        jnz       .rpt_dx12_make
        xorps     xmm7,xmm7
        mov       dword .dx12,0
        mov       dword .dz12,0
        movaps    .dn12,xmm7
        jmp       .rpt_dx12_done
 .rpt_dx12_make:
        mov       ax,.x2
        sub       ax,.x1
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx12,eax             ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z2
        subss     xmm5,.z1
        divss     xmm5,xmm6
        movss     .dz12,xmm5            ; dz12 = (z2-z1) / (y2-y1)
        movaps    xmm0,.2_nv
        subps     xmm0,.1_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn12,xmm0            ; dn12 = (n2-n1) / (y2-y1)
 .rpt_dx12_done:

; ---- per scanline deltas along edge 1-3 ----
        mov       bx,.y3
        sub       bx,.y1
        jnz       .rpt_dx13_make
        xorps     xmm7,xmm7
        mov       dword .dx13,0
        mov       dword .dz13,0
        movaps    .dn13,xmm7
        jmp       .rpt_dx13_done
 .rpt_dx13_make:
        mov       ax,.x3
        sub       ax,.x1
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx13,eax
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z3
        subss     xmm5,.z1
        divss     xmm5,xmm6
        movss     .dz13,xmm5
        movaps    xmm0,.3_nv
        subps     xmm0,.1_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn13,xmm0
 .rpt_dx13_done:

; ---- per scanline deltas along edge 2-3 ----
        mov       bx,.y3
        sub       bx,.y2
        jnz       .rpt_dx23_make
        xorps     xmm7,xmm7
        mov       dword .dx23,0
        mov       dword .dz23,0
        movaps    .dn23,xmm7
        jmp       .rpt_dx23_done
 .rpt_dx23_make:
        mov       ax,.x3
        sub       ax,.x2
        cwde
        movsx     ebx,bx
        shl       eax,ROUND2
        cdq
        idiv      ebx
        mov       .dx23,eax
        cvtsi2ss  xmm6,ebx
        movss     xmm5,.z3
        subss     xmm5,.z2
        divss     xmm5,xmm6
        movss     .dz23,xmm5
        movaps    xmm0,.3_nv
        subps     xmm0,.2_nv
        shufps    xmm6,xmm6,0
        divps     xmm0,xmm6
        movaps    .dn23,xmm0
 .rpt_dx23_done:

; ---- upper part: scanlines y1 .. y2-1 ----
; eax - cur x on edge 1-3, ebx - cur x on edge 1-2 ( fixed point ),
; ecx - cur y. Both edges start in vertex 1.
        movsx     eax,word .x1
        shl       eax,ROUND2
        mov       ebx,eax
        mov       edx,.z1
        mov       .cz1,edx
        mov       .cz2,edx
        movaps    xmm0,.1_nv
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm0
        movsx     ecx,word .y1
        cmp       cx,.y2
        jge       .rpt_loop1_end
 .rpt_loop1:
        pushad                          ; keep fixed point x, y over the call
        movaps    xmm2,.y_min
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movlps    xmm3,.cz1             ; lo -> hi cz1, cz2
        movaps    xmm4,.l_v
        sar       ebx,ROUND2            ; the line routine takes integer x
        sar       eax,ROUND2
        mov       edi,.screen
        mov       esi,.Zbuf
        call      real_phong_line_z
        popad
        movaps    xmm0,.cnv1            ; step both edges to the next scanline
        movaps    xmm1,.cnv2
        movss     xmm2,.cz1
        movss     xmm3,.cz2
        addps     xmm0,.dn13
        addps     xmm1,.dn12
        addss     xmm2,.dz13
        addss     xmm3,.dz12
        add       eax,.dx13
        add       ebx,.dx12
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm1
        movss     .cz1,xmm2
        movss     .cz2,xmm3
        add       ecx,1
        cmp       cx,.y2
        jl        .rpt_loop1
 .rpt_loop1_end:

; ---- lower part: scanlines y2 .. y3-1 ----
; Edge 1-3 simply continues; the second edge restarts in vertex 2
; and follows edge 2-3.
        movsx     ecx,word .y2
        cmp       cx,.y3
        jge       .rpt_loop2_end
        movsx     ebx,word .x2          ; eax - cur x1
        shl       ebx,ROUND2            ; ebx - cur x2
        push      dword .z2
        pop       dword .cz2
        movaps    xmm0,.2_nv
        movaps    .cnv2,xmm0
 .rpt_loop2:
        pushad
        movaps    xmm2,.y_min
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movlps    xmm3,.cz1
        movaps    xmm4,.l_v
        sar       ebx,ROUND2
        sar       eax,ROUND2
        mov       edi,.screen
        mov       esi,.Zbuf
        call      real_phong_line_z
        popad
        movaps    xmm0,.cnv1
        movaps    xmm1,.cnv2
        movss     xmm2,.cz1
        movss     xmm3,.cz2
        addps     xmm0,.dn13
        addps     xmm1,.dn23
        addss     xmm2,.dz13
        addss     xmm3,.dz23
        add       eax,.dx13
        add       ebx,.dx23
        movaps    .cnv1,xmm0
        movaps    .cnv2,xmm1
        movss     .cz1,xmm2
        movss     .cz2,xmm3
        add       ecx,1
        cmp       cx,.y3
        jl        .rpt_loop2
 .rpt_loop2_end:
        add       esp,512
        pop       ebp
        ret
align 16
real_phong_line_z:
; Draws one horizontal span of a Phong shaded triangle with a per
; pixel Z-buffer test. The normal vector is interpolated along the
; span and normalized in every pixel.
; in:
; xmm0 - normal vector 1
; xmm1 - normal vect 2
; xmm3 - lo -> hi z1, z2 coords as dwords floats
; xmm2 - lo -> hi y_min, y_max, x_min, x_max
; as dword integers
; xmm4 - normalized light vector ( only stored in .lv - the shading
; loop reads the global lights_aligned list )
; eax - x1
; ebx - x2
; ecx - y
; edi - screen buffer
; esi - z buffer filled with dd floats
; Registers are not preserved ( only ebp is restored ).
push ebp
mov ebp,esp
sub esp,160
sub ebp,16
and ebp,0xfffffff0 ; 16 byte aligned frame base for movaps
; Locals, addressed from the aligned ebp.
.n1 equ [ebp-16]
.n2 equ [ebp-32]
.lv equ [ebp-48]
.lx1 equ [ebp-52]
.lx2 equ [ebp-56]
.z2 equ [ebp-60]
.z1 equ [ebp-64] ; z1, z2 are adjacent - stored together by movlps
.screen equ [ebp-68]
.zbuff equ [ebp-72]
.x_max equ [ebp-74] ; the four limits are kept as packed words
.x_min equ [ebp-76]
.y_max equ [ebp-78]
.y_min equ [ebp-80]
.dn equ [ebp-96]
.dz equ [ebp-100]
.y equ [ebp-104]
.cnv equ [ebp-128]
mov .y,ecx
packssdw xmm2,xmm2 ; dword limits -> signed words
movq .y_min,xmm2 ; y_min, y_max, x_min, x_max
cmp cx,.y_min ; reject scanlines outside [ y_min, y_max )
jl .end_rp_line
cmp cx,.y_max
jge .end_rp_line ;
cmp eax,ebx
je .end_rp_line ; empty span
jl @f
xchg eax,ebx ; make eax the left end; normals and z follow the swap
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7
shufps xmm3,xmm3,11100001b
@@:
cmp ax,.x_max ; span completely right of the window
jge .end_rp_line
cmp bx,.x_min ; span completely left of the window
jle .end_rp_line
movaps .lv,xmm4
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
mov .lx2,ebx
movlps .z1,xmm3
; per pixel deltas of the normal and of z
sub ebx,eax
cvtsi2ss xmm7,ebx
shufps xmm7,xmm7,0
subps xmm1,xmm0
divps xmm1,xmm7
movaps .dn,xmm1 ; dn = (n2-n1) / (x2-x1)
psrldq xmm3,4 ; bring z2 into the low lane
subss xmm3,.z1
divss xmm3,xmm7
movss .dz,xmm3 ; dz = (z2-z1) / (x2-x1)
mov ebx,.lx1
cmp bx,.x_min ; clip the left end to x_min
jge @f
movzx eax,word .x_min
sub eax,ebx ; number of skipped pixels
cvtsi2ss xmm7,eax
shufps xmm7,xmm7,0
mulss xmm3,xmm7 ; advance z and the normal by the skipped pixels
mulps xmm1,xmm7
addss xmm3,.z1
addps xmm1,.n1
movsx eax,word .x_min
movss .z1,xmm3
movaps .n1,xmm1
mov dword .lx1,eax
@@:
movzx eax,word .x_max ; clip the right end to x_max
cmp .lx2,eax
jl @f
mov .lx2,eax
@@:
; pixel offset = ( size_x * y + lx1 ) * 4 - the same offset is used
; for the screen and for the Z-buffer
movzx eax,word[size_x_var]
mul dword .y ; note: mul also overwrites edx
; mov edx,.x1
add eax,.lx1
shl eax,2
add edi,eax
add esi,eax
mov ecx,.lx2
sub ecx,.lx1 ; ecx - pixel count
movaps xmm0,.n1 ; xmm0 - cur normal, xmm2 - cur z
movss xmm2,.z1
align 16
.ddraw:
; Z test: the pixel is skipped unless cur z < value in the Z-buffer
movss xmm7,xmm2
cmpnltss xmm7,dword[esi]
movd eax,xmm7
or eax,eax
jnz .skip
movss [esi],xmm2 ; pixel passed - update the Z-buffer
movaps xmm7,xmm0
mulps xmm7,xmm7 ; normalize
haddps xmm7,xmm7
haddps xmm7,xmm7 ; sum of squares in every lane
rsqrtps xmm7,xmm7
mulps xmm7,xmm0
movaps .cnv,xmm7
mov edx,lights_aligned ; lights - global variable
xorps xmm1,xmm1 ; instead global can be used .lv - light vect.
; xmm1 - per channel maximum over all lights
@@:
; One light record takes 64 bytes; this loop reads the 16 byte
; vectors at offsets 0, 16 and 48 of it.
movaps xmm6,[edx+16]
movaps xmm5,[edx]
movaps xmm3,[edx+48]
andps xmm5,[zero_hgst_dd] ; global
mulps xmm5,.cnv ;.lv ; last dword should be zeroed
haddps xmm5,xmm5
haddps xmm5,xmm5 ; dot product of cnv and [edx] in every lane
; mulps xmm5,[env_const2]
; maxps xmm5,[dot_min]
; minps xmm5,[dot_max]
movaps xmm7,xmm5
; mulps xmm7,[env_const2]
; mulps xmm7,[env_const2]
; maxps xmm7,[dot_min]
; minps xmm7,[dot_max]
mulps xmm7,xmm7
mulps xmm7,xmm7
mulps xmm5,xmm6 ; dot * [edx+16]
mulps xmm7,xmm7 ; dot ^ 8
mulps xmm7,xmm3 ; dot ^ 8 * [edx+48]
addps xmm5,xmm7
minps xmm5,[mask_255f] ; global
maxps xmm1,xmm5
; movq xmm3,[edx+20] ; minimal color
; punpcklwd xmm3,[minimum0]
; cvtdq2ps xmm3,xmm3
; maxps xmm1,xmm3
add edx,64 ; next light record
cmp edx,lights_aligned_end ; global
jnz @b
cvtps2dq xmm1,xmm1 ; floats -> dwords -> words -> bytes
packssdw xmm1,xmm1
packuswb xmm1,xmm1
movd [edi],xmm1 ; store the 4 byte pixel
.skip:
add edi,4
add esi,4
addps xmm0,.dn
addss xmm2,.dz
sub ecx,1
jnz .ddraw
.end_rp_line:
add esp,160
pop ebp
ret

View File

@ -76,11 +76,20 @@ end if
mul edx mul edx
; shl eax,9 ; shl eax,9
add eax,dword .x add eax,dword .x
lea ebx,[eax*3] lea ebx,[eax*3]
cmp [dr_flag],12 ; 32 bit col cause
jne @f
add ebx,eax
@@:
mov eax,[esi] mov eax,[esi]
mov [edi+ebx],eax mov [edi+ebx],eax
.skip: .skip:
add esi,3 add esi,3
cmp [dr_flag],12
jne @f
inc esi
@@:
inc dword .x inc dword .x
movzx edx,word[size_x_var] movzx edx,word[size_x_var]
cmp dword .x,edx ;SIZE_X cmp dword .x,edx ;SIZE_X
@ -97,9 +106,12 @@ end if
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
movzx eax,word[size_y_var] movzx eax,word[size_y_var]
imul ecx,eax imul ecx,eax
cmp [dr_flag],12
je @f
lea ecx,[ecx*3] lea ecx,[ecx*3]
shr ecx,2 shr ecx,2
; mov ecx,SIZE_X*SIZE_Y*3/4 ; mov ecx,SIZE_X*SIZE_Y*3/4
@@:
cld cld
rep movsd rep movsd
@ -156,23 +168,42 @@ if Ext >= SSE2
sub ecx,ebx sub ecx,ebx
mov esi,[screen_ptr] mov esi,[screen_ptr]
mov edi,[Zbuffer_ptr] mov edi,[Zbuffer_ptr]
cmp [dr_flag],12
je @f
lea ebx,[ebx*3] lea ebx,[ebx*3]
jmp .f
@@:
shl ebx,2
.f:
mov edx,esi mov edx,esi
add esi,ebx add esi,ebx
lea ebx,[ebx+esi] lea ebx,[ebx+esi]
pxor xmm0,xmm0 pxor xmm0,xmm0
push eax push eax
@@: .emb:
cmp [dr_flag],12
je @f
movlps xmm1,[esi+3] movlps xmm1,[esi+3]
movhps xmm1,[esi+6] movhps xmm1,[esi+6]
punpcklbw xmm1,xmm0
movlps xmm2,[esi-3] movlps xmm2,[esi-3]
movhps xmm2,[esi] movhps xmm2,[esi]
punpcklbw xmm2,xmm0
movlps xmm3,[ebx] movlps xmm3,[ebx]
movhps xmm3,[ebx+3] movhps xmm3,[ebx+3]
movlps xmm4,[edx] movlps xmm4,[edx]
movhps xmm4,[edx+3] movhps xmm4,[edx+3]
jmp .ff
@@:
movlps xmm1,[esi+4]
movhps xmm1,[esi+8]
movlps xmm2,[esi-4]
movhps xmm2,[esi]
movlps xmm3,[ebx]
movhps xmm3,[ebx+4]
movlps xmm4,[edx]
movhps xmm4,[edx+4]
.ff:
punpcklbw xmm1,xmm0
punpcklbw xmm2,xmm0
punpcklbw xmm3,xmm0 punpcklbw xmm3,xmm0
punpcklbw xmm4,xmm0 punpcklbw xmm4,xmm0
psubsw xmm1,xmm2 psubsw xmm1,xmm2
@ -199,7 +230,12 @@ end if
movd eax,xmm1 movd eax,xmm1
movzx eax,al movzx eax,al
; cmp [dr_flag],12
; je @f
lea eax,[eax*3+envmap_cub] lea eax,[eax*3+envmap_cub]
; jmp .fff
;@@:
mov eax,[eax] mov eax,[eax]
mov [edi],eax ;xmm1 mov [edi],eax ;xmm1
psrldq xmm1,8 psrldq xmm1,8
@ -209,23 +245,35 @@ end if
mov eax,[eax] mov eax,[eax]
mov [edi+4],eax mov [edi+4],eax
cmp [dr_flag],12
jne @f
add esi,2
add ebx,2
add edx,2
@@:
add edi,8 add edi,8
add esi,6 add esi,6
add ebx,6 add ebx,6
add edx,6 add edx,6
sub ecx,2 sub ecx,2
jnc @b jnc .emb
pop ecx ;,eax pop ecx ;,eax
mov edi,[screen_ptr] mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr] mov esi,[Zbuffer_ptr]
cmp [dr_flag],12
je .e
@@: @@:
movsd movsd
dec edi dec edi
loop @b loop @b
.e:
rep movsd
end if end if
ret ret
;align 16 ;align 16

View File

@ -749,10 +749,13 @@ if Ext>=SSE2
push ecx push ecx
mov edi,[screen_ptr] mov edi,[screen_ptr]
movzx ecx,word[size_x_var] ;SIZE_X*3/4 movzx ecx,word[size_x_var] ;SIZE_X*3/4
cmp [dr_flag],12
je @f
lea ecx,[ecx*3+1] lea ecx,[ecx*3+1]
shr ecx,2 shr ecx,2
@@:
; mov ecx,SIZE_X*3/4
xor eax,eax xor eax,eax
rep stosd rep stosd
if 1 if 1
@ -760,9 +763,16 @@ if Ext>=SSE2
movzx ecx,word[size_y_var] movzx ecx,word[size_y_var]
sub ecx,3 sub ecx,3
imul ecx,ebx imul ecx,ebx
cmp [dr_flag],12 ; 32 bit per pix cause
je @f
lea ecx,[ecx*3] lea ecx,[ecx*3]
shr ecx,4 shr ecx,4
lea ebx,[ebx *3] lea ebx,[ebx *3]
jmp .blr
@@:
shr ecx,2
shl ebx,2
; mov ecx,(SIZE_X*(SIZE_Y-3))*3/16 ; mov ecx,(SIZE_X*(SIZE_Y-3))*3/16
.blr: .blr:
@@: @@:
@ -771,15 +781,19 @@ if Ext>=SSE2
mov ecx,edi mov ecx,edi
sub ecx,ebx sub ecx,ebx
movups xmm1,[ecx] movups xmm1,[ecx]
cmp [dr_flag],12
je @f
movups xmm2,[edi-3] movups xmm2,[edi-3]
movups xmm3,[edi+3] movups xmm3,[edi+3]
jmp .f
@@:
movups xmm2,[edi-4]
movups xmm3,[edi+4]
.f:
pavgb xmm0,xmm1 pavgb xmm0,xmm1
pavgb xmm2,xmm3 pavgb xmm2,xmm3
pavgb xmm0,xmm2 pavgb xmm0,xmm2
psubusb xmm0,xmm5 ; importand if fire psubusb xmm0,xmm5 ; importand if fire
movups [edi],xmm0 movups [edi],xmm0
add edi,16 add edi,16
add esi,16 add esi,16
@ -788,12 +802,16 @@ if Ext>=SSE2
end if end if
xor eax,eax xor eax,eax
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
cmp [dr_flag],12
je @f
lea ecx,[ecx*3] lea ecx,[ecx*3]
shr ecx,2 shr ecx,2
@@:
; mov ecx,SIZE_X*3/4 ; mov ecx,SIZE_X*3/4
rep stosd rep stosd
pop ecx pop ecx
loop .again_blur dec ecx
jnz .again_blur
mov esp,ebp mov esp,ebp
pop ebp pop ebp
end if end if

View File

@ -1,6 +1,7 @@
; DATA AREA ************************************ ; DATA AREA ************************************
i3 dw 3 i3 dw 3
i12 dd 12
i256 dw 256 i256 dw 256
i255d dd 255 i255d dd 255
dot_max dd 1.0 ; dot product max and min dot_max dd 1.0 ; dot product max and min
@ -64,6 +65,7 @@
dw 0 dw 0
edit_end_y dw 0 edit_end_y dw 0
mouse_state dd 0 mouse_state dd 0
menu: menu:
db 2 ; button number = index db 2 ; button number = index
db 'rotary ' ; label db 'rotary ' ; label
@ -73,7 +75,11 @@
db 3 db 3
db 'shd. model' db 'shd. model'
if Ext >= SSE3
db 13
else
db 12 db 12
end if
dr_flag db 0 ; 6 - dots dr_flag db 0 ; 6 - dots
dd shd_f dd shd_f
@ -242,6 +248,9 @@ sinus_flag db 0
;; dd color_component_f ;; dd color_component_f
db -1 ; end mark db -1 ; end mark
@ -259,6 +268,7 @@ flags: ; flags description
db 'btex' db 'btex'
db 'cenv' db 'cenv'
db 'grdl' db 'grdl'
db 'rphg'
spd_f: spd_f:
db 'idle' db 'idle'
db 'full' db 'full'
@ -344,7 +354,10 @@ base_vector:
if Ext=SSE2 if Ext=SSE2
db ' (SSE2)' db ' (SSE2)'
end if end if
db ' 0.069b',0 if Ext=SSE3
db ' (SSE3)'
end if
db ' 0.070',0
labellen: labellen:
STRdata db '-1 ' STRdata db '-1 '
@ -420,9 +433,19 @@ lights:
;=============================================== ;===============================================
lightsend: lightsend:
align 16 align 16
emboss_bias: emboss_bias:
dw 128, 128, 128, 128, 128, 128, 128, 128 dw 128, 128, 128, 128, 128, 128, 128, 128
zero_hgst_dd:
dd -1, -1, -1, 0
mask_255f:
times 4 dd 255.0
the_zero:
times 4 dd 0.0
I_END: I_END:
if USE_LFN = 0 if USE_LFN = 0
@ -559,6 +582,11 @@ align 16
tex_points rb points_count * 4 ; bump_map and texture coords tex_points rb points_count * 4 ; bump_map and texture coords
; each point word x, word y ; each point word x, word y
align 16 align 16
lights_aligned:
lights_aligned_end = $ + 16 * 12
rb 16 * 12
if Ext >= SSE2 if Ext >= SSE2
sse_repository rb 1024 sse_repository rb 1024
end if end if
@ -571,6 +599,7 @@ end if
procinfo: procinfo:
rb 1024 ; process info rb 1024 ; process info
I_Param rb 256 I_Param rb 256
memStack rb 4000 ;memory area for stack memStack:
rb 2000
align 16 align 16
screen: screen:

View File

@ -1,3 +1,12 @@
View3ds 0.069 - May 2020
1. KPacked files support by Leency.
2. 32bit vertices indexes and ability to load whole RAM limited objects.
(Above 65535 vertices and triangles), (by me).
3. I switched off the painter's algorithm mode (depth sorting). In the app's implementation it has
a limited vertex count and produces a lower-quality image than the Z-buffer Catmull algorithm.
In addition, switching it off reduces the app size (by me).
-----------------------------------------------------------------------------------
View3ds 0.068 - XI 2016 View3ds 0.068 - XI 2016
1. Editing option - new 'editor' button. 1. Editing option - new 'editor' button.

View File

@ -1,14 +1,11 @@
View3ds 0.069 - tiny viewer to .3ds and .asc files with several graphics View3ds 0.070 - tiny viewer to .3ds and .asc files with several graphics
effects implementation. effects implementation.
What's new? What's new?
1. KPacked files support by Leency. 1. Some keys support by Leency.
1. 32bit vertices indexes and ability to load whole RAM limited objects. 2. New displaying model - real Phong - real not fake normal vector interpolation, normalising it and calculating
(Above 65535 vertices and triangles), (by me). dot product (one for each light). It requires SSE3. (by me)
2. I switch off painters algotithm mode (depth sorting). In app impelementetion it has
limited vertices count and produce less quality image than Z buffer Catmull algo.
In addition this switch off reduces app size, (by me).
Buttons description: Buttons description:
1. rotary: choosing rotary axle: x, y, x+y. 1. rotary: choosing rotary axle: x, y, x+y.
@ -17,7 +14,7 @@ Buttons description:
pos (position shading depend), dots (app draws only points - nodes of object), pos (position shading depend), dots (app draws only points - nodes of object),
txgrd (texture mapping + smooth shading), 2tex (texture mapping + spherical txgrd (texture mapping + smooth shading), 2tex (texture mapping + spherical
environment mapping), bmap (bump + texture mapping), cenv (cubic environment environment mapping), bmap (bump + texture mapping), cenv (cubic environment
mapping), grdl (Gouraud lines - edges only). mapping), grdl (Gouraud lines - edges only), rphg (real Phong).
3. speed: idle, full. 3. speed: idle, full.
4,5. zoom in, out: no comment. 4,5. zoom in, out: no comment.
6. catmull: disabled 6. catmull: disabled
@ -42,4 +39,4 @@ Buttons description:
is released apply current position. You may also decrease whole handlers count by enable culling (using is released apply current position. You may also decrease whole handlers count by enable culling (using
appropriate button) - some back handlers become hidden. appropriate button) - some back handlers become hidden.
Maciej Guba V 2020 Maciej Guba VII 2020

View File

@ -1,11 +1,11 @@
; application : View3ds ver. 0.069 - tiny .3ds and .asc files viewer ; application : View3ds ver. 0.070 - tiny .3ds and .asc files viewer
; with a few graphics effects demonstration. ; with a few graphics effects demonstration.
; compiler : FASM ; compiler : FASM
; system : KolibriOS ; system : KolibriOS
; author : Macgub aka Maciej Guba ; author : Macgub aka Maciej Guba
; email : macgub3@wp.pl ; email : macgub3@wp.pl
; web : www.macgub.hekko.pl ; web : http://macgub.vxm.pl
; Fell free to use this intro in your own distribution of KolibriOS. ; Fell free to use this intro in your own distribution of KolibriOS.
; Special greetings to KolibriOS team . ; Special greetings to KolibriOS team .
; I hope because my demos Christian Belive will be near to each of You. ; I hope because my demos Christian Belive will be near to each of You.
@ -36,6 +36,7 @@ NON = 0 ; -/ \-
MMX = 1 MMX = 1
SSE = 2 SSE = 2
SSE2 = 3 SSE2 = 3
SSE3 = 4
Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 }
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features)
@ -54,9 +55,6 @@ use32
START: ; start of execution START: ; start of execution
cld cld
; mov eax,14 ; window size according to cur res ...
; int 0x40
; sub eax,150 shl 16 + 150
mov eax, 500 shl 16 + 600 ; ... or set manually mov eax, 500 shl 16 + 600 ; ... or set manually
mov [size_y_var],ax mov [size_y_var],ax
shr ax,1 shr ax,1
@ -94,15 +92,10 @@ START: ; start of execution
je .gen je .gen
jmp .malloc jmp .malloc
.gen: .gen:
if USE_LFN
mov [triangles_count_var],1000
mov [points_count_var],1000
call alloc_mem_for_tp
end if
call generate_object call generate_object
jmp .opt jmp .opt
.asc: .asc:
mov [triangles_count_var],10000 mov [triangles_count_var],10000 ; to do: read asc header
mov [points_count_var],10000 mov [points_count_var],10000
call alloc_mem_for_tp call alloc_mem_for_tp
call read_asc call read_asc
@ -115,12 +108,14 @@ START: ; start of execution
.opt: .opt:
; call alloc_buffer_mem ; alloc memfor screnn and z buffer
call optimize_object1 ; proc in file b_procs.asm call optimize_object1 ; proc in file b_procs.asm
; set point(0,0,0) in center and calc all coords ; set point(0,0,0) in center and calc all coords
; to be in <-1.0,1.0> ; to be in <-1.0,1.0>
call normalize_all_light_vectors call normalize_all_light_vectors
if Ext >= SSE3
call copy_lights ; to aligned float
end if
call init_triangles_normals2 call init_triangles_normals2
call init_point_normals call init_point_normals
call init_envmap2 call init_envmap2
@ -290,6 +285,9 @@ still:
jne .next_m5 ; 'grd ' 1 jne .next_m5 ; 'grd ' 1
call make_random_lights ; 'env ' 2 call make_random_lights ; 'env ' 2
call normalize_all_light_vectors ; 'bump' 3 call normalize_all_light_vectors ; 'bump' 3
if Ext >= SSE3
call copy_lights
end if
call do_color_buffer ; intit color_map ; 'tex ' 4 call do_color_buffer ; intit color_map ; 'tex ' 4
; cmp [emboss_flag],1 ; 'pos ' 5 ; cmp [emboss_flag],1 ; 'pos ' 5
; je @f ; 'dots' 6 ; je @f ; 'dots' 6
@ -332,13 +330,13 @@ still:
.next_m: .next_m:
cmp ah,18 cmp ah,18
jne .next_m2 jne .next_m2
if USE_LFN
mov [re_alloc_flag],1 ; reallocate memory mov [re_alloc_flag],1 ; reallocate memory
mov [triangles_count_var],1000 mov [triangles_count_var],1000
mov [points_count_var],1000 mov [points_count_var],1000
call alloc_mem_for_tp call alloc_mem_for_tp
mov [re_alloc_flag],0 mov [re_alloc_flag],0
end if
mov bl,[generator_flag] mov bl,[generator_flag]
; or bl,bl ; or bl,bl
; jz .next_m2 ; jz .next_m2
@ -536,21 +534,12 @@ still:
mov ecx,[points_count_var] mov ecx,[points_count_var]
call rotary call rotary
; RDTSC
; pop ebx
; sub eax,ebx
; sub eax,41
; push eax
mov esi,[points_rotated_ptr] mov esi,[points_rotated_ptr]
mov edi,[points_translated_ptr] mov edi,[points_translated_ptr]
mov ecx,[points_count_var] mov ecx,[points_count_var]
call translate_points call translate_points
; cmp [dr_flag],5
; jne @f
; call calc_attenuation_light
; @@:
cmp [fire_flag],0 cmp [fire_flag],0
jne @f jne @f
call clrscr ; clear the screen call clrscr ; clear the screen
@ -568,8 +557,6 @@ still:
@@: @@:
call fill_Z_buffer ; make background call fill_Z_buffer ; make background
.non_f: .non_f:
; RDTSC
; push eax
cmp [dr_flag],6 cmp [dr_flag],6
jne @f jne @f
call draw_dots call draw_dots
@ -742,13 +729,22 @@ end if
loop .dc loop .dc
pop eax pop eax
mov eax,7 ; put image mov eax,7 ; put image
mov ebx,[screen_ptr] mov ebx,[screen_ptr]
mov ecx,[size_y_var] mov ecx,[size_y_var]
; mov ecx,SIZE_X shl 16 + SIZE_Y ; mov ecx,SIZE_X shl 16 + SIZE_Y
mov edx,[offset_y] ;5 shl 16 + 25 mov edx,[offset_y] ;5 shl 16 + 25
cmp [dr_flag],12
je .ff
int 0x40 int 0x40
jmp .f
.ff:
mov eax,65
mov esi,32
xor ebp,ebp
int 0x40
.f:
mov eax,13 mov eax,13
mov bx,[size_x_var] mov bx,[size_x_var]
add ebx,18 add ebx,18
@ -773,8 +769,6 @@ end if
int 40h int 40h
; addsubps xmm0,xmm0
jmp still jmp still
@ -797,6 +791,9 @@ include "BUMP_TEX.INC"
include "GRD_TEX.INC" include "GRD_TEX.INC"
include "TWO_TEX.INC" include "TWO_TEX.INC"
include "ASC.INC" include "ASC.INC"
if Ext >= SSE3
include "3r_phg.inc"
end if
clear_vertices_index: clear_vertices_index:
mov edi,[vertices_index_ptr] mov edi,[vertices_index_ptr]
movzx eax,word[size_x_var] movzx eax,word[size_x_var]
@ -825,9 +822,15 @@ edit: ; mmx required, edit mesh by vertex
movzx edx,word[size_x_var] movzx edx,word[size_x_var]
imul edx,ecx imul edx,ecx
add ebx,edx add ebx,edx
push ebx
lea ecx,[ebx*2] lea ecx,[ebx*2]
lea ebx,[ebx*3] lea ebx,[ebx*3]
cmp [dr_flag],12
jne @f
add ebx,[esp]
@@:
add esp,4
add ebx,[screen_ptr] add ebx,[screen_ptr]
mov ebx,[ebx] mov ebx,[ebx]
and ebx,0x00ffffff and ebx,0x00ffffff
@ -868,18 +871,6 @@ edit: ; mmx required, edit mesh by vertex
; left button pressed ; left button pressed
; macro check_bar
; {
; movzx ebx,word[.x_coord]
; movzx ecx,word[.y_coord]
; imul ebx,ecx
; lea ecx,[ebx*2]
; lea ebx,[ebx*3]
; add ebx,[screen_ptr]
; mov ebx,[ebx]
; and ebx,0x00ffffff
; cmp ebx,0x00ff0000 ; is handle bar ?
; }
check_bar check_bar
jne .no_edit jne .no_edit
@ -985,7 +976,7 @@ alloc_buffer_mem:
mul ecx mul ecx
mov [.temp],eax mov [.temp],eax
lea ecx,[eax*3] lea ecx,[eax*4] ; more mem for r_phg cause
add ecx,256 add ecx,256
mov eax,68 mov eax,68
mov ebx,20 mov ebx,20
@ -1020,28 +1011,6 @@ alloc_buffer_mem:
; Disabled (if 0) legacy allocation: grows the application memory so that a
; colour buffer and a Z-buffer sit back to back starting at MEM_END.
;   pixels       = size_x * (size_y + 200)
;   colour bytes = (pixels*3 + 16) and not 15   (3 bytes/pixel, 16-aligned)
;   Z bytes      = pixels*4
; Never assembled while the condition stays 0.
if 0
;old Menuet style alloc
movzx ecx,word[size_x_var]
movzx eax,word[size_y_var]
add eax,200
; eax = pixels; mul also overwrites edx with the high half of the product
mul ecx
lea ecx,[eax*3]
add ecx,16
and ecx,0xfffffff0
; keep the aligned colour-buffer size; it becomes the Z-buffer offset below
push ecx
shl eax,2
add ecx,eax
; ecx = MEM_END + colour bytes + Z bytes = requested application size
add ecx,MEM_END
mov ebx,1
mov eax,64 ; allocate mem - resize app mem
int 0x40
; NOTE(review): the value returned in eax by the system call is not checked
mov [screen_ptr],MEM_END
mov [Zbuffer_ptr],MEM_END
pop ecx
; Zbuffer_ptr = MEM_END + aligned colour-buffer size
add [Zbuffer_ptr],ecx
end if
ret
update_flags: update_flags:
; updates flags and writing flag description ; updates flags and writing flag description
@ -1063,6 +1032,7 @@ update_flags:
add edi,17 add edi,17
cmp byte[edi],-1 cmp byte[edi],-1
jne .ch_another jne .ch_another
jmp .no_write
.write: .write:
; clreol {pascal never dies} ; clreol {pascal never dies}
; * eax = 13 - function number ; * eax = 13 - function number
@ -1447,39 +1417,6 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading
mov esp,ebp mov esp,ebp
pop ebp pop ebp
ret ret
; Disabled (if 0) routine: walks the triangle list at 'triangles' and stores
; one 12-byte result per triangle at 'triangles_normals'.
; Per triangle:
;   1. make_vector    with esi = 1st point, edi = 2nd point, ebx = vectors
;   2. make_vector    with esi = 2nd point, edi = 3rd point, ebx = vectors+12
;   3. cross_product  with esi = vectors, edi = vectors+12, ebx = output slot
;   4. normalize_vector with edi = output slot
; The loop stops when the dword at the next triangle record equals -1.
; Helper semantics are taken from their names only - the helpers are not
; visible here; confirm against their definitions.
; Clobbers eax? (unknown), ebx, esi, edi, ebp and whatever the helpers touch.
if 0
init_triangles_normals:
mov ebx,triangles_normals
mov ebp,triangles
@@:
; save the output slot; ebx is reused as the scratch-vector pointer
push ebx
mov ebx,vectors
mov esi,dword[ebp] ; first point index
; index*3*2 = index*6: each entry of 'points' is addressed with a 6-byte stride
lea esi,[esi*3]
lea esi,[points+esi*2] ; esi - pointer to 1st 3d point
; NOTE(review): movzx with a dword source is not an encodable x86 form; this
; only goes unnoticed because the block is never assembled. The record stride
; below is 12 bytes (three dwords), so a plain 'mov' was presumably intended.
movzx edi,dword[ebp+4] ; second point index
lea edi,[edi*3]
lea edi,[points+edi*2] ; edi - pointer to 2nd 3d point
call make_vector
; second scratch vector lives 12 bytes after the first
add ebx,12
mov esi,edi
movzx edi,dword[ebp+8] ; third point index
lea edi,[edi*3]
lea edi,[points+edi*2]
call make_vector
mov edi,ebx ; edi - pointer to 2nd vector
mov esi,ebx
sub esi,12 ; esi - pointer to 1st vector
; restore the output slot in triangles_normals
pop ebx
call cross_product
mov edi,ebx
call normalize_vector
; advance to the next triangle record and the next 12-byte output slot
add ebp,12
add ebx,12
cmp dword[ebp],-1
jne @b
ret
end if
init_point_normals: init_point_normals:
.x equ dword [ebp-4] .x equ dword [ebp-4]
@ -1599,326 +1536,46 @@ init_triangles_normals2:
; cmp dword[ebp],-1 ; cmp dword[ebp],-1
; jne @b ; jne @b
ret ret
if 0 ; ind 64 but
;================================================================= if Ext >= SSE3
sort_triangles: copy_lights: ; after normalising !
mov esi,[triangles_ptr] mov esi,lights
mov edi,triangles_with_z mov edi,lights_aligned
mov ebp,[points_translated_ptr] mov ecx,3
.again:
make_triangle_with_z: ;makes list with triangles and z position push ecx
movzx eax,word[esi] mov ecx,3
lea eax,[eax*3] cld
movzx ecx,word[ebp+eax*2+4] rep movsd
xor eax,eax
movzx eax,word[esi+2] stosd
lea eax,[eax*3] mov ecx,3
add cx,word[ebp+eax*2+4] .b:
push ecx
movzx eax,word[esi+4] mov ecx,3
lea eax,[eax*3]
add cx,word[ebp+eax*2+4]
mov ax,cx
; cwd
; idiv word[i3]
movsd ; store vertex coordinates
movsw
stosw ; middle vertex coordinate 'z' in triangles_with_z list
cmp dword[esi],-1
jne make_triangle_with_z
movsd ; copy end mark
mov eax,4
lea edx,[edi-8-trizdd]
; lea edx, [edi-8]
; sub edx,[triangles_w_z_ptr]
mov [high],edx
call quicksort
mov eax,4
mov edx,[high]
call insertsort
jmp end_sort
quicksort:
mov ecx,edx
sub ecx,eax
cmp ecx,32
jc .exit
lea ecx,[eax+edx]
shr ecx,4
lea ecx,[ecx*8-4];
; mov edi,[triangles_w_z_ptr]
; mov ebx,[edi+eax]
; mov esi,[edi+ecx]
; mov edi,[edi+edx]
mov ebx,[trizdd+eax]; trizdd[l]
mov esi,[trizdd+ecx]; trizdd[i]
mov edi,[trizdd+edx]; trizdd[h]
cmp ebx,esi
jg @f ; direction NB! you need to negate these to invert the order
if Ext=NON
mov [trizdd+eax],esi
mov [trizdd+ecx],ebx
mov ebx,[trizdd+eax-4]
mov esi,[trizdd+ecx-4]
mov [trizdd+eax-4],esi
mov [trizdd+ecx-4],ebx
mov ebx,[trizdd+eax]
mov esi,[trizdd+ecx]
else
; push ebx
; mov ebx,[triangles_w_z_ptr]
; movq mm0,[ebx+eax-4]
; movq mm1,[ebx+ecx-4]
; movq [ebx+ecx-4],mm0
; movq [ebx+eax-4],mm1
; pop ebx
movq mm0,[trizdq+eax-4]
movq mm1,[trizdq+ecx-4]
movq [trizdq+ecx-4],mm0
movq [trizdq+eax-4],mm1
xchg ebx,esi
end if
@@: @@:
cmp ebx,edi movzx ebx,byte[esi]
jg @f ; direction cvtsi2ss xmm0,ebx
if Ext=NON movss [edi],xmm0
mov [trizdd+eax],edi inc esi
mov [trizdd+edx],ebx add edi,4
mov ebx,[trizdd+eax-4] loop @b
mov edi,[trizdd+edx-4] stosd
mov [trizdd+eax-4],edi pop ecx
mov [trizdd+edx-4],ebx loop .b
mov ebx,[trizdd+eax] inc esi ; skip shiness
mov edi,[trizdd+edx] pop ecx
else loop .again
; push ebx
; mov ebx,[triangles_w_z_ptr]
; movq mm0,[ebx+eax-4]
; movq mm1,[ebx+edx-4]
; movq [ebx+edx-4],mm0
; movq [ebx+eax-4],mm1
movq mm0,[trizdq+eax-4]
movq mm1,[trizdq+edx-4]
movq [trizdq+edx-4],mm0
movq [trizdq+eax-4],mm1
; pop ebx
xchg ebx,edi
end if
@@:
cmp esi,edi
jg @f ; direction
if Ext=NON
mov [trizdd+ecx],edi
mov [trizdd+edx],esi
mov esi,[trizdd+ecx-4]
mov edi,[trizdd+edx-4]
mov [trizdd+ecx-4],edi
mov [trizdd+edx-4],esi
else
; push ebx
; mov ebx,[triangles_w_z_ptr]
; movq mm0,[ebx+ecx-4]
; movq mm1,[ebx+edx-4]
; movq [ebx+edx-4],mm0
; movq [ebx+ecx-4],mm1
; pop ebx
movq mm0,[trizdq+ecx-4]
movq mm1,[trizdq+edx-4]
movq [trizdq+edx-4],mm0
movq [trizdq+ecx-4],mm1
xchg ebx,esi
end if
@@:
mov ebp,eax ; direction
add ebp,8 ; j
if Ext=NON
mov esi,[trizdd+ebp]
mov edi,[trizdd+ecx]
mov [trizdd+ebp],edi
mov [trizdd+ecx],esi
mov esi,[trizdd+ebp-4]
mov edi,[trizdd+ecx-4]
mov [trizdd+ecx-4],esi
mov [trizdd+ebp-4],edi
else
; push ebx
; mov ebx,[triangles_w_z_ptr]
; movq mm0,[ebx+ebp-4]
; movq mm1,[ebx+ecx-4]
; movq [ebx+ecx-4],mm0
; movq [ebx+ebp-4],mm1
; pop ebx
movq mm0,[trizdq+ebp-4]
movq mm1,[trizdq+ecx-4]
movq [trizdq+ecx-4],mm0
movq [trizdq+ebp-4],mm1
end if
mov ecx,edx ; i; direction
mov ebx,[trizdd+ebp]; trizdd[j]
; mov ebx, [triangles_w_z_ptr]
; add ebx, ebp
; push eax
; mov eax, [triangles_w_z_ptr]
.loop:
sub ecx,8 ; direction
cmp [trizdd+ecx],ebx
; cmp [eax+ecx],ebx
jl .loop ; direction
@@:
add ebp,8 ; direction
cmp [trizdd+ebp],ebx
; cmp [eax+ebp],ebx
jg @b ; direction
cmp ebp,ecx
jge @f ; direction
if Ext=NON
mov esi,[trizdd+ecx]
mov edi,[trizdd+ebp]
mov [trizdd+ebp],esi
mov [trizdd+ecx],edi
mov edi,[trizdd+ecx-4]
mov esi,[trizdd+ebp-4]
mov [trizdd+ebp-4],edi
mov [trizdd+ecx-4],esi
else
; movq mm0,[eax+ecx-4]
; movq mm1,[eax+ebp-4]
; movq [eax+ebp-4],mm0
; movq [eax+ecx-4],mm1
movq mm0,[trizdq+ecx-4]
movq mm1,[trizdq+ebp-4]
movq [trizdq+ebp-4],mm0
movq [trizdq+ecx-4],mm1
end if
jmp .loop
; pop eax
@@:
if Ext=NON
mov esi,[trizdd+ecx]
mov edi,[trizdd+eax+8]
mov [trizdd+eax+8],esi
mov [trizdd+ecx],edi
mov edi,[trizdd+ecx-4]
mov esi,[trizdd+eax+4]
mov [trizdd+eax+4],edi
mov [trizdd+ecx-4],esi
else
; push edx
; mov edx,[triangles_w_z_ptr]
; movq mm0,[edx+ecx-4]
; movq mm1,[edx+eax+4]; dir
; movq [edx+eax+4],mm0; dir
; movq [edx+ecx-4],mm1
; pop edx
movq mm0,[trizdq+ecx-4]
movq mm1,[trizdq+eax+4]; dir
movq [trizdq+eax+4],mm0; dir
movq [trizdq+ecx-4],mm1
end if
add ecx,8
push ecx edx
mov edx,ebp
call quicksort
pop edx eax
call quicksort
.exit:
ret ret
insertsort:
mov esi,eax
.start:
add esi,8
cmp esi,edx
ja .exit
mov ebx,[trizdd+esi]
; mov ebx,[triangles_w_z_ptr]
; add ebx,esi
if Ext=NON
mov ecx,[trizdd+esi-4]
else
; push ebx
; mov ebx,[triangles_w_z_ptr]
; movq mm1,[ebx+esi-4]
movq mm1,[trizdq+esi-4]
; pop ebx
end if end if
mov edi,esi
@@:
cmp edi,eax
jna @f
; push eax
; mov eax,[triangles_w_z_ptr]
; cmp [eax+edi-8],ebx
; pop eax
cmp [trizdd+edi-8],ebx
jg @f ; direction
if Ext=NON
mov ebp,[trizdd+edi-8]
mov [trizdd+edi],ebp
mov ebp,[trizdd+edi-12]
mov [trizdd+edi-4],ebp
else
; push eax
; mov eax,[triangles_w_z_ptr]
; movq mm0,[eax+edi-12]
; movq [eax+edi-4],mm0
movq mm0,[trizdq+edi-12]
movq [trizdq+edi-4],mm0
; pop eax
end if
sub edi,8
jmp @b
@@:
if Ext=NON
mov [trizdd+edi],ebx
mov [trizdd+edi-4],ecx
else
; push eax
; mov eax,[triangles_w_z_ptr]
; movq [eax+edi-4],mm1
movq [trizdq+edi-4],mm1
; pop eax
end if
jmp .start
.exit:
ret
end_sort:
; translate triangles_with_z to sorted_triangles
mov esi,triangles_with_z
; mov esi,[triangles_w_z_ptr]
; mov edi,sorted_triangles
mov edi,[triangles_ptr]
again_copy:
if Ext=NON
movsd
movsw
add esi,2
else
movq mm0,[esi]
movq [edi],mm0
add esi,8
add edi,6
end if
cmp dword[esi],-1
jne again_copy
; if Ext=MMX
; emms
; end if
movsd ; copy end mark too
ret
end if ; 64 ind
clrscr: clrscr:
mov edi,[screen_ptr] mov edi,[screen_ptr]
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
movzx eax,word[size_y_var] movzx eax,word[size_y_var]
imul ecx,eax imul ecx,eax
lea ecx,[ecx*3]
shr ecx,2
xor eax,eax xor eax,eax
if Ext=NON if Ext=NON
rep stosd rep stosd
@ -2130,7 +1787,10 @@ end if
je .cubic_env_mapping je .cubic_env_mapping
cmp [dr_flag],11 cmp [dr_flag],11
je .draw_smooth_line je .draw_smooth_line
; **************** if Ext >= SSE3
cmp [dr_flag],12
je .r_phg
end if ; ****************
mov esi,point_index3 ; do Gouraud shading mov esi,point_index3 ; do Gouraud shading
mov ecx,3 mov ecx,3
.again_grd_draw: .again_grd_draw:
@ -2953,7 +2613,72 @@ end if
push [xx2] push [xx2]
call smooth_line call smooth_line
jmp .end_draw
@@: @@:
; Draw path for dr_flag = 12, assembled only when Ext >= SSE3: loads the
; registers for real_phong_tri_z and calls it for the current triangle.
; Values set up before the call (the callee's contract is not visible here;
; the layout mirrors the register interface described for glass_tri):
;   xmm0..xmm2 - 16 bytes read at points_normals_rot_ptr + index*[i12],
;                then ANDed with [zero_hgst_dd]
;   xmm3       - all zeros
;   xmm4       - lo -> hi: dword at +8 of each of the three rotated points,
;                then ANDed with [zero_hgst_dd]
;   xmm5       - lo -> hi: 0, low word of [size_y_var], 0, high word of the
;                same dword (presumably y_min, y_max, x_min, x_max - confirm)
;   eax/ebx/ecx - dwords xx1/xx2/xx3 with their 16-bit halves swapped
;   edi        - [screen_ptr],  esi - [Zbuffer_ptr]
if Ext >= SSE3
.r_phg:
; movd zero-extends the dword; punpcklwd widens its two words to dwords
; (assumes [the_zero] holds zeros and is 16-byte aligned, as the SSE memory
; operand requires - confirm); pshufd then picks lanes 3,0,3,1.
movd xmm5,[size_y_var]
punpcklwd xmm5,[the_zero]
pshufd xmm5,xmm5,01110011b
; byte offsets of the three vertices: index * [i12]
; (presumably i12 = 12, one 3-float vector - confirm)
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
add eax,[points_normals_rot_ptr]
add ebx,[points_normals_rot_ptr]
add ecx,[points_normals_rot_ptr]
; unaligned 16-byte loads: the 4th lane comes from whatever follows the
; entry, so it is masked right after (zero_hgst_dd presumably clears the
; highest dword - confirm)
movups xmm0,[eax]
movups xmm1,[ebx]
movups xmm2,[ecx]
andps xmm0,[zero_hgst_dd]
andps xmm1,[zero_hgst_dd]
andps xmm2,[zero_hgst_dd]
xorps xmm3,xmm3
; same offsets again, this time into the rotated points
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add ecx,[points_rotated_ptr]
; pushed in reverse so that [esp] = point 1, [esp+4] = point 2,
; [esp+8] = point 3; the 16-byte load also picks up the dword that was
; already on the stack at [esp+12], which the mask is expected to drop
push dword[ecx+8]
push dword[ebx+8]
push dword[eax+8]
movups xmm4,[esp]
add esp,12
andps xmm4,[zero_hgst_dd]
; ror by 16 swaps the low and high words of each packed coordinate dword
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]
call real_phong_tri_z
jmp .end_draw
end if
.end_draw: .end_draw:
pop esi pop esi
@ -2990,12 +2715,25 @@ draw_handlers:
push dword 0 push dword 0
movzx eax,word[size_x_var] movzx eax,word[size_x_var]
cmp [dr_flag],12
je @f
lea ebx,[eax*3] lea ebx,[eax*3]
sub ebx,18 sub ebx,18
add eax,eax add eax,eax
sub eax,12 sub eax,12
mov [.xres3m18],ebx mov [.xres3m18],ebx
mov [.xres2m12],eax mov [.xres2m12],eax
jmp .f
@@:
lea ebx,[eax*4]
sub ebx,4*6
add eax,eax
sub eax,3*4
mov [.xres3m18],ebx
mov [.xres2m12],eax
.f:
mov esi,[points_translated_ptr] mov esi,[points_translated_ptr]
.loop: .loop:
@ -3033,7 +2771,13 @@ draw_handlers:
; sub eax,3 ; sub eax,3
imul eax,edx imul eax,edx
add eax,ebx add eax,ebx
push eax
lea edi,[eax*3] lea edi,[eax*3]
cmp [dr_flag],12
jne @f
add edi,[esp]
@@:
add esp,4
lea eax,[eax*2] lea eax,[eax*2]
; draw bar 6x6 ; draw bar 6x6
add edi,[screen_ptr] add edi,[screen_ptr]
@ -3049,13 +2793,20 @@ draw_handlers:
push ecx push ecx
mov ecx,6 mov ecx,6
@@: .do:
mov word[edi],0x0000 ;ax mov word[edi],0x0000 ;ax
mov byte[edi+2],0xff ;al mov byte[edi+2],0xff ;al
mov word[eax],dx mov word[eax],dx
add eax,2 add eax,2
cmp [dr_flag],12
jne @f
add edi,4
loop .do
jmp .ad
@@:
add edi,3 add edi,3
loop @b loop .do
.ad:
add edi,[.xres3m18] add edi,[.xres3m18]
add eax,[.xres2m12] add eax,[.xres2m12]
pop ecx pop ecx
@ -3077,6 +2828,10 @@ ret
fill_Z_buffer: fill_Z_buffer:
mov eax,0x70000000 mov eax,0x70000000
cmp [dr_flag],12
jne @f
mov eax,60000.1
@@:
mov edi,[Zbuffer_ptr] mov edi,[Zbuffer_ptr]
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
movzx ebx,word[size_y_var] movzx ebx,word[size_y_var]