Update to ver 0.077 - edit submit fixed, fire bug fixed, see readme.txt for details.

git-svn-id: svn://kolibrios.org@9740 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
macgub 2022-03-14 17:02:40 +00:00
parent d9a3ea763b
commit 92b749efe6
12 changed files with 901 additions and 718 deletions

View File

@ -600,8 +600,9 @@ else
add esi,12 add esi,12
add edi,12 add edi,12
dec ecx ; dec ecx
jne .again ; jne .again
loop .again
mov [edi],dword -1 mov [edi],dword -1
end if end if
ret ret
@ -695,7 +696,7 @@ translate_points: ; just convert into integer; z coord still needed
fiadd [vect_y] fiadd [vect_y]
fistp word[edi+2] fistp word[edi+2]
end if end if
if Ext>=SSE if Ext>=SSE2
movups xmm0,[esi] movups xmm0,[esi]
cvtps2dq xmm0,xmm0 cvtps2dq xmm0,xmm0
packssdw xmm0,xmm0 packssdw xmm0,xmm0
@ -722,8 +723,6 @@ translate_points: ; just convert into integer; z coord still needed
add esi,12 add esi,12
add edi,6 add edi,6
; dec ecx
; jnz .again
loop .again loop .again
ret ret

View File

@ -344,7 +344,7 @@ end if
pop ebp pop ebp
ret ret
align 16
glass_line: glass_line:
; in: ; in:
; xmm0 - normal vector 1 ; xmm0 - normal vector 1
@ -362,7 +362,7 @@ glass_line:
push ebp push ebp
mov ebp,esp mov ebp,esp
sub esp,256 sub esp,190
sub ebp,16 sub ebp,16
and ebp,0xfffffff0 and ebp,0xfffffff0
@ -537,7 +537,7 @@ align 16
jnz .ddraw jnz .ddraw
.end_rp_line: .end_rp_line:
add esp,256 add esp,190
pop ebp pop ebp
ret ret

View File

@ -452,7 +452,7 @@ end if
pop ebp pop ebp
ret ret
align 16
glass_tex_line: glass_tex_line:
; in: ; in:
; xmm0 - normal vector 1 ; xmm0 - normal vector 1
@ -747,7 +747,6 @@ end if
.skip: .skip:
add edi,4 add edi,4
add esi,4 add esi,4
; addps xmm0,.dn
movaps xmm0,.n1 ; cur normal movaps xmm0,.n1 ; cur normal
addps xmm0,.dn addps xmm0,.dn
addps xmm2,.dtx addps xmm2,.dtx

View File

@ -372,7 +372,7 @@ ray_shd_l:
push ebp push ebp
mov ebp,esp mov ebp,esp
sub esp,320 sub esp,270
sub ebp,16 sub ebp,16
and ebp,0xfffffff0 and ebp,0xfffffff0
@ -421,6 +421,7 @@ ray_shd_l:
shufps xmm3,xmm3,11100001b shufps xmm3,xmm3,11100001b
@@: @@:
movd .cur_tri,mm7 movd .cur_tri,mm7
; sub .cur_tri,dword 1
cmp ax,.x_max cmp ax,.x_max
jge .end_rp_line jge .end_rp_line
cmp bx,.x_min cmp bx,.x_min
@ -689,7 +690,7 @@ end if
dec ecx dec ecx
jnz .ddraw jnz .ddraw
.end_rp_line: .end_rp_line:
add esp,320 add esp,270
pop ebp pop ebp
ret ret

View File

@ -62,20 +62,6 @@ ret
if Ext > SSE2 if Ext > SSE2
;-------------------------------------------------------------------- ;--------------------------------------------------------------------
init_point_lights: init_point_lights:
; mov eax,1000
; cvtsi2ss xmm1,eax
; shufps xmm1,xmm1,11000000b
; mov esi,lights_aligned
; mov edi,point_light_coords
; mov ecx,3
; @@:
; movaps xmm0,[esi]
; addps xmm0,[f05xz]
; mulps xmm0,xmm1
; movaps [edi],xmm0
; add esi,64
; add edi,16
; loop @b
mov ecx,3 mov ecx,3
mov edi,point_light_coords mov edi,point_light_coords
@@: @@:
@ -90,16 +76,11 @@ init_point_lights:
call random call random
cvtsi2ss xmm0,eax cvtsi2ss xmm0,eax
movss [edi+4],xmm0 movss [edi+4],xmm0
; movzx ebx,word[size_x_var]
; shl ebx,2
; neg ebx
mov ecx,-1900 mov ecx,-1900
; sub ecx,100
mov edx,-600 mov edx,-600
call random call random
cvtsi2ss xmm0,eax cvtsi2ss xmm0,eax
movss [edi+8],xmm0 movss [edi+8],xmm0
; mov dword[edi+8],-1700.0
mov [edi+12],dword 0 mov [edi+12],dword 0
add edi,16 add edi,16
pop ecx pop ecx
@ -174,7 +155,7 @@ intersect_tri: ; Moeller-Trumbore method
; or eax,eax ; or eax,eax
; jz @f ; jz @f
comiss xmm0,[eps] comiss xmm0,[eps]
jl @f jb @f
rcpss xmm0,.det rcpss xmm0,.det
movss .invdet,xmm0 movss .invdet,xmm0
@ -228,7 +209,7 @@ intersect_tri: ; Moeller-Trumbore method
; test eax,1 ; test eax,1
; jz @f ; jz @f
comiss xmm1,[eps] comiss xmm1,[eps]
jl @f jb @f
mov eax,1 mov eax,1
cmp .ift,0 cmp .ift,0
@ -264,6 +245,16 @@ do_edges_list:
.edd_ptr equ [ebp-8] .edd_ptr equ [ebp-8]
.counter equ [ebp-12] .counter equ [ebp-12]
mov ebx, 12
mov eax, 68
mov ecx,[triangles_count_var]
lea ecx,[ecx*3]
shl ecx,4
add ecx,1024
mov edx,[edges_ptr]
int 0x40 ; -> allocate memory to edges
mov [edges_ptr], eax ; -> eax = pointer to allocated mem
mov ebx,[edges_ptr] mov ebx,[edges_ptr]
mov eax,[triangles_ptr] mov eax,[triangles_ptr]
@ -280,17 +271,18 @@ do_edges_list:
loop @b loop @b
mov ebx,[edges_ptr] mov ebx,[edges_ptr]
mov ecx,[triangles_count_var] mov ecx,[triangles_count_var]
lea ecx,[ecx*3] lea ecx,[ecx*3]
.mxd: .mxd:
mov eax,[ebx] mov eax,[ebx]
mov edx,[ebx+4]
cmp eax,[ebx+4] cmp eax,[ebx+4]
jl @f cmovg eax,edx
movq xmm0,[ebx] cmovg edx,[ebx]
pshufd xmm0,xmm0,11100001b mov [ebx],eax
movq [ebx],xmm0 mov [ebx+4],edx
@@:
add ebx,8 add ebx,8
loop .mxd loop .mxd
@ -303,20 +295,20 @@ do_edges_list:
mov esi,ecx mov esi,ecx
shl esi,3 shl esi,3
add esi,ebx add esi,ebx
dec ecx
.ccc: .ccc:
mov eax,[ebx+8] mov eax,[ebx+8]
cmp eax,[ebx] cmp eax,[ebx]
jge .g jae .g
movq xmm0,[ebx+8] movq xmm0,[ebx+8]
push ebx push ebx
.c: .c:
cmp ebx,esi cmp ebx,esi
jge .done jae .done
cmp ebx,[edges_ptr] cmp ebx,[edges_ptr]
jl .done jb .done
cmp eax,[ebx] cmp eax,[ebx]
jge .done jae .done
movq xmm7,[ebx] movq xmm7,[ebx]
movq [ebx+8],xmm7 movq [ebx+8],xmm7
sub ebx,8 sub ebx,8
@ -328,10 +320,7 @@ do_edges_list:
pop ebx pop ebx
.g: .g:
add ebx,8 add ebx,8
dec ecx loop .ccc
cmp ecx,1
jnz .ccc
; insert sort again ; insert sort again
mov ebx,[edges_ptr] mov ebx,[edges_ptr]
@ -350,7 +339,7 @@ do_edges_list:
inc ecx inc ecx
add ebx,8 add ebx,8
cmp ebx,esi cmp ebx,esi
jge .br ; break jae .br ; break
cmp eax,[ebx] cmp eax,[ebx]
je .aa je .aa
mov .counter,ecx mov .counter,ecx
@ -368,12 +357,12 @@ do_edges_list:
mov eax,[ebx+12] mov eax,[ebx+12]
mov edx,[ebx+8] mov edx,[ebx+8]
cmp eax,[ebx+4] cmp eax,[ebx+4]
jge .gg2 jae .gg2
movq xmm0,[ebx+8] movq xmm0,[ebx+8]
push ebx push ebx
.c2: .c2:
cmp eax,[ebx+4] cmp eax,[ebx+4]
jge .done2 jae .done2
movq xmm7,[ebx] movq xmm7,[ebx]
movq [ebx+8],xmm7 movq [ebx+8],xmm7
@ -405,60 +394,68 @@ do_edges_list:
add esp,8 add esp,8
.ff: .ff:
; count edges ; count edges
mov ecx,0
mov edx,[triangles_count_var] mov ecx,[triangles_count_var]
lea edx,[edx*3] lea ecx,[ecx*3+3]
mov ebx,[edges_ptr] mov esi,[edges_ptr]
; mov esi,edx xor edx,edx
; shl esi,3 cld
; add esi,[edges_ptr]
.nx: .nx:
movq xmm0,[ebx] lodsd
add ebx,8 mov ebx,eax
; cmp ebx,esi lodsd
; jae @f cmp ebx,[esi]
movq xmm1,[ebx] jnz .ic
; @@: cmp eax,[esi+4]
pcmpeqd xmm0,xmm1 jnz .ic
pmovmskb eax,xmm0 loop .nx
and eax,0xff jmp .endc
cmp eax,0xff .ic:
jz @f
inc ecx inc edx
@@: loop .nx
dec edx .endc:
jnz .nx mov .ed_cnt,edx
mov ecx,edx
mov .ed_cnt,ecx shl ecx,3
lea ecx,[ecx*3]
shl ecx,2
add ecx,65536 add ecx,65536
mov ebx,12 mov ebx,12
mov eax,68 mov eax,68
mov edx,.edd_ptr mov edx,.edd_ptr
int 0x40 ; -> allocate memory to triangles int 0x40 ; -> allocate memory to new edges
mov .edd_ptr, eax ; -> eax = pointer to allocated mem mov .edd_ptr, eax ; -> eax = pointer to allocated mem
mov ebx,[edges_ptr]
mov ecx,[triangles_count_var] mov ecx,[triangles_count_var]
lea ecx,[ecx*3] lea ecx,[ecx*3]
.seek: add ecx,ecx
movq xmm0,[ebx] mov esi,[edges_ptr]
movq xmm1,[ebx+8] mov edi,eax
pcmpeqd xmm1,xmm0 xor edx,edx
pmovmskb edx,xmm1 cld
and edx,0xff .nx1:
cmp edx,0xff lodsd
je @f mov ebx,eax
movq [eax],xmm0 lodsd
add eax,8 cmp ebx,[esi]
@@: jnz .ic1
add ebx,8 cmp eax,[esi+4]
loop .seek jnz .ic1
loop .nx1
jmp .endc1
.ic1:
xchg eax,ebx
stosd
mov eax,ebx
stosd
inc edx
loop .nx1
.endc1:
mov eax,68 mov eax,68
mov ebx,13 mov ebx,13
@ -595,8 +592,8 @@ draw_dots:
mov edi,[screen_ptr] mov edi,[screen_ptr]
lea eax,[eax*3] lea eax,[eax*3]
add edi,eax add edi,eax
xor eax,eax or eax,-1
not eax ; not eax
stosd stosd
@@: @@:
loop .drw loop .drw

View File

@ -707,7 +707,7 @@ if Ext >= SSE2
movups .cty2,xmm3 movups .cty2,xmm3
end if end if
if (Ext = MMX) if (Ext = MMX)| (Ext = SSE)
movq mm0,.cby2 movq mm0,.cby2
movq mm1,.cby1 movq mm1,.cby1
movq mm2,.cey2 movq mm2,.cey2
@ -843,7 +843,7 @@ if Ext >= SSE2
end if end if
if (Ext = MMX) if (Ext = MMX)| (Ext = SSE)
movq mm0,.cby2 movq mm0,.cby2
movq mm1,.cby1 movq mm1,.cby1
movq mm2,.cey2 movq mm2,.cey2
@ -1469,13 +1469,3 @@ end if
.bl_end: .bl_end:
mov esp,ebp mov esp,ebp
ret 76 ret 76
;Ext = MMX
; else
; movq mm5, qword[.temp1] ;-
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap
; movd ebx,mm5
; psrlq mm5,32
; end if

View File

@ -76,14 +76,19 @@ detect_chunks:
mov .chmr,eax ; chunks mark if bit is set - tri was used mov .chmr,eax ; chunks mark if bit is set - tri was used
mov edi,eax mov edi,eax
pxor xmm0,xmm0 ; pxor xmm0,xmm0
mov ecx,[triangles_count_var] mov ecx,[triangles_count_var]
shr ecx,7 shr ecx,5
inc ecx inc ecx
@@: xor eax,eax
movdqa [edi],xmm0 cld
add edi,16 rep stosd
loop @b ; shr ecx,7
; inc ecx
; @@:
; movdqa [edi],xmm0
; add edi,16
; loop @b
mov eax,[points_count_var] mov eax,[points_count_var]
@ -293,6 +298,7 @@ detect_chunks:
mov .up,esi mov .up,esi
mov .str,edi mov .str,edi
; mov edi,.tri_ch1
.lb1: ; nx chunk .lb1: ; nx chunk
cmp edi,.ltch1 cmp edi,.ltch1
jnb .endl jnb .endl
@ -399,7 +405,7 @@ detect_chunks:
; mov ebx,.chunks mov ebx,.chunks
mov ecx,.ch_cnt mov ecx,.ch_cnt
mov esi,.tri_ch mov esi,.tri_ch

View File

@ -357,7 +357,7 @@ base_vector:
if Ext=SSE3 if Ext=SSE3
db ' (SSE3)' db ' (SSE3)'
end if end if
db ' 0.076',0 db ' 0.077',0
labellen: labellen:
STRdata db '-1 ' STRdata db '-1 '
lab_vert: lab_vert:
@ -488,7 +488,7 @@ end if
the_one: the_one:
times 4 dd 1.0 times 4 dd 1.0
eps: times 4 dd 0.00000 eps: times 4 dd 0.000001
vect_x: dw SIZE_X / 2 vect_x: dw SIZE_X / 2
vect_y dw SIZE_Y / 2 vect_y dw SIZE_Y / 2
@ -500,9 +500,9 @@ end if
xres_var dw SIZE_X xres_var dw SIZE_X
epsone dd 1.0001 epsone dd 1.00001
aprox dd 0.0001 aprox dd 0.0001
epsminus dd -0.0001 epsminus dd 0.00001
file_info: file_info:
@ -513,9 +513,9 @@ end if
fptr dd 0 ;workarea fptr dd 0 ;workarea
file_name: file_name:
db '/sys/3d/house.3ds',0 db '/sys/3d/house.3ds',0
; db '/tmp0/1/sc.3ds',0 ; db '/tmp0/1/bmwm3.3ds',0
rb 256 rb 1024
I_END: I_END:

View File

@ -37,19 +37,19 @@ gouraud_triangle_z:
.dz12 equ dword[ebp-20] .dz12 equ dword[ebp-20]
.dc12r equ dword[ebp-24] .dc12r equ dword[ebp-24]
.dc12g equ dword[ebp-28] .dc12g equ dword[ebp-28]
.dc12b equ dword[ebp-32] .dc12b equ [ebp-32]
.dx13 equ dword[ebp-36] .dx13 equ dword[ebp-36]
.dz13 equ dword[ebp-40] .dz13 equ dword[ebp-40]
.dc13r equ dword[ebp-44] .dc13r equ dword[ebp-44]
.dc13g equ dword[ebp-48] .dc13g equ dword[ebp-48]
.dc13b equ dword[ebp-52] .dc13b equ [ebp-52]
.dx23 equ dword[ebp-56] .dx23 equ dword[ebp-56]
.dz23 equ dword[ebp-60] .dz23 equ dword[ebp-60]
.dc23r equ dword[ebp-64] .dc23r equ dword[ebp-64]
.dc23g equ dword[ebp-68] .dc23g equ dword[ebp-68]
.dc23b equ dword[ebp-72] .dc23b equ [ebp-72]
.zz1 equ dword[ebp-76] .zz1 equ dword[ebp-76]
.c1r equ dword[ebp-80] .c1r equ dword[ebp-80]
@ -124,6 +124,39 @@ end if
loop @b loop @b
jmp .gt_dx12_done jmp .gt_dx12_done
.gt_dx12_make: .gt_dx12_make:
if Ext>= SSE2
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
; push eax
mov ebx,eax
mov ax,.x2
sub ax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx12,eax
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.col1r]
movq xmm2,[.col2r]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc12b,xmm2
else
mov ax,.x2 mov ax,.x2
sub ax,.x1 sub ax,.x1
cwde cwde
@ -166,6 +199,7 @@ end if
idiv ebx idiv ebx
; mov .dc12b,eax ; mov .dc12b,eax
push eax push eax
end if
.gt_dx12_done: .gt_dx12_done:
mov bx,.y3 ; calc deltas mov bx,.y3 ; calc deltas
@ -182,6 +216,38 @@ end if
loop @b loop @b
jmp .gt_dx13_done jmp .gt_dx13_done
.gt_dx13_make: .gt_dx13_make:
if Ext>= SSE2
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
mov ax,.x3
sub ax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.col1r]
movq xmm2,[.col3r]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc13b,xmm2
else
mov ax,.x3 mov ax,.x3
sub ax,.x1 sub ax,.x1
cwde cwde
@ -224,6 +290,7 @@ end if
idiv ebx idiv ebx
; mov .dc13b,eax ; mov .dc13b,eax
push eax push eax
end if
.gt_dx13_done: .gt_dx13_done:
mov bx,.y3 ; calc deltas mov bx,.y3 ; calc deltas
@ -240,6 +307,39 @@ end if
loop @b loop @b
jmp .gt_dx23_done jmp .gt_dx23_done
.gt_dx23_make: .gt_dx23_make:
if Ext>= SSE2
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
; push eax
mov ebx,eax
mov ax,.x3
sub ax,.x2
cwde
imul ebx
sar eax,15 - ROUND
push eax
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.col2r]
movq xmm2,[.col3r]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc23b,xmm2
else
mov ax,.x3 mov ax,.x3
sub ax,.x2 sub ax,.x2
cwde cwde
@ -282,6 +382,7 @@ end if
idiv ebx idiv ebx
; mov .dc23b,eax ; mov .dc23b,eax
push eax push eax
end if
.gt_dx23_done: .gt_dx23_done:
sub esp,32 sub esp,32
@ -340,6 +441,7 @@ end if
call gouraud_line_z call gouraud_line_z
popad popad
if Ext >= MMX if Ext >= MMX
movq mm0,.c1bM movq mm0,.c1bM
paddd mm0,qword .dc13bM paddd mm0,qword .dc13bM
@ -483,6 +585,7 @@ gouraud_line_z:
.z2 equ dword[ebp+8] .z2 equ dword[ebp+8]
.y equ word[ebp+12] .y equ word[ebp+12]
.x1 equ ebp+14 .x1 equ ebp+14
.c1b equ ebp+16 .c1b equ ebp+16
.c1g equ ebp+18 .c1g equ ebp+18
.c1r equ ebp+20 .c1r equ ebp+20
@ -540,6 +643,43 @@ gouraud_line_z:
cmp word[.x2],0 cmp word[.x2],0
jle .gl_quit jle .gl_quit
if 0
mov bx,word[.x2] ; dz = z2-z1/x2-x1
sub bx,word[.x1]
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
mov eax,.x3
sub eax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.col1r]
movq xmm2,[.col3r]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc13b,xmm2
end if
mov eax,.z2 mov eax,.z2
sub eax,.z1 sub eax,.z1
cdq cdq
@ -549,28 +689,35 @@ gouraud_line_z:
idiv ebx idiv ebx
push eax push eax
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
mov ax,word[.c2b] mov ax,word[.c2b]
sub ax,word[.c1b] sub ax,word[.c1b]
cwde cwde
shl eax,ROUND imul ebx
cdq sar eax,15 - ROUND
idiv ebx
push eax push eax
mov ax,word[.c2g] mov ax,word[.c2g]
sub ax,word[.c1g] sub ax,word[.c1g]
cwde cwde
shl eax,ROUND imul ebx
cdq sar eax,15 - ROUND
idiv ebx
push eax push eax
mov ax,word[.c2r] mov ax,word[.c2r]
sub ax,word[.c1r] sub ax,word[.c1r]
cwde cwde
shl eax,ROUND ; dc_r = c2r-c1r/x2-x1 imul ebx
cdq sar eax,15 - ROUND
idiv ebx
push eax push eax
cmp word[.x1],0 ; clipping on function cmp word[.x1],0 ; clipping on function
@ -625,6 +772,14 @@ gouraud_line_z:
; mov .cz,edx ; mov .cz,edx
;end if ;end if
mov edx,.dz ; edx - delta z mov edx,.dz ; edx - delta z
if Ext >= SSE2
movq xmm7,[.c1b]
pshuflw xmm7,xmm7,11000110b
punpcklwd xmm7,[the_zero]
pslld xmm7,ROUND
movdqu xmm1,[.dc_rM]
end if
if Ext = NON
movzx eax,word[.c1r] movzx eax,word[.c1r]
shl eax,ROUND shl eax,ROUND
mov .cr,eax mov .cr,eax
@ -634,7 +789,8 @@ gouraud_line_z:
movzx eax,word[.c1b] movzx eax,word[.c1b]
shl eax,ROUND shl eax,ROUND
mov .cb,eax mov .cb,eax
if Ext = MMX end if
if (Ext = MMX) | (Ext=SSE)
; mov .c_z,edx ; mov .c_z,edx
movd mm2,[.dc_bM] ; delta color blue MMX movd mm2,[.dc_bM] ; delta color blue MMX
movd mm3,[.cbM] ; current blue MMX movd mm3,[.cbM] ; current blue MMX
@ -652,7 +808,17 @@ end if
;end if ;end if
cmp ebx,dword[esi] ; esi - z_buffer cmp ebx,dword[esi] ; esi - z_buffer
jge @f ; edi - Screen buffer jge @f ; edi - Screen buffer
if Ext = MMX if Ext >= SSE2
movdqa xmm0,xmm7
psrld xmm0,ROUND
packssdw xmm0,xmm0
packuswb xmm0,xmm0
movd eax,xmm0
stosw
shr eax,16
stosb
end if
if (Ext=MMX) | (Ext=SSE)
movq mm0,mm3 ; mm0, mm1 - temp registers movq mm0,mm3 ; mm0, mm1 - temp registers
psrld mm0,ROUND psrld mm0,ROUND
movq mm1,mm4 movq mm1,mm4
@ -664,7 +830,8 @@ if Ext = MMX
stosw stosw
shr eax,16 shr eax,16
stosb stosb
else end if
if Ext=NON
mov eax,.cr mov eax,.cr
sar eax,ROUND sar eax,ROUND
stosb stosb
@ -686,10 +853,14 @@ end if
;if Ext=NON ;if Ext=NON
add ebx,edx add ebx,edx
;end if ;end if
if Ext=MMX if Ext >=SSE2
paddd xmm7,xmm1
end if
if (Ext=MMX) | (Ext=SSE)
paddd mm3,mm2 paddd mm3,mm2
paddd mm4,mm5 paddd mm4,mm5
else end if
if Ext = NON
mov eax,.dc_g mov eax,.dc_g
add .cg,eax add .cg,eax
mov eax,.dc_b mov eax,.dc_b

View File

@ -1,3 +1,25 @@
View3ds 0.076 - XII 2021
1. Procedure for detecting manifold chunks, based on a kind of sorted pivot
table. Chunks are counted and this number is displayed.
2. New calculating normal vectors proc that use some data produced
by new chunks routine. Now big object loading is fast. I load object that
contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges
in few seconds on i5 2cond gen. Earlier such objects calculating was
rather above time limits.
3. On http://board.flatassembler.net there are occasionally some discussions
about optimizing. Some clever people, whose skills and competence I trust,
claim that for CPUs manufactured in the last ~15 years code size is crucial
for speed (better utilization of the CPU cache).
So I wrote some 'movsd' mnemonics instead of 'mov [edi],sth'; 'loop' instead of
'dec ecx,jnz sth'. Moreover, I went back to initializing some local variables
by 'push' (flat_cat.inc). I made an effort to change divisions to
multiplications in two_tex.inc (works ok in fpu-only Ext = NON mode and
of course in Ext = SSE3 mode), grd_tex.inc (single line, non-parallel
muls; the whole drawing routine uses 4 divs instead of 27 divisions),
bump_tex.inc - 3 divs in SSE2 mode. See sources for details.
4. The editor button now allows editing by vertex for all objects, including those with more than 65535 vertices.
----------------------------------------------------------------------------------
View3ds 0.075 - XII 2021 View3ds 0.075 - XII 2021
1. Cusom rotate using keys and mouse scroll support by Leency. 1. Cusom rotate using keys and mouse scroll support by Leency.
---------------------------------------------------------------------------------- ----------------------------------------------------------------------------------
@ -9,7 +31,6 @@ View3ds 0.074 - IX 2021
set 'on' this option. Note that is non real time model, especially when set 'on' this option. Note that is non real time model, especially when
complex object is computed. I took effort to introduce accelerating complex object is computed. I took effort to introduce accelerating
structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled
for now - seems to work incorrect(slow). for now - seems to work incorrect(slow).
---------------------------------------------------------------------------------- ----------------------------------------------------------------------------------

View File

@ -1,33 +1,16 @@
View3ds 0.076 - tiny viewer to .3ds and .asc files with several graphics View3ds 0.077 - tiny viewer to .3ds and .asc files with several graphics
effects implementation. effects implementation.
What's new? Whats new?
1. Detecting manifold chunks procedure based on kind of sorted pivot 1. More divs elimination comparing to ver 0.076, - grd_cat.inc file.
table. Chunks are counted and this number displayed. 2. Some 3ds object I have, reads with invalid normals - fixed.
2. New calculating normal vectors proc that use some data produced 3. Invalid submit edition bug - fixed. Smaller size of adjcent proc.
by new chunks routine. Now big object loading is fast. I load object that 4. Edges detection fix.
contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges
in few seconds on i5 2cond gen. Earlier such objects calculating was
rather above time limits.
3. On http://board.flatassembler.net occasionaly there are some disccusions
about optimizing. Some clever people, wich skills and competence I trust,
claims - for CPU's manufactured last ~15 years size of code is crucial
for speed. (Better utilize CPU cache).
So I wrote some 'movsd' mnemonics instead 'mov [edi],sth'; 'loop' instead
'dec ecx,jnz sth'. Moreover I come back to init some local varibles
by 'push' (flat_cat.inc). I took effort to change divisions to
multiplications two_tex.inc (works ok in fpu only Ext = NON mode and
of course in Ext = SSE3 mode), grd_tex.inc (single line not parallel
muls, whole drawing routine 4 divs instead 27 divisions),
bump_tex.inc - 3 divs in SSE2 mode.s See sources for details.
4. Editor button allows now editing by vertex all above 65535 vert objects.
Buttons description: Buttons description:
1. rotary: choosing rotary axle: x, y, x+y, keys - for object translate 1. rotary: choosing rotary axle: x, y, x+y, keys - for object custom rotate
using keyboard. . using keyboard - keys <, >, PgUp, PgDown.
2. shd. model: choosing shading model: flat, grd (smooth), env (spherical 2. shd. model: choosing shading model: flat, grd (smooth), env (spherical
environment mapping, bump (bump mapping), tex (texture mapping), environment mapping, bump (bump mapping), tex (texture mapping),
pos (position shading depend), dots (app draws only points - nodes of object), pos (position shading depend), dots (app draws only points - nodes of object),
@ -40,7 +23,7 @@ Buttons description:
6. ray shadow: calc ray casted shadows. 6. ray shadow: calc ray casted shadows.
7. culling: backface culling on/ off. 7. culling: backface culling on/ off.
8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). 8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination).
9. Blur: blur N times; N=0,1,2,3,4,5 9. blur: blur N times; N=0,1,2,3,4,5
10.11,12,13. loseless operations (rotary 90, 180 degrees). 10.11,12,13. loseless operations (rotary 90, 180 degrees).
12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges 12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges
more deep. more deep.
@ -61,4 +44,4 @@ Buttons description:
decrease whole handlers count by enable culling (using appropriate button) - some decrease whole handlers count by enable culling (using appropriate button) - some
back handlers become hidden. back handlers become hidden.
Maciej Guba XII 2021 Maciej Guba march 2022

View File

@ -1,5 +1,5 @@
; application : View3ds ver. 0.076 - tiny .3ds and .asc files viewer ; application : View3ds ver. 0.077 - tiny .3ds and .asc files viewer
; with a few graphics effects demonstration. ; with a few graphics effects demonstration.
; compiler : FASM ; compiler : FASM
; system : KolibriOS ; system : KolibriOS
@ -67,50 +67,71 @@ START: ; start of execution
call alloc_buffer_mem call alloc_buffer_mem
call read_param call read_param
call read_from_disk ; read, if all is ok eax = 0 call read_from_disk ; read, if all is ok eax = 0
btr eax,31 ; mark 1
cmp eax,0 cmp eax,0
jne .gen jne .gen
bts eax,31 ; mark 2
mov esi,[fptr] mov esi,[fptr]
cmp [esi],word 4D4Dh cmp [esi],word 4D4Dh
jne .asc jne .asc_gen
call read_tp_variables ; init points and triangles count variables call read_tp_variables ; init points and triangles count variables
cmp eax,0 cmp eax,0
jne .malloc jne .malloc
xor eax,eax ; if failed read -> generate
.gen: .gen:
; if no house.3ds on board - generate .asc_gen: ; read asc file or generate
xor bl,bl ; reallocate memory push eax
; if no house.3ds on rd - generate
xor bl,bl ; allocate memory
mov [triangles_count_var],20000 mov [triangles_count_var],20000
mov [points_count_var],20000 mov [points_count_var],20000
call alloc_mem_for_tp call alloc_mem_for_tp
pop eax
bt eax,31
jc .asc
mov bl,[generator_flag] mov bl,[generator_flag]
call generate_object call generate_object
mov ax,1 ;mark
jmp .opt jmp .opt
.asc: .asc:
mov [triangles_count_var],10000 ; to do: read asc header ; xor bl,bl
mov [points_count_var],10000 ; mov [triangles_count_var],20000 ; to do: read asc header
call alloc_mem_for_tp ; mov [points_count_var],20000
; call alloc_mem_for_tp
call read_asc call read_asc
xor ax,ax
jmp .opt jmp .opt
.malloc: .malloc:
call alloc_mem_for_tp call alloc_mem_for_tp
call read_from_file call read_from_file
.opt: .opt:
if Ext >= SSE2
push ax
end if
call optimize_object1 ; proc in file b_procs.asm call optimize_object1 ; proc in file b_procs.asm
; set point(0,0,0) in center and calc all coords ; set point(0,0,0) in center and calc all coords
; to be in <-1.0,1.0> ; to be in <-1.0,1.0>
call normalize_all_light_vectors call normalize_all_light_vectors
call copy_lights ; to aligned float call copy_lights ; to aligned float
call init_triangles_normals2 ; call init_triangles_normals2
if Ext >= SSE2 if Ext >= SSE2
; if first byte of ax set -> old style normal vectors finding
call detect_chunks call detect_chunks
mov [chunks_number],ecx mov [chunks_number],ecx
mov [chunks_ptr],ebx mov [chunks_ptr],ebx
push esi
push edi
call init_triangles_normals2
; esi - tri_ch ; esi - tri_ch
; edi - t_ptr - every vertice index - pointer to to all triangles ; edi - t_ptr - every vertice index - pointer to to all triangles
; that have this index ; that have this index
pop edi
pop esi
pop ax
end if end if
call init_point_normals call init_point_normals
@ -122,7 +143,6 @@ START: ; start of execution
call do_color_buffer ; intit color_map call do_color_buffer ; intit color_map
if Ext >= SSE3 if Ext >= SSE3
call init_point_lights call init_point_lights
mov [fire_flag],0 ; proteza
end if end if
mov edi,bumpmap mov edi,bumpmap
call calc_bumpmap call calc_bumpmap
@ -206,10 +226,22 @@ START: ; start of execution
jmp noclose jmp noclose
red: ; redraw red: ; redraw
; xor edx,edx
; @@:
; push edx
mov eax,9 ; get process info mov eax,9 ; get process info
mov ebx,procinfo mov ebx,procinfo
mov ecx,-1 or ecx,-1
int 0x40 int 0x40
; pop edx
; inc edx
; cmp dword[procinfo+26],50000000 ; ~ 10 Mbytes
; jb @f
; cmp edx,1
; je @b
; @@:
mov eax,[procinfo+42] ; read params of window mov eax,[procinfo+42] ; read params of window
sub eax,225 sub eax,225
mov [size_x_var],ax mov [size_x_var],ax
@ -297,14 +329,14 @@ START: ; start of execution
call update_flags ; update flags and write labels of flags call update_flags ; update flags and write labels of flags
; do other operations according to flag ; do other operations according to flag
cmp ah,3 ; ah = 3 -> shading model ; cmp ah,3 ; ah = 3 -> shading model
jne .next_m6 ; jne .next_m6
cmp [dr_flag],2 ; cmp [dr_flag],2
jne @f ; jne @f
; call init_envmap2 ; <----! this don't works in env mode ; call init_envmap2 ; <----! this don't works in env mode
; and more than ~18 kb objects ; and more than ~18 kb objects
; call init_envmap_cub2 ; call init_envmap_cub2
@@: ; @@:
cmp [dr_flag],4 cmp [dr_flag],4
jne @f jne @f
call generate_texture2 call generate_texture2
@ -402,7 +434,7 @@ START: ; start of execution
call detect_chunks call detect_chunks
mov [chunks_number],ecx mov [chunks_number],ecx
mov [chunks_ptr],ebx mov [chunks_ptr],ebx
mov ax,1 ; - old style detecting normal vectors
; esi - tri_ch ; esi - tri_ch
; edi - t_ptr - every vertice index - pointer to to all triangles ; edi - t_ptr - every vertice index - pointer to to all triangles
; that have this index ; that have this index
@ -412,6 +444,7 @@ START: ; start of execution
call calc_bumpmap_coords ; bump and texture mapping call calc_bumpmap_coords ; bump and texture mapping
call do_edges_list call do_edges_list
call write_info call write_info
.next_m2: .next_m2:
cmp ah,19 cmp ah,19
je @f je @f
@ -693,6 +726,7 @@ START: ; start of execution
lea ecx,[eax*4] lea ecx,[eax*4]
if (Ext = MMX)|(Ext = SSE) if (Ext = MMX)|(Ext = SSE)
emms
mov bh,bl mov bh,bl
push bx push bx
shl ebx,16 shl ebx,16
@ -884,9 +918,10 @@ clear_vertices_index:
movzx ecx,word[size_y_var] movzx ecx,word[size_y_var]
imul ecx,eax imul ecx,eax
xor eax,eax xor eax,eax
shr ecx,1 ; shr ecx,1
rep stosd rep stosd
ret ret
edit: ; mmx required, edit mesh by vertex edit: ; mmx required, edit mesh by vertex
push ebp push ebp
mov ebp,esp mov ebp,esp
@ -895,9 +930,9 @@ edit: ; mmx required, edit mesh by vertex
.y_coord equ ebp-2 .y_coord equ ebp-2
.x_coord equ ebp-4 .x_coord equ ebp-4
.points_translated equ ebp-10 .points_translated equ ebp-10
.points equ ebp-22 .points equ ebp-26
.points_rotated equ ebp-34 .points_rotated equ ebp-26-16
.mx equ ebp-70 .mx equ ebp-26-56
macro check_bar macro check_bar
{ {
@ -906,17 +941,11 @@ edit: ; mmx required, edit mesh by vertex
movzx edx,word[size_x_var] movzx edx,word[size_x_var]
imul edx,ecx imul edx,ecx
add ebx,edx add ebx,edx
push ebx
mov ecx,ebx mov ecx,ebx
shl ecx,2 shl ecx,2
; lea ecx,[ebx*2]
lea ebx,[ebx*3] lea ebx,[ebx*3]
cmp [dr_flag],10
cmp [dr_flag],12 cmovg ebx,ecx
jl @f
add ebx,[esp]
@@:
add esp,4
add ebx,[screen_ptr] add ebx,[screen_ptr]
mov ebx,[ebx] mov ebx,[ebx]
and ebx,0x00ffffff and ebx,0x00ffffff
@ -935,10 +964,9 @@ edit: ; mmx required, edit mesh by vertex
pcmpgtw mm0,mm1 pcmpgtw mm0,mm1
pcmpgtw mm3,mm1 pcmpgtw mm3,mm1
pxor mm3,mm0 pxor mm3,mm0
movd eax,mm3 pmovmskb eax,mm3
mov cx,ax and eax,1111b
shr eax,16
and ax,cx
or ax,ax or ax,ax
jz .no_edit jz .no_edit
@ -949,15 +977,12 @@ edit: ; mmx required, edit mesh by vertex
; store both x and y coordinates ; store both x and y coordinates
ror eax,16 ror eax,16
; push eax
; sub esp,256
mov [.x_coord],eax mov [.x_coord],eax
test word[mouse_state],100000000b test word[mouse_state],100000000b
jz .not_press ; check if left mouse button press jz .not_press ; check if left mouse button press
; left button pressed ; left button pressed
check_bar check_bar
jne .no_edit jne .no_edit
add ecx,[vertices_index_ptr] add ecx,[vertices_index_ptr]
@ -992,29 +1017,17 @@ edit: ; mmx required, edit mesh by vertex
check_bar check_bar
jne .end jne .end
mov esi,[vertex_edit_no] movd xmm0,[edit_end_x]
; dec esi punpcklwd xmm0,[the_zero]
lea esi,[esi*3] movd xmm1,[vect_x]
add esi,esi punpcklwd xmm1,[the_zero]
add esi,[points_translated_ptr] ; movd xmm2,[offset_y]
emms ; punpcklwd xmm2,[the_zero]
psubd xmm0,xmm1
; psubd xmm0,xmm2
cvtdq2ps xmm0,xmm0
movups [.points],xmm0
movd mm1,dword[esi]
paddw mm1,mm0
psubw mm1,qword[vect_x]
movd dword[esi],mm1
lea edi,[.points]
; detranslate
fninit
fild word[esi+4]
fstp dword[edi+8]
fild word[esi+2]
fisub word[offset_x]
fstp dword[edi+4]
fild word[esi]
fisub word[offset_y] ; proteza
fstp dword[edi]
mov esi,matrix mov esi,matrix
lea edi,[.mx] lea edi,[.mx]
@ -1037,11 +1050,8 @@ edit: ; mmx required, edit mesh by vertex
movsd movsd
movsd movsd
movsd movsd
; mov ecx,3
; cld
; rep movsd
mov dword[edit_start_x],0
mov dword[edit_end_x],0 mov dword[edit_end_x],0
mov [vertex_edit_no],-1 mov [vertex_edit_no],-1
@ -1096,7 +1106,7 @@ alloc_buffer_mem:
mov esp,ebp mov esp,ebp
pop ebp pop ebp
ret
@ -1511,6 +1521,7 @@ init_point_normals:
;in: ;in:
; esi - tri_ch ; esi - tri_ch
; edi - t_ptr ; edi - t_ptr
; ax = 1 -> old style finding normals
.z equ dword [ebp-8] .z equ dword [ebp-8]
.y equ dword [ebp-12] .y equ dword [ebp-12]
.x equ [ebp-16] .x equ [ebp-16]
@ -1519,6 +1530,7 @@ init_point_normals:
.t_ptr equ dword [ebp-36] .t_ptr equ dword [ebp-36]
.tri_ch equ dword [ebp-40] .tri_ch equ dword [ebp-40]
.max_val equ dword [ebp-44] .max_val equ dword [ebp-44]
.mark equ word [ebp-45]
push ebp push ebp
mov ebp,esp mov ebp,esp
@ -1527,9 +1539,9 @@ init_point_normals:
mov .t_ptr,edi mov .t_ptr,edi
mov .tri_ch,esi mov .tri_ch,esi
; mov .mark,ax
bt ax,0
jc .old1
mov ecx,[triangles_count_var] mov ecx,[triangles_count_var]
@ -1581,6 +1593,9 @@ init_point_normals:
jmp .end jmp .end
.old1:
xor edx,edx
.old: .old:
@ -1644,6 +1659,9 @@ init_point_normals:
mov edx,.point_number mov edx,.point_number
cmp edx,[points_count_var] cmp edx,[points_count_var]
jne .ipn_loop jne .ipn_loop
; cmp .mark,1
; je .end1
; always free if Ext>=SSE2
.end: .end:
mov eax,68 mov eax,68
@ -1656,7 +1674,7 @@ init_point_normals:
mov ecx,.tri_ch mov ecx,.tri_ch
int 0x40 int 0x40
; .end1:
add esp,64 add esp,64
@ -1817,38 +1835,37 @@ clrscr:
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
movzx eax,word[size_y_var] movzx eax,word[size_y_var]
imul ecx,eax imul ecx,eax
cld
xor eax,eax xor eax,eax
if Ext=NON ; if Ext=NON
rep stosd rep stosd
else if Ext = MMX ; else if Ext = MMX
pxor mm0,mm0 ; pxor mm0,mm0
@@: ; @@:
movq [edi+00],mm0 ; movq [edi+00],mm0
movq [edi+08],mm0 ; movq [edi+08],mm0
movq [edi+16],mm0 ; movq [edi+16],mm0
movq [edi+24],mm0 ; movq [edi+24],mm0
add edi,32 ; add edi,32
sub ecx,8 ; sub ecx,8
jnc @b ; jnc @b
else ; else
push ecx ; push ecx
mov ecx,edi ; mov ecx,edi
and ecx,0x0000000f ; and ecx,0x0000000f
rep stosb ; rep stosb
pop ecx ; pop ecx
and ecx,0xfffffff0 ; and ecx,0xfffffff0
xorps xmm0,xmm0 ; xorps xmm0,xmm0
@@: ; @@:
movaps [edi],xmm0 ; movaps [edi],xmm0
movaps [edi+16],xmm0 ; movaps [edi+16],xmm0
movaps [edi+32],xmm0 ; movaps [edi+32],xmm0
movaps [edi+48],xmm0 ; movaps [edi+48],xmm0
add edi,64 ; add edi,64
sub ecx,16 ; sub ecx,16
jnz @b ; jnz @b
end if ; end if
ret ret
@ -1879,7 +1896,7 @@ draw_triangles:
push ebp push ebp
mov ebp,esp mov ebp,esp
sub esp,60 sub esp,64
; movzx ax,[dr_flag] ; movzx ax,[dr_flag]
mov .dr_flag,ax mov .dr_flag,ax
@ -2777,6 +2794,7 @@ if Ext >= SSE3
; je @f ; je @f
; int3 ; int3
; @@: ; @@:
mov eax, .index1x12 mov eax, .index1x12
mov ebx, .index2x12 mov ebx, .index2x12
mov ecx, .index3x12 mov ecx, .index3x12
@ -2945,7 +2963,7 @@ end if
.eend: .eend:
add esp,60 add esp,64
pop ebp pop ebp
ret ret
@ -2956,7 +2974,7 @@ draw_handlers:
; in eax - render model ; in eax - render model
push ebp push ebp
mov ebp,esp mov ebp,esp
; emms emms
.fac equ dword[ebp-16] .fac equ dword[ebp-16]
.xplus_scr equ ebp-8 .xplus_scr equ ebp-8
.xplus_index equ ebp-12 .xplus_index equ ebp-12
@ -3320,12 +3338,12 @@ alloc_mem_for_tp:
int 0x40 ; -> allocate memory to triangles int 0x40 ; -> allocate memory to triangles
mov [triangles_ptr], eax ; -> eax = pointer to allocated mem mov [triangles_ptr], eax ; -> eax = pointer to allocated mem
mov eax, 68 ; mov eax, 68
mov ecx,[triangles_count_var] ; mov ecx,[triangles_count_var]
imul ecx,[i36] ; imul ecx,[i36]
mov edx,[edges_ptr] ; mov edx,[edges_ptr]
int 0x40 ; -> allocate memory to triangles ; int 0x40 ; -> allocate memory to triangles
mov [edges_ptr], eax ; -> eax = pointer to allocated mem ; mov [edges_ptr], eax ; -> eax = pointer to allocated mem
; ststic memory ; ststic memory
@ -3411,6 +3429,7 @@ read_from_disk:
; eax = 0 -> ok file loaded ; eax = 0 -> ok file loaded
ret ret
read_param: read_param:
cld
mov esi,I_Param mov esi,I_Param
cmp dword[esi],0 cmp dword[esi],0
je .end je .end
@ -3543,9 +3562,6 @@ ret
; ******* WINDOW DEFINITIONS AND DRAW ******** ; ******* WINDOW DEFINITIONS AND DRAW ********
; ********************************************* ; *********************************************
draw_window: draw_window:
movzx eax,[fire_flag]
push eax
; int3
mov eax,12 ; function 12:tell os about windowdraw mov eax,12 ; function 12:tell os about windowdraw
mov ebx,1 ; 1, start of draw mov ebx,1 ; 1, start of draw
int 0x40 int 0x40
@ -3732,8 +3748,8 @@ ret
mov eax,12 ; function 12:tell os about windowdraw mov eax,12 ; function 12:tell os about windowdraw
mov ebx,2 ; 2, end of draw mov ebx,2 ; 2, end of draw
int 0x40 int 0x40
pop eax ; pop eax
mov [fire_flag],al ; mov [fire_flag],al
ret ret