diff --git a/programs/demos/view3ds/3dmath.inc b/programs/demos/view3ds/3dmath.inc index b144f91dd1..e8c2174c6d 100644 --- a/programs/demos/view3ds/3dmath.inc +++ b/programs/demos/view3ds/3dmath.inc @@ -600,8 +600,9 @@ else add esi,12 add edi,12 - dec ecx - jne .again +; dec ecx +; jne .again + loop .again mov [edi],dword -1 end if ret @@ -667,7 +668,7 @@ translate_points: ; just convert into integer; z coord still needed end if .again: - if 0 + if 0 fld dword[esi+8] ; fmul [rsscale] fist word[edi+4] @@ -695,7 +696,7 @@ translate_points: ; just convert into integer; z coord still needed fiadd [vect_y] fistp word[edi+2] end if - if Ext>=SSE + if Ext>=SSE2 movups xmm0,[esi] cvtps2dq xmm0,xmm0 packssdw xmm0,xmm0 @@ -722,8 +723,6 @@ translate_points: ; just convert into integer; z coord still needed add esi,12 add edi,6 - ; dec ecx - ; jnz .again loop .again ret diff --git a/programs/demos/view3ds/3glass.inc b/programs/demos/view3ds/3glass.inc index 293cbf3881..74648e2fdf 100644 --- a/programs/demos/view3ds/3glass.inc +++ b/programs/demos/view3ds/3glass.inc @@ -344,7 +344,7 @@ end if pop ebp ret -align 16 + glass_line: ; in: ; xmm0 - normal vector 1 @@ -362,7 +362,7 @@ glass_line: push ebp mov ebp,esp - sub esp,256 + sub esp,190 sub ebp,16 and ebp,0xfffffff0 @@ -537,7 +537,7 @@ align 16 jnz .ddraw .end_rp_line: - add esp,256 + add esp,190 pop ebp ret diff --git a/programs/demos/view3ds/3glass_tex.inc b/programs/demos/view3ds/3glass_tex.inc index d78fd986d6..a2022ef566 100644 --- a/programs/demos/view3ds/3glass_tex.inc +++ b/programs/demos/view3ds/3glass_tex.inc @@ -452,7 +452,7 @@ end if pop ebp ret -align 16 + glass_tex_line: ; in: ; xmm0 - normal vector 1 @@ -747,7 +747,6 @@ end if .skip: add edi,4 add esi,4 - ; addps xmm0,.dn movaps xmm0,.n1 ; cur normal addps xmm0,.dn addps xmm2,.dtx diff --git a/programs/demos/view3ds/3ray_shd.inc b/programs/demos/view3ds/3ray_shd.inc index 7da685927e..86e7a87ba0 100644 --- a/programs/demos/view3ds/3ray_shd.inc +++ b/programs/demos/view3ds/3ray_shd.inc @@ -372,7 +372,7 @@ ray_shd_l: push ebp mov ebp,esp - sub esp,320 + sub esp,270 sub ebp,16 and ebp,0xfffffff0 @@ -421,6 +421,7 @@ ray_shd_l: shufps xmm3,xmm3,11100001b @@: movd .cur_tri,mm7 + ; sub .cur_tri,dword 1 cmp ax,.x_max jge .end_rp_line cmp bx,.x_min @@ -689,7 +690,7 @@ end if dec ecx jnz .ddraw .end_rp_line: - add esp,320 + add esp,270 pop ebp ret diff --git a/programs/demos/view3ds/a_procs.inc b/programs/demos/view3ds/a_procs.inc index 05799e390e..9ad72476cf 100644 --- a/programs/demos/view3ds/a_procs.inc +++ b/programs/demos/view3ds/a_procs.inc @@ -62,20 +62,6 @@ ret if Ext > SSE2 ;-------------------------------------------------------------------- init_point_lights: - ; mov eax,1000 - ; cvtsi2ss xmm1,eax - ; shufps xmm1,xmm1,11000000b - ; mov esi,lights_aligned - ; mov edi,point_light_coords - ; mov ecx,3 - ; @@: - ; movaps xmm0,[esi] - ; addps xmm0,[f05xz] - ; mulps xmm0,xmm1 - ; movaps [edi],xmm0 - ; add esi,64 - ; add edi,16 - ; loop @b mov ecx,3 mov edi,point_light_coords @@: @@ -90,16 +76,11 @@ init_point_lights: call random cvtsi2ss xmm0,eax movss [edi+4],xmm0 - ; movzx ebx,word[size_x_var] - ; shl ebx,2 - ; neg ebx mov ecx,-1900 - ; sub ecx,100 mov edx,-600 call random cvtsi2ss xmm0,eax movss [edi+8],xmm0 - ; mov dword[edi+8],-1700.0 mov [edi+12],dword 0 add edi,16 pop ecx @@ -174,7 +155,7 @@ intersect_tri: ; Moeller-Trumbore method ; or eax,eax ; jz @f comiss xmm0,[eps] - jl @f + jb @f rcpss xmm0,.det movss .invdet,xmm0 @@ -228,7 +209,7 @@ intersect_tri: ; Moeller-Trumbore method ; test eax,1 ; jz @f comiss xmm1,[eps] - jl @f + jb @f mov eax,1 cmp .ift,0 @@ -264,6 +245,16 @@ do_edges_list: .edd_ptr equ [ebp-8] .counter equ [ebp-12] + mov ebx, 12 + mov eax, 68 + mov ecx,[triangles_count_var] + lea ecx,[ecx*3] + shl ecx,4 + add ecx,1024 + mov edx,[edges_ptr] + int 0x40 ; -> allocate memory to edges + mov [edges_ptr], eax ; -> eax = pointer to allocated mem + mov ebx,[edges_ptr] mov eax,[triangles_ptr] @@ -280,17 +271,18 @@ do_edges_list: loop @b + mov ebx,[edges_ptr] mov ecx,[triangles_count_var] lea ecx,[ecx*3] .mxd: mov eax,[ebx] + mov edx,[ebx+4] cmp eax,[ebx+4] - jl @f - movq xmm0,[ebx] - pshufd xmm0,xmm0,11100001b - movq [ebx],xmm0 - @@: + cmovg eax,edx + cmovg edx,[ebx] + mov [ebx],eax + mov [ebx+4],edx add ebx,8 loop .mxd @@ -303,20 +295,20 @@ do_edges_list: mov esi,ecx shl esi,3 add esi,ebx - + dec ecx .ccc: mov eax,[ebx+8] cmp eax,[ebx] - jge .g + jae .g movq xmm0,[ebx+8] push ebx .c: cmp ebx,esi - jge .done + jae .done cmp ebx,[edges_ptr] - jl .done + jb .done cmp eax,[ebx] - jge .done + jae .done movq xmm7,[ebx] movq [ebx+8],xmm7 sub ebx,8 @@ -328,10 +320,7 @@ do_edges_list: pop ebx .g: add ebx,8 - dec ecx - cmp ecx,1 - jnz .ccc - + loop .ccc ; insert sort again mov ebx,[edges_ptr] @@ -350,7 +339,7 @@ do_edges_list: inc ecx add ebx,8 cmp ebx,esi - jge .br ; break + jae .br ; break cmp eax,[ebx] je .aa mov .counter,ecx @@ -368,12 +357,12 @@ do_edges_list: mov eax,[ebx+12] mov edx,[ebx+8] cmp eax,[ebx+4] - jge .gg2 + jae .gg2 movq xmm0,[ebx+8] push ebx .c2: cmp eax,[ebx+4] - jge .done2 + jae .done2 movq xmm7,[ebx] movq [ebx+8],xmm7 @@ -405,60 +394,68 @@ do_edges_list: add esp,8 .ff: + ; count edges - mov ecx,0 - mov edx,[triangles_count_var] - lea edx,[edx*3] - mov ebx,[edges_ptr] -; mov esi,edx -; shl esi,3 -; add esi,[edges_ptr] + + mov ecx,[triangles_count_var] + lea ecx,[ecx*3+3] + mov esi,[edges_ptr] + xor edx,edx + cld .nx: - movq xmm0,[ebx] - add ebx,8 -; cmp ebx,esi -; jae @f - movq xmm1,[ebx] -; @@: - pcmpeqd xmm0,xmm1 - pmovmskb eax,xmm0 - and eax,0xff - cmp eax,0xff - jz @f - inc ecx - @@: - dec edx - jnz .nx + lodsd + mov ebx,eax + lodsd + cmp ebx,[esi] + jnz .ic + cmp eax,[esi+4] + jnz .ic + loop .nx + jmp .endc + .ic: + + inc edx + loop .nx + .endc: + mov .ed_cnt,edx + mov ecx,edx - mov .ed_cnt,ecx - lea ecx,[ecx*3] - shl ecx,2 + shl ecx,3 add ecx,65536 mov ebx,12 mov eax,68 mov edx,.edd_ptr - int 0x40 ; -> allocate memory to triangles + int 0x40 ; -> allocate memory to new edges mov .edd_ptr, eax ; -> eax = pointer to allocated mem - mov ebx,[edges_ptr] - mov ecx,[triangles_count_var] - lea ecx,[ecx*3] - .seek: - movq xmm0,[ebx] - movq xmm1,[ebx+8] - pcmpeqd xmm1,xmm0 - pmovmskb edx,xmm1 - and edx,0xff - cmp edx,0xff - je @f - movq [eax],xmm0 - add eax,8 - @@: - add ebx,8 - loop .seek + mov ecx,[triangles_count_var] + lea ecx,[ecx*3] + add ecx,ecx + mov esi,[edges_ptr] + mov edi,eax + xor edx,edx + cld + .nx1: + lodsd + mov ebx,eax + lodsd + cmp ebx,[esi] + jnz .ic1 + cmp eax,[esi+4] + jnz .ic1 + loop .nx1 + jmp .endc1 + .ic1: + xchg eax,ebx + stosd + mov eax,ebx + stosd + inc edx + loop .nx1 + .endc1: mov eax,68 mov ebx,13 @@ -595,8 +592,8 @@ draw_dots: mov edi,[screen_ptr] lea eax,[eax*3] add edi,eax - xor eax,eax - not eax + or eax,-1 +; not eax stosd @@: loop .drw diff --git a/programs/demos/view3ds/bump_tex.inc b/programs/demos/view3ds/bump_tex.inc index 12c696b6a7..3b645496a1 100644 --- a/programs/demos/view3ds/bump_tex.inc +++ b/programs/demos/view3ds/bump_tex.inc @@ -707,7 +707,7 @@ if Ext >= SSE2 movups .cty2,xmm3 end if -if (Ext = MMX) +if (Ext = MMX)| (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -843,7 +843,7 @@ if Ext >= SSE2 end if -if (Ext = MMX) +if (Ext = MMX)| (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -1469,13 +1469,3 @@ end if .bl_end: mov esp,ebp ret 76 -;Ext = MMX - -; else -; movq mm5, qword[.temp1] ;- -; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X -; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE -; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap -; movd ebx,mm5 -; psrlq mm5,32 -; end if diff --git a/programs/demos/view3ds/chunks.inc b/programs/demos/view3ds/chunks.inc index 051c1a8a1b..e1882f8470 100644 --- a/programs/demos/view3ds/chunks.inc +++ b/programs/demos/view3ds/chunks.inc @@ -76,14 +76,19 @@ detect_chunks: mov .chmr,eax ; chunks mark if bit is set - tri was used mov edi,eax - pxor xmm0,xmm0 +; pxor xmm0,xmm0 mov ecx,[triangles_count_var] - shr ecx,7 + shr ecx,5 inc ecx - @@: - movdqa [edi],xmm0 - add edi,16 - loop @b + xor eax,eax + cld + rep stosd +; shr ecx,7 +; inc ecx +; @@: +; movdqa [edi],xmm0 +; add edi,16 +; loop @b mov eax,[points_count_var] @@ -293,6 +298,7 @@ detect_chunks: mov .up,esi mov .str,edi +; mov edi,.tri_ch1 .lb1: ; nx chunk cmp edi,.ltch1 jnb .endl @@ -399,7 +405,7 @@ detect_chunks: -; mov ebx,.chunks + mov ebx,.chunks mov ecx,.ch_cnt mov esi,.tri_ch diff --git a/programs/demos/view3ds/data.inc b/programs/demos/view3ds/data.inc index 4feac6fdb6..f32b274501 100644 --- a/programs/demos/view3ds/data.inc +++ b/programs/demos/view3ds/data.inc @@ -357,7 +357,7 @@ base_vector: if Ext=SSE3 db ' (SSE3)' end if - db ' 0.076',0 + db ' 0.077',0 labellen: STRdata db '-1 ' lab_vert: @@ -488,7 +488,7 @@ end if the_one: times 4 dd 1.0 - eps: times 4 dd 0.00000 + eps: times 4 dd 0.000001 vect_x: dw SIZE_X / 2 vect_y dw SIZE_Y / 2 @@ -500,9 +500,9 @@ end if xres_var dw SIZE_X - epsone dd 1.0001 + epsone dd 1.00001 aprox dd 0.0001 - epsminus dd -0.0001 + epsminus dd 0.00001 file_info: @@ -513,9 +513,9 @@ end if fptr dd 0 ;workarea file_name: db '/sys/3d/house.3ds',0 - ; db '/tmp0/1/sc.3ds',0 + ; db '/tmp0/1/bmwm3.3ds',0 - rb 256 + rb 1024 I_END: diff --git a/programs/demos/view3ds/grd_cat.inc b/programs/demos/view3ds/grd_cat.inc index 5480b12b6d..c154ec785f 100644 --- a/programs/demos/view3ds/grd_cat.inc +++ b/programs/demos/view3ds/grd_cat.inc @@ -37,19 +37,19 @@ gouraud_triangle_z: .dz12 equ dword[ebp-20] .dc12r equ dword[ebp-24] .dc12g equ dword[ebp-28] -.dc12b equ dword[ebp-32] +.dc12b equ [ebp-32] .dx13 equ dword[ebp-36] .dz13 equ dword[ebp-40] .dc13r equ dword[ebp-44] .dc13g equ dword[ebp-48] -.dc13b equ dword[ebp-52] +.dc13b equ [ebp-52] .dx23 equ dword[ebp-56] .dz23 equ dword[ebp-60] .dc23r equ dword[ebp-64] .dc23g equ dword[ebp-68] -.dc23b equ dword[ebp-72] +.dc23b equ [ebp-72] .zz1 equ dword[ebp-76] .c1r equ dword[ebp-80] @@ -78,7 +78,7 @@ end if mov ebp,esp ; sub esp,84 - .sort3: ; sort triangle coordinates... + .sort3: ; sort triangle coordinates... cmp ax,bx jle .sort1 xchg eax,ebx @@ -89,389 +89,491 @@ end if xchg edx,dword[.col2b] mov dword[.col1b],edx .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.col2r] - xchg edx,dword[.col3r] - mov dword[.col2r],edx - mov edx,dword[.col2b] - xchg edx,dword[.col3b] - mov dword[.col2b],edx + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.col2r] + xchg edx,dword[.col3r] + mov dword[.col2r],edx + mov edx,dword[.col2b] + xchg edx,dword[.col3b] + mov dword[.col2b],edx jmp .sort3 .sort2: - push eax ; store in variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .gt_loop2_end + push eax ; store in variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .gt_loop2_end - mov bx,.y2 ; calc deltas - sub bx,.y1 - jnz .gt_dx12_make + mov bx,.y2 ; calc deltas + sub bx,.y1 + jnz .gt_dx12_make ; mov .dx12,0 ; mov .dz12,0 ; mov .dc12r,0 ; mov .dc12g,0 ; mov .dc12b,0 - mov ecx,5 + mov ecx,5 @@: - push dword 0 - loop @b - jmp .gt_dx12_done + push dword 0 + loop @b + jmp .gt_dx12_done .gt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + +if Ext>= SSE2 + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + ; push eax + mov ebx,eax + + + mov ax,.x2 + sub ax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx12,eax + + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.col1r] + movq xmm2,[.col2r] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc12b,xmm2 +else + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx ; mov .dx12,eax - push eax + push eax - mov ax,word[.z2] - sub ax,word[.z1] + mov ax,word[.z2] + sub ax,word[.z1] cwde - shl eax,CATMULL_SHIFT + shl eax,CATMULL_SHIFT cdq - idiv ebx - push eax + idiv ebx + push eax - mov ax,word[.col2r] - sub ax,word[.col1r] + mov ax,word[.col2r] + sub ax,word[.col1r] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc12r,eax - push eax - mov ax,word[.col2g] - sub ax,word[.col1g] + push eax + mov ax,word[.col2g] + sub ax,word[.col1g] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc12g,eax - push eax - mov ax,word[.col2b] ;;--- - sub ax,word[.col1b] + push eax + mov ax,word[.col2b] ;;--- + sub ax,word[.col1b] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc12b,eax - push eax + push eax +end if .gt_dx12_done: - mov bx,.y3 ; calc deltas - sub bx,.y1 - jnz .gt_dx13_make + mov bx,.y3 ; calc deltas + sub bx,.y1 + jnz .gt_dx13_make ; mov .dx13,0 ; mov .dz13,0 ; mov .dc13r,0 ; mov .dc13g,0 ; mov .dc13b,0 - mov ecx,5 + mov ecx,5 @@: - push dword 0 - loop @b - jmp .gt_dx13_done + push dword 0 + loop @b + jmp .gt_dx13_done .gt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + +if Ext>= SSE2 + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + mov ebx,eax + + + mov ax,.x3 + sub ax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.col1r] + movq xmm2,[.col3r] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc13b,xmm2 +else + + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx ; mov .dx13,eax - push eax + push eax - mov ax,word[.z3] - sub ax,word[.z1] + mov ax,word[.z3] + sub ax,word[.z1] cwde - shl eax,CATMULL_SHIFT + shl eax,CATMULL_SHIFT cdq - idiv ebx - push eax + idiv ebx + push eax - mov ax,word[.col3r] - sub ax,word[.col1r] + mov ax,word[.col3r] + sub ax,word[.col1r] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc13r,eax - push eax - mov ax,word[.col3g] - sub ax,word[.col1g] + push eax + mov ax,word[.col3g] + sub ax,word[.col1g] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc13g,eax - push eax - mov ax,word[.col3b] - sub ax,word[.col1b] + push eax + mov ax,word[.col3b] + sub ax,word[.col1b] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc13b,eax - push eax + push eax +end if .gt_dx13_done: - mov bx,.y3 ; calc deltas - sub bx,.y2 - jnz .gt_dx23_make + mov bx,.y3 ; calc deltas + sub bx,.y2 + jnz .gt_dx23_make ; mov .dx23,0 ; mov .dz23,0 ; mov .dc23r,0 ; mov .dc23g,0 ; mov .dc23b,0 - mov ecx,5 + mov ecx,5 @@: - push dword 0 - loop @b - jmp .gt_dx23_done + push dword 0 + loop @b + jmp .gt_dx23_done .gt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND + +if Ext>= SSE2 + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + ; push eax + mov ebx,eax + + mov ax,.x3 + sub ax,.x2 + cwde + imul ebx + sar eax,15 - ROUND + push eax + + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.col2r] + movq xmm2,[.col3r] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc23b,xmm2 +else + + + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx ; mov .dx23,eax - push eax + push eax - mov ax,word[.z3] - sub ax,word[.z2] + mov ax,word[.z3] + sub ax,word[.z2] cwde - shl eax,CATMULL_SHIFT + shl eax,CATMULL_SHIFT cdq - idiv ebx - push eax + idiv ebx + push eax - mov ax,word[.col3r] - sub ax,word[.col2r] + mov ax,word[.col3r] + sub ax,word[.col2r] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc23r,eax - push eax - mov ax,word[.col3g] - sub ax,word[.col2g] + push eax + mov ax,word[.col3g] + sub ax,word[.col2g] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc23g,eax - push eax - mov ax,word[.col3b] - sub ax,word[.col2b] + push eax + mov ax,word[.col3b] + sub ax,word[.col2b] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dc23b,eax - push eax + push eax +end if .gt_dx23_done: - sub esp,32 + sub esp,32 - movsx eax,.x1 ; eax - cur x1 - shl eax,ROUND ; ebx - cur x2 - mov ebx,eax - movsx edx,word[.z1] - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx - movzx edx,word[.col1r] - shl edx,ROUND - mov .c1r,edx - mov .c2r,edx - movzx edx,word[.col1g] - shl edx,ROUND - mov .c1g,edx - mov .c2g,edx - movzx edx,word[.col1b] - shl edx,ROUND - mov .c1b,edx - mov .c2b,edx - mov cx,.y1 - cmp cx,.y2 - jge .gt_loop1_end + movsx eax,.x1 ; eax - cur x1 + shl eax,ROUND ; ebx - cur x2 + mov ebx,eax + movsx edx,word[.z1] + shl edx,CATMULL_SHIFT + mov .zz1,edx + mov .zz2,edx + movzx edx,word[.col1r] + shl edx,ROUND + mov .c1r,edx + mov .c2r,edx + movzx edx,word[.col1g] + shl edx,ROUND + mov .c1g,edx + mov .c2g,edx + movzx edx,word[.col1b] + shl edx,ROUND + mov .c1b,edx + mov .c2b,edx + mov cx,.y1 + cmp cx,.y2 + jge .gt_loop1_end .gt_loop1: pushad ; macro .debug - mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors - sar edx,ROUND - push dx - mov edx,.c2g - sar edx,ROUND - push dx - mov edx,.c2b - sar edx,ROUND - push dx - sar ebx,ROUND ; x2 - push bx - mov edx,.c1r - sar edx,ROUND - push dx - mov edx,.c1g - sar edx,ROUND - push dx - mov edx,.c1b - sar edx,ROUND - push dx - sar eax,ROUND - push ax ; x1 - push cx ; y - push .zz2 - push .zz1 - call gouraud_line_z + mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors + sar edx,ROUND + push dx + mov edx,.c2g + sar edx,ROUND + push dx + mov edx,.c2b + sar edx,ROUND + push dx + sar ebx,ROUND ; x2 + push bx + mov edx,.c1r + sar edx,ROUND + push dx + mov edx,.c1g + sar edx,ROUND + push dx + mov edx,.c1b + sar edx,ROUND + push dx + sar eax,ROUND + push ax ; x1 + push cx ; y + push .zz2 + push .zz1 + call gouraud_line_z popad + if Ext >= MMX - movq mm0,.c1bM - paddd mm0,qword .dc13bM - movq .c1bM,mm0 - movq mm1,.c2bM - paddd mm1,qword .dc12bM - movq .c2bM,mm1 + movq mm0,.c1bM + paddd mm0,qword .dc13bM + movq .c1bM,mm0 + movq mm1,.c2bM + paddd mm1,qword .dc12bM + movq .c2bM,mm1 - movq mm0,.c1rM - paddd mm0,qword .dc13rM - movq .c1rM,mm0 - movq mm1,.c2rM - paddd mm1,qword .dc12rM - movq .c2rM,mm1 + movq mm0,.c1rM + paddd mm0,qword .dc13rM + movq .c1rM,mm0 + movq mm1,.c2rM + paddd mm1,qword .dc12rM + movq .c2rM,mm1 else - mov edx,.dc13r - add .c1r,edx - mov edx,.dc13g - add .c1g,edx - mov edx,.dc13b - add .c1b,edx - mov edx,.dc12r - add .c2r,edx - mov edx,.dc12g - add .c2g,edx - mov edx,.dc12b - add .c2b,edx + mov edx,.dc13r + add .c1r,edx + mov edx,.dc13g + add .c1g,edx + mov edx,.dc13b + add .c1b,edx + mov edx,.dc12r + add .c2r,edx + mov edx,.dc12g + add .c2g,edx + mov edx,.dc12b + add .c2b,edx - mov edx,.dz13 - add .zz1,edx - mov edx,.dz12 - add .zz2,edx + mov edx,.dz13 + add .zz1,edx + mov edx,.dz12 + add .zz2,edx end if - add eax,.dx13 - add ebx,.dx12 - inc cx - cmp cx,.y2 - jl .gt_loop1 + add eax,.dx13 + add ebx,.dx12 + inc cx + cmp cx,.y2 + jl .gt_loop1 .gt_loop1_end: - mov cx,.y2 - cmp cx,.y3 - jge .gt_loop2_end + mov cx,.y2 + cmp cx,.y3 + jge .gt_loop2_end - movsx ebx,.x2 ; eax - cur x1 - shl ebx,ROUND ; ebx - cur x2 - movsx edx,word[.z2] - shl edx,CATMULL_SHIFT - mov .zz2,edx - movzx edx,word[.col2r] - shl edx,ROUND - mov .c2r,edx - movzx edx,word[.col2g] - shl edx,ROUND - mov .c2g,edx - movzx edx,word[.col2b] - shl edx,ROUND - mov .c2b,edx + movsx ebx,.x2 ; eax - cur x1 + shl ebx,ROUND ; ebx - cur x2 + movsx edx,word[.z2] + shl edx,CATMULL_SHIFT + mov .zz2,edx + movzx edx,word[.col2r] + shl edx,ROUND + mov .c2r,edx + movzx edx,word[.col2g] + shl edx,ROUND + mov .c2g,edx + movzx edx,word[.col2b] + shl edx,ROUND + mov .c2b,edx .gt_loop2: pushad ; macro .debug - mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors - sar edx,ROUND - push dx - mov edx,.c2g - sar edx,ROUND - push dx - mov edx,.c2b - sar edx,ROUND - push dx - sar ebx,ROUND ; x2 - push bx - mov edx,.c1r - sar edx,ROUND - push dx - mov edx,.c1g - sar edx,ROUND - push dx - mov edx,.c1b - sar edx,ROUND - push dx - sar eax,ROUND - push ax ; x1 - push cx ; y - push .zz2 - push .zz1 - call gouraud_line_z + mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors + sar edx,ROUND + push dx + mov edx,.c2g + sar edx,ROUND + push dx + mov edx,.c2b + sar edx,ROUND + push dx + sar ebx,ROUND ; x2 + push bx + mov edx,.c1r + sar edx,ROUND + push dx + mov edx,.c1g + sar edx,ROUND + push dx + mov edx,.c1b + sar edx,ROUND + push dx + sar eax,ROUND + push ax ; x1 + push cx ; y + push .zz2 + push .zz1 + call gouraud_line_z popad if Ext >= MMX - movq mm0,.c1bM - paddd mm0,qword .dc13bM - movq .c1bM,mm0 - movq mm1,.c2bM - paddd mm1,qword .dc23bM - movq .c2bM,mm1 + movq mm0,.c1bM + paddd mm0,qword .dc13bM + movq .c1bM,mm0 + movq mm1,.c2bM + paddd mm1,qword .dc23bM + movq .c2bM,mm1 - movq mm0,.c1rM - paddd mm0,qword .dc13rM - movq .c1rM,mm0 - movq mm1,.c2rM - paddd mm1,qword .dc23rM - movq .c2rM,mm1 + movq mm0,.c1rM + paddd mm0,qword .dc13rM + movq .c1rM,mm0 + movq mm1,.c2rM + paddd mm1,qword .dc23rM + movq .c2rM,mm1 else - mov edx,.dc13r - add .c1r,edx - mov edx,.dc13g - add .c1g,edx - mov edx,.dc13b - add .c1b,edx - mov edx,.dc23r - add .c2r,edx - mov edx,.dc23g - add .c2g,edx - mov edx,.dc23b - add .c2b,edx - mov edx,.dz13 - add .zz1,edx - mov edx,.dz23 - add .zz2,edx + mov edx,.dc13r + add .c1r,edx + mov edx,.dc13g + add .c1g,edx + mov edx,.dc13b + add .c1b,edx + mov edx,.dc23r + add .c2r,edx + mov edx,.dc23g + add .c2g,edx + mov edx,.dc23b + add .c2b,edx + mov edx,.dz13 + add .zz1,edx + mov edx,.dz23 + add .zz2,edx end if - add eax,.dx13 - add ebx,.dx23 - inc cx - cmp cx,.y3 - jl .gt_loop2 + add eax,.dx13 + add ebx,.dx23 + inc cx + cmp cx,.y3 + jl .gt_loop2 .gt_loop2_end: - mov esp,ebp + mov esp,ebp ret 24 gouraud_line_z: ;----------------- procedure drawing gouraud line @@ -479,10 +581,11 @@ gouraud_line_z: ;----------------- esi - pointer to Z_buffer ;----------------- edi - pointer to screen buffer ;----------------- stack: -.z1 equ dword[ebp+4] ; z coordiunate shifted left CATMULL_SHIFT +.z1 equ dword[ebp+4] ; z coordiunate shifted left CATMULL_SHIFT .z2 equ dword[ebp+8] .y equ word[ebp+12] .x1 equ ebp+14 + .c1b equ ebp+16 .c1g equ ebp+18 .c1r equ ebp+20 @@ -509,138 +612,191 @@ gouraud_line_z: .dc_rM equ ebp-16 .dc_gM equ ebp-12 .dc_bM equ ebp-8 - mov ebp,esp + mov ebp,esp - mov ax,.y - or ax,ax - jl .gl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;SIZE_Y - jge .gl_quit + mov ax,.y + or ax,ax + jl .gl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;SIZE_Y + jge .gl_quit - mov eax,dword[.x1] - cmp ax,word[.x2] - je .gl_quit - jl @f + mov eax,dword[.x1] + cmp ax,word[.x2] + je .gl_quit + jl @f - xchg eax,dword[.x2] - mov dword[.x1],eax - mov eax,dword[.c1g] - xchg eax,dword[.c2g] - mov dword[.c1g],eax - mov eax,.z1 - xchg eax,.z2 - mov .z1,eax + xchg eax,dword[.x2] + mov dword[.x1],eax + mov eax,dword[.c1g] + xchg eax,dword[.c2g] + mov dword[.c1g],eax + mov eax,.z1 + xchg eax,.z2 + mov .z1,eax @@: - mov bx,[size_x_var] - dec bx - cmp word[.x1],bx ;SIZE_X - jge .gl_quit - cmp word[.x2],0 - jle .gl_quit + mov bx,[size_x_var] + dec bx + cmp word[.x1],bx ;SIZE_X + jge .gl_quit + cmp word[.x2],0 + jle .gl_quit - mov eax,.z2 - sub eax,.z1 - cdq - mov bx,word[.x2] ; dz = z2-z1/x2-x1 - sub bx,word[.x1] - movsx ebx,bx - idiv ebx - push eax +if 0 + mov bx,word[.x2] ; dz = z2-z1/x2-x1 + sub bx,word[.x1] + movsx ebx,bx - mov ax,word[.c2b] - sub ax,word[.c1b] - cwde - shl eax,ROUND - cdq - idiv ebx - push eax - mov ax,word[.c2g] - sub ax,word[.c1g] - cwde - shl eax,ROUND - cdq - idiv ebx - push eax + mov eax,1 shl 15 + cdq + idiv ebx + mov ebx,eax - mov ax,word[.c2r] - sub ax,word[.c1r] - cwde - shl eax,ROUND ; dc_r = c2r-c1r/x2-x1 - cdq - idiv ebx - push eax - cmp word[.x1],0 ; clipping on function - jg @f - mov eax,.dz - movsx ebx,word[.x1] - neg ebx - imul ebx - add .z1,eax - mov word[.x1],0 + mov eax,.x3 + sub eax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax - mov eax,.dc_r - imul ebx - sar eax,ROUND - add word[.c1r],ax + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.col1r] + movq xmm2,[.col3r] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc13b,xmm2 - mov eax,.dc_g - imul ebx - sar eax,ROUND - add word[.c1g],ax - mov eax,.dc_b - imul ebx - sar eax,ROUND - add word[.c1b],ax +end if + + + mov eax,.z2 + sub eax,.z1 + cdq + mov bx,word[.x2] ; dz = z2-z1/x2-x1 + sub bx,word[.x1] + movsx ebx,bx + idiv ebx + push eax + + + mov eax,1 shl 15 + cdq + idiv ebx + mov ebx,eax + + + mov ax,word[.c2b] + sub ax,word[.c1b] + cwde + imul ebx + sar eax,15 - ROUND + push eax + + + mov ax,word[.c2g] + sub ax,word[.c1g] + cwde + imul ebx + sar eax,15 - ROUND + push eax + + + + mov ax,word[.c2r] + sub ax,word[.c1r] + cwde + imul ebx + sar eax,15 - ROUND + push eax + + cmp word[.x1],0 ; clipping on function + jg @f + mov eax,.dz + movsx ebx,word[.x1] + neg ebx + imul ebx + add .z1,eax + mov word[.x1],0 + + mov eax,.dc_r + imul ebx + sar eax,ROUND + add word[.c1r],ax + + mov eax,.dc_g + imul ebx + sar eax,ROUND + add word[.c1g],ax + + mov eax,.dc_b + imul ebx + sar eax,ROUND + add word[.c1b],ax @@: - mov bx,[size_x_var] - dec bx - cmp word[.x2],bx ;SIZE_X - jl @f - mov word[.x2],bx ;SIZE_X + mov bx,[size_x_var] + dec bx + cmp word[.x2],bx ;SIZE_X + jl @f + mov word[.x2],bx ;SIZE_X @@: - sub esp,16 ; calculate memory begin - movzx edx,word[size_x_var] ;SIZE_X ; in buffers - movzx eax,.y - mul edx - movzx edx,word[.x1] - add eax,edx - push eax - lea eax,[eax*3] - add edi,eax - pop eax - shl eax,2 - add esi,eax + sub esp,16 ; calculate memory begin + movzx edx,word[size_x_var] ;SIZE_X ; in buffers + movzx eax,.y + mul edx + movzx edx,word[.x1] + add eax,edx + push eax + lea eax,[eax*3] + add edi,eax + pop eax + shl eax,2 + add esi,eax - mov cx,word[.x2] - sub cx,word[.x1] - movzx ecx,cx - mov ebx,.z1 ; ebx - currrent z shl CATMULL_SIFT + mov cx,word[.x2] + sub cx,word[.x1] + movzx ecx,cx + mov ebx,.z1 ; ebx - currrent z shl CATMULL_SIFT ;if Ext >= SSE ; mov .cz,edx ;end if - mov edx,.dz ; edx - delta z - movzx eax,word[.c1r] - shl eax,ROUND - mov .cr,eax - movzx eax,word[.c1g] - shl eax,ROUND - mov .cg,eax - movzx eax,word[.c1b] - shl eax,ROUND - mov .cb,eax -if Ext = MMX + mov edx,.dz ; edx - delta z +if Ext >= SSE2 + movq xmm7,[.c1b] + pshuflw xmm7,xmm7,11000110b + punpcklwd xmm7,[the_zero] + pslld xmm7,ROUND + movdqu xmm1,[.dc_rM] +end if +if Ext = NON + movzx eax,word[.c1r] + shl eax,ROUND + mov .cr,eax + movzx eax,word[.c1g] + shl eax,ROUND + mov .cg,eax + movzx eax,word[.c1b] + shl eax,ROUND + mov .cb,eax +end if +if (Ext = MMX) | (Ext=SSE) ; mov .c_z,edx - movd mm2,[.dc_bM] ; delta color blue MMX - movd mm3,[.cbM] ; current blue MMX - movq mm5,[.dc_rM] - movq mm4,[.crM] - pxor mm6,mm6 + movd mm2,[.dc_bM] ; delta color blue MMX + movd mm3,[.cbM] ; current blue MMX + movq mm5,[.dc_rM] + movq mm4,[.crM] + pxor mm6,mm6 end if @@ -650,55 +806,70 @@ end if ; psrsq mm0,32 ; movd ebx,mm0 ;end if - cmp ebx,dword[esi] ; esi - z_buffer - jge @f ; edi - Screen buffer -if Ext = MMX - movq mm0,mm3 ; mm0, mm1 - temp registers - psrld mm0,ROUND - movq mm1,mm4 - psrld mm1,ROUND - packssdw mm1,mm0 - packuswb mm1,mm6 -; movd [edi],mm1 - movd eax,mm1 - stosw - shr eax,16 - stosb -else - mov eax,.cr - sar eax,ROUND - stosb - mov eax,.cg - sar eax,ROUND - stosb - mov eax,.cb - sar eax,ROUND - stosb + cmp ebx,dword[esi] ; esi - z_buffer + jge @f ; edi - Screen buffer +if Ext >= SSE2 + movdqa xmm0,xmm7 + psrld xmm0,ROUND + packssdw xmm0,xmm0 + packuswb xmm0,xmm0 + movd eax,xmm0 + stosw + shr eax,16 + stosb end if - mov dword[esi],ebx +if (Ext=MMX) | (Ext=SSE) + movq mm0,mm3 ; mm0, mm1 - temp registers + psrld mm0,ROUND + movq mm1,mm4 + psrld mm1,ROUND + packssdw mm1,mm0 + packuswb mm1,mm6 +; movd [edi],mm1 + movd eax,mm1 + stosw + shr eax,16 + stosb +end if +if Ext=NON + mov eax,.cr + sar eax,ROUND + stosb + mov eax,.cg + sar eax,ROUND + stosb + mov eax,.cb + sar eax,ROUND + stosb +end if + mov dword[esi],ebx ;if Ext = NON - jmp .no_skip + jmp .no_skip ;end if @@: - add edi,3 + add edi,3 .no_skip: - add esi,4 + add esi,4 ;if Ext=NON - add ebx,edx + add ebx,edx ;end if -if Ext=MMX - paddd mm3,mm2 - paddd mm4,mm5 -else - mov eax,.dc_g - add .cg,eax - mov eax,.dc_b - add .cb,eax - mov eax,.dc_r - add .cr,eax +if Ext >=SSE2 + paddd xmm7,xmm1 end if - loop .ddraw +if (Ext=MMX) | (Ext=SSE) + paddd mm3,mm2 + paddd mm4,mm5 +end if +if Ext = NON + mov eax,.dc_g + add .cg,eax + mov eax,.dc_b + add .cb,eax + mov eax,.dc_r + add .cr,eax +end if + loop .ddraw .gl_quit: - mov esp,ebp + mov esp,ebp ret 26 diff --git a/programs/demos/view3ds/history.txt b/programs/demos/view3ds/history.txt index a6c8faef2b..47f94b4b82 100644 --- a/programs/demos/view3ds/history.txt +++ b/programs/demos/view3ds/history.txt @@ -1,3 +1,25 @@ +View3ds 0.076 - XII 2021 +1. Detecting manifold chunks procedure based on kind of sorted pivot + table. Chunks are counted and this number displayed. +2. New calculating normal vectors proc that use some data produced + by new chunks routine. Now big object loading is fast. I load object that + contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges + in few seconds on i5 2cond gen. Earlier such objects calculating was + rather above time limits. +3. On http://board.flatassembler.net occasionaly there are some disccusions + about optimizing. Some clever people, wich skills and competence I trust, + claims - for CPU's manufactured last ~15 years size of code is crucial + for speed. (Better utilize CPU cache). + So I wrote some 'movsd' mnemonics instead 'mov [edi],sth'; 'loop' instead + 'dec ecx,jnz sth'. Moreover I come back to init some local varibles + by 'push' (flat_cat.inc). I took effort to change divisions to + multiplications two_tex.inc (works ok in fpu only Ext = NON mode and + of course in Ext = SSE3 mode), grd_tex.inc (single line not parallel + muls, whole drawing routine 4 divs instead 27 divisions), + bump_tex.inc - 3 divs in SSE2 mode.s See sources for details. +4. Editor button allows now editing by vertex all above 65535 vert objects. +---------------------------------------------------------------------------------- + View3ds 0.075 - XII 2021 1. Cusom rotate using keys and mouse scroll support by Leency. ---------------------------------------------------------------------------------- @@ -8,8 +30,7 @@ View3ds 0.074 - IX 2021 3. New rendering model - ray casted shadows and appropiate button to set 'on' this option. Note that is non real time model, especially when complex object is computed. I took effort to introduce accelerating - structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled - + structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled for now - seems to work incorrect(slow). ---------------------------------------------------------------------------------- diff --git a/programs/demos/view3ds/readme.txt b/programs/demos/view3ds/readme.txt index e680574552..646fb3469e 100644 --- a/programs/demos/view3ds/readme.txt +++ b/programs/demos/view3ds/readme.txt @@ -1,33 +1,16 @@ -View3ds 0.076 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.077 - tiny viewer to .3ds and .asc files with several graphics effects implementation. -What's new? -1. Detecting manifold chunks procedure based on kind of sorted pivot - table. Chunks are counted and this number displayed. -2. New calculating normal vectors proc that use some data produced - by new chunks routine. Now big object loading is fast. I load object that - contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges - in few seconds on i5 2cond gen. Earlier such objects calculating was - rather above time limits. -3. On http://board.flatassembler.net occasionaly there are some disccusions - about optimizing. Some clever people, wich skills and competence I trust, - claims - for CPU's manufactured last ~15 years size of code is crucial - for speed. (Better utilize CPU cache). - So I wrote some 'movsd' mnemonics instead 'mov [edi],sth'; 'loop' instead - 'dec ecx,jnz sth'. Moreover I come back to init some local varibles - by 'push' (flat_cat.inc). I took effort to change divisions to - multiplications two_tex.inc (works ok in fpu only Ext = NON mode and - of course in Ext = SSE3 mode), grd_tex.inc (single line not parallel - muls, whole drawing routine 4 divs instead 27 divisions), - bump_tex.inc - 3 divs in SSE2 mode.s See sources for details. -4. Editor button allows now editing by vertex all above 65535 vert objects. - - +Whats new? +1. More divs elimination comparing to ver 0.076, - grd_cat.inc file. +2. Some 3ds object I have, reads with invalid normals - fixed. +3. Invalid submit edition bug - fixed. Smaller size of adjcent proc. +4. Edges detection fix. Buttons description: -1. rotary: choosing rotary axle: x, y, x+y, keys - for object translate - using keyboard. . +1. rotary: choosing rotary axle: x, y, x+y, keys - for object custom rotate + using keyboard - keys <, >, PgUp, PgDown. 2. shd. model: choosing shading model: flat, grd (smooth), env (spherical environment mapping, bump (bump mapping), tex (texture mapping), pos (position shading depend), dots (app draws only points - nodes of object), @@ -40,7 +23,7 @@ Buttons description: 6. ray shadow: calc ray casted shadows. 7. culling: backface culling on/ off. 8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). -9. Blur: blur N times; N=0,1,2,3,4,5 +9. blur: blur N times; N=0,1,2,3,4,5 10.11,12,13. loseless operations (rotary 90, 180 degrees). 12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges more deep. @@ -56,9 +39,9 @@ Buttons description: 20. bright - -> decrease picture brightness. 21. wav effect -> do effect based sine function. 22. editor -> setting editing option. If is "on" then red bars are draw according to each - vertex, Pressing and moving left mouse button (cursor must be on handler)- change - vertex position. If left mouse button is released apply current position. You may also - decrease whole handlers count by enable culling (using appropriate button) - some - back handlers become hidden. + vertex, Pressing and moving left mouse button (cursor must be on handler)- change + vertex position. If left mouse button is released apply current position. You may also + decrease whole handlers count by enable culling (using appropriate button) - some + back handlers become hidden. - Maciej Guba XII 2021 + Maciej Guba march 2022 diff --git a/programs/demos/view3ds/view3ds.asm b/programs/demos/view3ds/view3ds.asm index e0caf2ba4d..9fc7afd9f7 100644 --- a/programs/demos/view3ds/view3ds.asm +++ b/programs/demos/view3ds/view3ds.asm @@ -1,5 +1,5 @@ -; application : View3ds ver. 0.076 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.077 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. ; compiler : FASM ; system : KolibriOS @@ -64,53 +64,74 @@ START: ; start of execution fstp [rsscale] pop ebx - call alloc_buffer_mem - call read_param - call read_from_disk ; read, if all is ok eax = 0 - cmp eax,0 - jne .gen - mov esi,[fptr] - cmp [esi],word 4D4Dh - jne .asc - call read_tp_variables ; init points and triangles count variables - cmp eax,0 - + call alloc_buffer_mem + call read_param + call read_from_disk ; read, if all is ok eax = 0 + btr eax,31 ; mark 1 + cmp eax,0 + jne .gen + bts eax,31 ; mark 2 + mov esi,[fptr] + cmp [esi],word 4D4Dh + jne .asc_gen + call read_tp_variables ; init points and triangles count variables + cmp eax,0 jne .malloc + xor eax,eax ; if failed read -> generate .gen: - ; if no house.3ds on board - generate - xor bl,bl ; reallocate memory + .asc_gen: ; read asc file or generate + push eax + ; if no house.3ds on rd - generate + xor bl,bl ; allocate memory mov [triangles_count_var],20000 mov [points_count_var],20000 call alloc_mem_for_tp + pop eax + bt eax,31 + jc .asc + mov bl,[generator_flag] + call generate_object + mov ax,1 ;mark - mov bl,[generator_flag] - call generate_object - jmp .opt + jmp .opt .asc: - mov [triangles_count_var],10000 ; to do: read asc header - mov [points_count_var],10000 - call alloc_mem_for_tp + ; xor bl,bl + ; mov [triangles_count_var],20000 ; to do: read asc header + ; mov [points_count_var],20000 + ; call alloc_mem_for_tp call read_asc + xor ax,ax jmp .opt .malloc: call alloc_mem_for_tp call read_from_file .opt: + if Ext >= SSE2 + push ax + end if call optimize_object1 ; proc in file b_procs.asm ; set point(0,0,0) in center and calc all coords ; to be in <-1.0,1.0> call normalize_all_light_vectors call copy_lights ; to aligned float - call init_triangles_normals2 + ; call init_triangles_normals2 + if Ext >= SSE2 + ; if first byte of ax set -> old style normal vectors finding call detect_chunks mov [chunks_number],ecx mov [chunks_ptr],ebx + push esi + push edi + call init_triangles_normals2 + ; esi - tri_ch + ; edi - t_ptr - every vertice index - pointer to to all triangles + ; that have this index + pop edi + pop esi + pop ax - ; esi - tri_ch - ; edi - t_ptr - every vertice index - pointer to to all triangles - ; that have this index end if call init_point_normals @@ -122,7 +143,6 @@ START: ; start of execution call do_color_buffer ; intit color_map if Ext >= SSE3 call init_point_lights - mov [fire_flag],0 ; proteza end if mov edi,bumpmap call calc_bumpmap @@ -206,10 +226,22 @@ START: ; start of execution jmp noclose red: ; redraw + ; xor edx,edx + ; @@: + ; push edx mov eax,9 ; get process info mov ebx,procinfo - mov ecx,-1 + or ecx,-1 int 0x40 + ; pop edx + ; inc edx + ; cmp dword[procinfo+26],50000000 ; ~ 10 Mbytes + ; jb @f + ; cmp edx,1 + ; je @b + + + ; @@: mov eax,[procinfo+42] ; read params of window sub eax,225 mov [size_x_var],ax @@ -297,14 +329,14 @@ START: ; start of execution call update_flags ; update flags and write labels of flags ; do other operations according to flag - cmp ah,3 ; ah = 3 -> shading model - jne .next_m6 - cmp [dr_flag],2 - jne @f +; cmp ah,3 ; ah = 3 -> shading model +; jne .next_m6 +; cmp [dr_flag],2 +; jne @f ; call init_envmap2 ; <----! this don't works in env mode ; and more than ~18 kb objects ; call init_envmap_cub2 - @@: +; @@: cmp [dr_flag],4 jne @f call generate_texture2 @@ -402,7 +434,7 @@ START: ; start of execution call detect_chunks mov [chunks_number],ecx mov [chunks_ptr],ebx - + mov ax,1 ; - old style detecting normal vectors ; esi - tri_ch ; edi - t_ptr - every vertice index - pointer to to all triangles ; that have this index @@ -412,6 +444,7 @@ START: ; start of execution call calc_bumpmap_coords ; bump and texture mapping call do_edges_list call write_info + .next_m2: cmp ah,19 je @f @@ -693,6 +726,7 @@ START: ; start of execution lea ecx,[eax*4] if (Ext = MMX)|(Ext = SSE) + emms mov bh,bl push bx shl ebx,16 @@ -884,9 +918,10 @@ clear_vertices_index: movzx ecx,word[size_y_var] imul ecx,eax xor eax,eax - shr ecx,1 + ; shr ecx,1 rep stosd ret + edit: ; mmx required, edit mesh by vertex push ebp mov ebp,esp @@ -895,9 +930,9 @@ edit: ; mmx required, edit mesh by vertex .y_coord equ ebp-2 .x_coord equ ebp-4 .points_translated equ ebp-10 - .points equ ebp-22 - .points_rotated equ ebp-34 - .mx equ ebp-70 + .points equ ebp-26 + .points_rotated equ ebp-26-16 + .mx equ ebp-26-56 macro check_bar { @@ -906,17 +941,11 @@ edit: ; mmx required, edit mesh by vertex movzx edx,word[size_x_var] imul edx,ecx add ebx,edx - push ebx mov ecx,ebx - shl ecx,2 - ; lea ecx,[ebx*2] + shl ecx,2 lea ebx,[ebx*3] - - cmp [dr_flag],12 - jl @f - add ebx,[esp] - @@: - add esp,4 + cmp [dr_flag],10 + cmovg ebx,ecx add ebx,[screen_ptr] mov ebx,[ebx] and ebx,0x00ffffff @@ -935,10 +964,9 @@ edit: ; mmx required, edit mesh by vertex pcmpgtw mm0,mm1 pcmpgtw mm3,mm1 pxor mm3,mm0 - movd eax,mm3 - mov cx,ax - shr eax,16 - and ax,cx + pmovmskb eax,mm3 + and eax,1111b + or ax,ax jz .no_edit @@ -949,15 +977,12 @@ edit: ; mmx required, edit mesh by vertex ; store both x and y coordinates ror eax,16 - ; push eax - ; sub esp,256 mov [.x_coord],eax test word[mouse_state],100000000b jz .not_press ; check if left mouse button press ; left button pressed - check_bar jne .no_edit add ecx,[vertices_index_ptr] @@ -992,29 +1017,17 @@ edit: ; mmx required, edit mesh by vertex check_bar jne .end - mov esi,[vertex_edit_no] - ; dec esi - lea esi,[esi*3] - add esi,esi - add esi,[points_translated_ptr] - emms + movd xmm0,[edit_end_x] + punpcklwd xmm0,[the_zero] + movd xmm1,[vect_x] + punpcklwd xmm1,[the_zero] + ; movd xmm2,[offset_y] + ; punpcklwd xmm2,[the_zero] + psubd xmm0,xmm1 + ; psubd xmm0,xmm2 + cvtdq2ps xmm0,xmm0 + movups [.points],xmm0 - movd mm1,dword[esi] - paddw mm1,mm0 - psubw mm1,qword[vect_x] - movd dword[esi],mm1 - - lea edi,[.points] - ; detranslate - fninit - fild word[esi+4] - fstp dword[edi+8] - fild word[esi+2] - fisub word[offset_x] - fstp dword[edi+4] - fild word[esi] - fisub word[offset_y] ; proteza - fstp dword[edi] mov esi,matrix lea edi,[.mx] @@ -1028,7 +1041,7 @@ edit: ; mmx required, edit mesh by vertex ; inject into vertex list mov edi,[vertex_edit_no] - ; dec edi + ; dec edi lea edi,[edi*3] shl edi,2 add edi,[points_ptr] @@ -1037,11 +1050,8 @@ edit: ; mmx required, edit mesh by vertex movsd movsd movsd - ; mov ecx,3 - ; cld - ; rep movsd - + mov dword[edit_start_x],0 mov dword[edit_end_x],0 mov [vertex_edit_no],-1 @@ -1096,7 +1106,7 @@ alloc_buffer_mem: mov esp,ebp pop ebp - +ret @@ -1511,6 +1521,7 @@ init_point_normals: ;in: ; esi - tri_ch ; edi - t_ptr +; ax = 1 -> old style finding normals .z equ dword [ebp-8] .y equ dword [ebp-12] .x equ [ebp-16] @@ -1519,6 +1530,7 @@ init_point_normals: .t_ptr equ dword [ebp-36] .tri_ch equ dword [ebp-40] .max_val equ dword [ebp-44] +.mark equ word [ebp-45] push ebp mov ebp,esp @@ -1527,9 +1539,9 @@ init_point_normals: mov .t_ptr,edi mov .tri_ch,esi - - - +; mov .mark,ax + bt ax,0 + jc .old1 mov ecx,[triangles_count_var] @@ -1581,6 +1593,9 @@ init_point_normals: jmp .end + .old1: + + xor edx,edx .old: @@ -1644,6 +1659,9 @@ init_point_normals: mov edx,.point_number cmp edx,[points_count_var] jne .ipn_loop + ; cmp .mark,1 + ; je .end1 + ; always free if Ext>=SSE2 .end: mov eax,68 @@ -1656,7 +1674,7 @@ init_point_normals: mov ecx,.tri_ch int 0x40 - + ; .end1: add esp,64 @@ -1817,38 +1835,37 @@ clrscr: movzx ecx,word[size_x_var] movzx eax,word[size_y_var] imul ecx,eax - - + cld xor eax,eax - if Ext=NON + ; if Ext=NON rep stosd - else if Ext = MMX - pxor mm0,mm0 - @@: - movq [edi+00],mm0 - movq [edi+08],mm0 - movq [edi+16],mm0 - movq [edi+24],mm0 - add edi,32 - sub ecx,8 - jnc @b - else - push ecx - mov ecx,edi - and ecx,0x0000000f - rep stosb - pop ecx - and ecx,0xfffffff0 - xorps xmm0,xmm0 - @@: - movaps [edi],xmm0 - movaps [edi+16],xmm0 - movaps [edi+32],xmm0 - movaps [edi+48],xmm0 - add edi,64 - sub ecx,16 - jnz @b - end if +; else if Ext = MMX +; pxor mm0,mm0 + ; @@: + ; movq [edi+00],mm0 +; movq [edi+08],mm0 +; movq [edi+16],mm0 + ; movq [edi+24],mm0 + ; add edi,32 + ; sub ecx,8 + ; jnc @b + ; else + ; push ecx + ; mov ecx,edi + ; and ecx,0x0000000f + ; rep stosb + ; pop ecx + ; and ecx,0xfffffff0 + ; xorps xmm0,xmm0 + ; @@: + ; movaps [edi],xmm0 + ; movaps [edi+16],xmm0 + ; movaps [edi+32],xmm0 + ; movaps [edi+48],xmm0 + ; add edi,64 + ; sub ecx,16 + ; jnz @b + ; end if ret @@ -1879,7 +1896,7 @@ draw_triangles: push ebp mov ebp,esp - sub esp,60 + sub esp,64 ; movzx ax,[dr_flag] mov .dr_flag,ax @@ -2777,6 +2794,7 @@ if Ext >= SSE3 ; je @f ; int3 ; @@: + mov eax, .index1x12 mov ebx, .index2x12 mov ecx, .index3x12 @@ -2945,7 +2963,7 @@ end if .eend: - add esp,60 + add esp,64 pop ebp ret @@ -2956,7 +2974,7 @@ draw_handlers: ; in eax - render model push ebp mov ebp,esp -; emms + emms .fac equ dword[ebp-16] .xplus_scr equ ebp-8 .xplus_index equ ebp-12 @@ -3320,12 +3338,12 @@ alloc_mem_for_tp: int 0x40 ; -> allocate memory to triangles mov [triangles_ptr], eax ; -> eax = pointer to allocated mem - mov eax, 68 - mov ecx,[triangles_count_var] - imul ecx,[i36] - mov edx,[edges_ptr] - int 0x40 ; -> allocate memory to triangles - mov [edges_ptr], eax ; -> eax = pointer to allocated mem +; mov eax, 68 +; mov ecx,[triangles_count_var] +; imul ecx,[i36] +; mov edx,[edges_ptr] +; int 0x40 ; -> allocate memory to triangles +; mov [edges_ptr], eax ; -> eax = pointer to allocated mem ; ststic memory @@ -3411,6 +3429,7 @@ read_from_disk: ; eax = 0 -> ok file loaded ret read_param: + cld mov esi,I_Param cmp dword[esi],0 je .end @@ -3543,9 +3562,6 @@ ret ; ******* WINDOW DEFINITIONS AND DRAW ******** ; ********************************************* draw_window: - movzx eax,[fire_flag] - push eax - ; int3 mov eax,12 ; function 12:tell os about windowdraw mov ebx,1 ; 1, start of draw int 0x40 @@ -3732,8 +3748,8 @@ ret mov eax,12 ; function 12:tell os about windowdraw mov ebx,2 ; 2, end of draw int 0x40 - pop eax - mov [fire_flag],al + ; pop eax + ; mov [fire_flag],al ret