From ffc2e1f7ca6fc1ebf855f3398634a4e24e7b60ea Mon Sep 17 00:00:00 2001 From: "Kirill Lipatov (Leency)" Date: Fri, 16 Sep 2011 11:23:46 +0000 Subject: [PATCH] View3DS v0.60 from Macgub git-svn-id: svn://kolibrios.org@2192 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/demos/3DS/BUMP_CAT.INC | 50 +- programs/demos/3DS/BUMP_TEX.INC | 338 ++------- programs/demos/3DS/DATA.INC | 2 +- programs/demos/3DS/GRD_TEX.INC | 57 +- programs/demos/3DS/TWO_TEX.INC | 1216 ++++++++++++++++--------------- programs/demos/3DS/VIEW3DS.ASM | 4 +- programs/demos/3DS/readme.txt | 11 +- 7 files changed, 778 insertions(+), 900 deletions(-) diff --git a/programs/demos/3DS/BUMP_CAT.INC b/programs/demos/3DS/BUMP_CAT.INC index 1e004b67a4..4078192dc3 100644 --- a/programs/demos/3DS/BUMP_CAT.INC +++ b/programs/demos/3DS/BUMP_CAT.INC @@ -81,10 +81,10 @@ bump_triangle_z: .cz2 equ [ebp-108] .cbx1 equ dword[ebp-112] .cby1 equ [ebp-116] -.cbx2 equ dword[ebp-120] -.cby2 equ [ebp-124] -.cex1 equ dword[ebp-128] -.cey1 equ [ebp-132] +.cex1 equ dword[ebp-120] +.cey1 equ [ebp-124] +.cbx2 equ dword[ebp-128] +.cby2 equ [ebp-132] .cex2 equ dword[ebp-136] .cey2 equ [ebp-140] @@ -533,8 +533,25 @@ end if jge .loop12_done .loop12: call .call_bump_line +if Ext >= SSE2 + movups xmm0,.cey2 + movups xmm1,.cey1 + movups xmm2,.dey12 + movups xmm3,.dey13 + paddd xmm0,xmm2 + paddd xmm1,xmm3 + movups .cey2,xmm0 + movups .cey1,xmm1 + movq mm4,.cz1 + movq mm5,.cz2 + paddd mm4,.dz13 + paddd mm5,.dz12 + movq .cz1,mm4 + movq .cz2,mm5 +end if -if Ext >= MMX + +if (Ext = MMX) | (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -553,7 +570,7 @@ if Ext >= MMX movq .cey2,mm2 movq .cz1,mm4 movq .cz2,mm5 -else +else if Ext = NON mov edx,.dbx13 add .cbx1,edx mov eax,.dbx12 @@ -617,8 +634,23 @@ end if .loop23: call .call_bump_line - -if Ext >= MMX +if Ext >= SSE2 + movups xmm0,.cey2 + movups xmm1,.cey1 + movups xmm2,.dey23 + movups xmm3,.dey13 + paddd xmm0,xmm2 + paddd xmm1,xmm3 + movups .cey2,xmm0 + movups .cey1,xmm1 + movq 
mm4,.cz1 + movq mm5,.cz2 + paddd mm4,.dz13 + paddd mm5,.dz23 + movq .cz1,mm4 + movq .cz2,mm5 +end if +if (Ext = MMX) | (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -637,7 +669,7 @@ if Ext >= MMX movq .cey2,mm2 movq .cz1,mm4 movq .cz2,mm5 -else +else if Ext = NON mov eax,.dx13 add .cx1,eax mov ebx,.dx23 diff --git a/programs/demos/3DS/BUMP_TEX.INC b/programs/demos/3DS/BUMP_TEX.INC index 43b5ad152c..fb19b1643c 100644 --- a/programs/demos/3DS/BUMP_TEX.INC +++ b/programs/demos/3DS/BUMP_TEX.INC @@ -292,24 +292,6 @@ if Ext>=SSE cvtps2pi mm1,xmm1 movq .dty12,mm0 movq .dz12,mm1 -;temporaly conversion to keep upside down -; fninit -; fld .dx12 -; fistp .dx12 -; fld dword .dz12 -; fistp dword .dz12 -; fld .dbx12 -; fistp .dbx12 -; fld dword .dby12 -; fistp dword .dby12 -; fld .dex12 -; fistp .dex12 -; fld dword .dey12 -; fistp dword .dey12 -; fld .dtx12 -; fistp .dtx12 -; fld dword .dty12 -; fistp dword .dty12 ;---- ; mov ax,.z2 ; sub ax,.z1 @@ -518,25 +500,6 @@ if Ext>=SSE movq .dty13,mm0 movq .dz13,mm1 -;temporaly conversion to keep upside down -; fninit -; fld .dx13 -; fistp .dx13 -; fld dword .dz13 -; fistp dword .dz13 -; fld .dbx13 -; fistp .dbx13 -; fld dword .dby13 -; fistp dword .dby13 -; fld .dex13 -; fistp .dex13 -; fld dword .dey13 -; fistp dword .dey13 -; fld .dtx13 -; fistp .dtx13 -; fld dword .dty13 -; fistp dword .dty13 - else mov ax,.x3 @@ -699,24 +662,6 @@ if Ext>=SSE movq .dz23,mm1 -;temporaly conversion to keep upside down -; fninit -; fld .dx23 -; fistp .dx23 -; fld dword .dz23 -; fistp dword .dz23 -; fld .dbx23 -; fistp .dbx23 -; fld dword .dby23 -; fistp dword .dby23 -; fld .dex23 -; fistp .dex23 -; fld dword .dey23 -; fistp dword .dey23 -; fld .dtx23 -; fistp .dtx23 -; fld dword .dty23 -; fistp dword .dty23 else mov ax,.x3 sub ax,.x2 @@ -794,57 +739,6 @@ end if ; sub esp,40 .bt_dx23_done: sub esp,64 -;if Ext>=SSE -; movsx eax,.x1 -; shl eax,ROUND -; cvtsi2ss xmm0,eax -; movss .cx1,xmm0 -; movss .cx2,xmm0 -; -; movsx ebx,word[.b_x1] -; 
shl ebx,ROUND -; cvtsi2ss xmm0,ebx -; movss .cbx1,xmm0 -; movss .cbx2,xmm0 -; -; movsx ecx,word[.b_y1] -; shl ecx,ROUND -; cvtsi2ss xmm0,ecx -; movss .cby1,xmm0 -; movss .cby2,xmm0 - -; movsx edx,word[.e_x1] -; shl edx,ROUND -; cvtsi2ss xmm0,edx -; movss .cex1,xmm0 -; movss .cex2,xmm0 -; -; movsx eax,word[.e_y1] -; shl eax,ROUND -; cvtsi2ss xmm0,eax -; movss .cey1,xmm0 -; movss .cey2,xmm0 -; - -; movsx ebx,.z1 -; shl ebx,CATMULL_SHIFT -; cvtsi2ss xmm0,ebx -; movss .cz1,xmm0 -; movss .cz2,xmm0 -; -; movsx ecx,word[.t_x1] -; shl ecx,ROUND -; cvtsi2ss xmm0,ecx -; movss .ctx1,xmm0 -; movss .ctx2,xmm0 - -; movsx edx,word[.t_y1] -; shl edx,ROUND -; cvtsi2ss xmm0,edx -; movss .cty1,xmm0 -; movss .cty2,xmm0 - -;else movsx eax,.x1 shl eax,ROUND @@ -902,14 +796,8 @@ end if mov .cty2,edx ; push edx ; push edx -;end if - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done - .loop12: - call .call_line -;if Ext >= SSE +;if Ext >= SSE2 ; movups xmm0,.cby1 ; movups xmm1,.cty1 ; movups xmm2,.cby2 @@ -918,22 +806,36 @@ end if ; movups xmm5,.dty13 ; movups xmm6,.dby12 ; movups xmm7,.dty12 -; addps xmm0,xmm4 -; addps xmm1,xmm5 -; addps xmm2,xmm6 -; addps xmm3,xmm7 -; -;; addps xmm0,.dby12 -;; addps xmm1,.dty12 -;; addps xmm2,.dby13 -;; addps xmm3,.dty13 -; movups .cby1,xmm0 -; movups .cty1,xmm1 -; movups .cby2,xmm2 -; movups .cty2,xmm3 ;end if + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done + .loop12: +;if Ext >= SSE2 +; fxsave [sse_repository] +;end if + call .call_line +if Ext >= SSE2 +; fxrstor [sse_repository] + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby12 + movups xmm7,.dty12 + paddd xmm0,xmm4 + paddd xmm1,xmm5 + paddd xmm2,xmm6 + paddd xmm3,xmm7 + movups .cby1,xmm0 + movups .cty1,xmm1 + movups .cby2,xmm2 + movups .cty2,xmm3 +end if -if Ext >= MMX +if (Ext = MMX) | (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -1005,7 +907,6 @@ end if cmp cx,.y3 jge .loop23_done -;if Ext < SSE 
movsx eax,.z2 shl eax,CATMULL_SHIFT @@ -1038,54 +939,7 @@ end if movzx ebx,word[.t_y2] shl ebx,ROUND mov .cty2,ebx -;else -; movsx eax,.z2 -; shl eax,CATMULL_SHIFT -; cvtsi2ss xmm0,eax -; movss .cz2,xmm0 -; -; movsx ebx,.x2 -; shl ebx,ROUND -; cvtsi2ss xmm1,ebx -; movss .cx2,xmm1 -; -; movzx edx,word[.b_x2] -; shl edx,ROUND -; cvtsi2ss xmm2,edx -; movss .cbx2,xmm2 -; -; movzx eax,word[.b_y2] -; shl eax,ROUND -; cvtsi2ss xmm0,eax -; movss .cby2,xmm0 -; -; movzx ebx,word[.e_x2] -; shl ebx,ROUND -; cvtsi2ss xmm1,ebx -; movss .cex2,xmm1 -; -; movzx edx,word[.e_y2] -; shl edx,ROUND -; cvtsi2ss xmm2,edx -; movss .cey2,xmm2 -; -; movzx eax,word[.t_x2] -; shl eax,ROUND -; cvtsi2ss xmm0,eax -; movss .ctx2,xmm0 -; -; movzx ebx,word[.t_y2] -; shl ebx,ROUND -; cvtsi2ss xmm1,ebx -; movss .cty2,xmm1 - -;end if - - .loop23: - call .call_line - -;if Ext >= SSE - +;if Ext >= SSE2 ; movups xmm0,.cby1 ; movups xmm1,.cty1 ; movups xmm2,.cby2 @@ -1094,21 +948,34 @@ end if ; movups xmm5,.dty13 ; movups xmm6,.dby23 ; movups xmm7,.dty23 -; addps xmm0,xmm4 -; addps xmm1,xmm5 -; addps xmm2,xmm6 -; addps xmm3,xmm7 -; ; addps xmm0,.dby13 -; ; addps xmm1,.dty13 -; ; addps xmm2,.dby23 -; ; addps xmm3,.dty23 -; movups .cby1,xmm0 -; movups .cty1,xmm1 -; movups .cby2,xmm2 -; movups .cty2,xmm3 -; ;end if -if Ext >= MMX + .loop23: +;if Ext >= SSE2 +; fxsave [sse_repository] +;end if + call .call_line + +if Ext >= SSE2 +; fxrstor [sse_repository] + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby23 + movups xmm7,.dty23 + paddd xmm0,xmm4 + paddd xmm1,xmm5 + paddd xmm2,xmm6 + paddd xmm3,xmm7 + movups .cby1,xmm0 + movups .cty1,xmm1 + movups .cby2,xmm2 + movups .cty2,xmm3 +; +end if +if (Ext = MMX) | (Ext = SSE) movq mm0,.cby2 movq mm1,.cby1 movq mm2,.cey2 @@ -1180,90 +1047,7 @@ end if ret 50 .call_line: -;if Ext >= SSE -; pushad -; push .tex_ptr -; fninit -; fld dword .cty1 -; fistp dword [esp-4] -; sub esp,4 -;; 
push dword .cty1 -; fld .ctx1 -; fistp dword [esp-4] -; sub esp,4 -;; push .ctx1 -; fld dword .cz1 -; fistp dword [esp-4] -; sub esp,4 -; ; push dwod .cz1 -; fld dword .cty2 -; fistp dword [esp-4] -; sub esp,4 -; ; push .cty2 -; fld .ctx2 -; fistp dword [esp-4] -; sub esp,4 -; ; push dword .ctx2 -; fld dword .cz2 -; fistp dword [esp-4] -; sub esp,4 -;; push dword .cz2 -; -; push .z_buff -; push .t_emap -; push .t_bmap -;-------------------------------------- -; fld dword .cey2 -; fistp dword [esp-4] -; sub esp,4 -;; push dword .cey2 -; -; fld .cex2 -; fistp dword [esp-4] -; sub esp,4 -;; push .cex2 -; -; fld dword .cby2 -; fistp dword [esp-4] -; sub esp,4 -; ; push dword .cby2 -; fld .cbx2 -; fistp dword [esp-4] -; sub esp,4 - ; push .cbx2 -;------------------------------------ -; fld dword .cey1 -; fistp dword [esp-4] -; sub esp,4 -;; push dword .cey1 -; fld .cex1 -; fistp dword [esp-4] -; sub esp,4 -; ; push .cex1 -; fld dword .cby1 -; fistp dword [esp-4] -; sub esp,4 -;; push dword .cby1 -; fld .cbx1 -; fistp dword [esp-4] -; sub esp,4 -;; push .cbx1 -; push ecx - -; fld .cx1 -; fistp dword [esp-4] -; mov eax,[esp-4] -; sar eax,ROUND -; fld .cx2 -; fistp dword [esp-4] -; mov ebx,[esp-4] -; sar ebx,ROUND - -; call bump_tex_line_z -; -; popad -;else pushad push .tex_ptr push dword .cty1 @@ -1281,8 +1065,16 @@ ret 50 push .cbx2 push dword .cey1 push .cex1 +;if Ext >= SSE2 +; sub esp,8 +; shufps xmm0,xmm0,10110100b +; movhps [esp],xmm0 ;================================ +;else + push dword .cby1 push .cbx1 +;end if + push ecx mov eax,.cx1 diff --git a/programs/demos/3DS/DATA.INC b/programs/demos/3DS/DATA.INC index 388f0fc475..c3b11ea1ff 100644 --- a/programs/demos/3DS/DATA.INC +++ b/programs/demos/3DS/DATA.INC @@ -271,7 +271,7 @@ base_vector: if Ext=SSE2 db ' (SSE2)' end if - db ' 0.059',0 + db ' 0.060',0 labellen: STRdata db '-1 ' diff --git a/programs/demos/3DS/GRD_TEX.INC b/programs/demos/3DS/GRD_TEX.INC index 4577110c32..e530c1f2c0 100644 --- 
a/programs/demos/3DS/GRD_TEX.INC +++ b/programs/demos/3DS/GRD_TEX.INC @@ -486,7 +486,7 @@ end if ; pop ebp ebx eax popad -if Ext >= MMX +if (Ext = MMX)|(Ext=SSE) movq mm0,.cur1b movq mm1,.cur1r movq mm2,.scan_y1 @@ -505,7 +505,25 @@ if Ext >= MMX movq .cur2b,mm3 movq .cur2r,mm4 movq .scan_y2,mm5 -else +end if +if Ext >= SSE2 + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc12b + movq mm2,.scan_y1 + movq mm5,.scan_y2 + paddd xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dy13 + paddd mm5,.tex_dy12 + movq .scan_y1,mm2 + movq .scan_y2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 +end if + +if Ext = NON mov edx,.dc13b add .cur1b,edx mov esi,.dc13g @@ -602,7 +620,7 @@ end if popad -if Ext >= MMX +if (Ext = MMX)|(Ext=SSE) movq mm0,.cur1b movq mm1,.cur1r movq mm2,.scan_y1 @@ -621,7 +639,24 @@ if Ext >= MMX movq .cur2b,mm3 movq .cur2r,mm4 movq .scan_y2,mm5 -else +end if +if Ext >= SSE2 + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc23b + movq mm2,.scan_y1 + movq mm5,.scan_y2 + paddd xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dy13 + paddd mm5,.tex_dy23 + movq .scan_y1,mm2 + movq .scan_y2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 +end if +if Ext = NON mov edx,.dc13b add .cur1b,edx mov esi,.dc13g @@ -740,8 +775,8 @@ if Ext=NON mov ecx,dword .z1 xchg ecx, .z2 mov dword .z1, ecx - -else +end if +if (Ext=MMX) movq mm0,.b1 ; b, g movq mm1,.b2 movq .b1, mm1 @@ -755,6 +790,16 @@ else movq .tex_x1,mm5 movq .tex_x2,mm4 +end if +if Ext>=SSE + movups xmm0,.b1 + movups xmm1,.b2 + movups .b1,xmm1 + movups .b2,xmm0 + movq mm4,.tex_x1 ; x, z + movq mm5,.tex_x2 + movq .tex_x1,mm5 + movq .tex_x2,mm4 end if @@: diff --git a/programs/demos/3DS/TWO_TEX.INC b/programs/demos/3DS/TWO_TEX.INC index 1654d6975e..045af3320f 100644 --- a/programs/demos/3DS/TWO_TEX.INC +++ b/programs/demos/3DS/TWO_TEX.INC @@ -32,72 +32,68 @@ two_tex_triangle_z: ;---------------------- pointer io Z buffer----- ;-- Z-buffer - filled with coordinates as 
dword -------- ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! -.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 ; b - first texture -.b_x3 equ ebp+12 -.b_y3 equ ebp+14 ; e - second texture -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer +.b_x1 equ ebp+4 ; procedure don't save registers !!! +.b_y1 equ ebp+6 ; each coordinate as word +.b_x2 equ ebp+8 +.b_y2 equ ebp+10 ; b - first texture +.b_x3 equ ebp+12 +.b_y3 equ ebp+14 ; e - second texture +.e_x1 equ ebp+16 +.e_y1 equ ebp+18 +.e_x2 equ ebp+20 +.e_y2 equ ebp+22 +.e_x3 equ ebp+24 +.e_y3 equ ebp+26 +.z1 equ word[ebp+28] +.z2 equ word[ebp+30] +.z3 equ word[ebp+32] +.z_buff equ dword[ebp+34] ; pointer to Z-buffer -.t_bmap equ dword[ebp-4] ; pointer to b. texture -.t_emap equ dword[ebp-8] ; pointer to e. texture -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] +.t_bmap equ dword[ebp-4] ; pointer to b. texture +.t_emap equ dword[ebp-8] ; pointer to e. 
texture +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] .dx12 equ dword[ebp-24] .dbx12 equ dword[ebp-28] .dby12 equ dword[ebp-32] -.dby12q equ [ebp-32] +.dby12q equ [ebp-32] .dex12 equ dword[ebp-36] .dey12 equ dword[ebp-40] -.dey12q equ [ebp-40] +.dey12q equ [ebp-40] .dz12 equ dword[ebp-44] .dx13 equ dword[ebp-48] .dbx13 equ dword[ebp-52] .dby13 equ dword[ebp-56] -.dby13q equ [ebp-56] +.dby13q equ [ebp-56] .dex13 equ dword[ebp-60] .dey13 equ dword[ebp-64] -.dey13q equ [ebp-64] +.dey13q equ [ebp-64] .dz13 equ dword[ebp-68] .dx23 equ dword[ebp-72] .dbx23 equ dword[ebp-76] .dby23 equ dword[ebp-80] -.dby23q equ [ebp-80] +.dby23q equ [ebp-80] .dex23 equ dword[ebp-84] .dey23 equ dword[ebp-88] -.dey23q equ [ebp-88] +.dey23q equ [ebp-88] .dz23 equ dword[ebp-92] .cx1 equ dword[ebp-96] ; current variables .cx2 equ dword[ebp-100] -;.cbx1q equ [ebp-104] .cbx1 equ dword[ebp-104] .cby1 equ [ebp-108] -;.cbx2q [ebp-112] -.cbx2 equ dword[ebp-112] -.cby2 equ [ebp-116] -;.cex1q equ [ebp-120] -.cex1 equ dword[ebp-120] -.cey1 equ [ebp-124] -;.cex2q equ [ebp-128] +.cex1 equ dword[ebp-112] +.cey1 equ [ebp-116] +.cbx2 equ dword[ebp-120] +.cby2 equ [ebp-124] .cex2 equ dword[ebp-128] .cey2 equ [ebp-132] @@ -110,10 +106,10 @@ two_tex_triangle_z: cld end if mov ebp,esp - push edx esi ; store bump map + push edx esi ; store bump map ; push esi ; store e. map ; sub esp,120 - .sort3: ; sort triangle coordinates... + .sort3: ; sort triangle coordinates... 
cmp ax,bx jle .sort1 xchg eax,ebx @@ -127,30 +123,30 @@ two_tex_triangle_z: xchg dx,.z2 mov .z1,dx .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.b_x2] + xchg edx,dword[.b_x3] + mov dword[.b_x2],edx + mov edx,dword[.e_x2] + xchg edx,dword[.e_x3] + mov dword[.e_x2],edx mov dx,.z2 xchg dx,.z3 mov .z2,dx - jmp .sort3 + jmp .sort3 .sort2: - push eax ebx ecx ; store triangle coords in variables + push eax ebx ecx ; store triangle coords in variables ; push ebx ; push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that ; or edx,ebx ; if any *one* of them is negative a sign flag is raised ; or edx,ecx @@ -165,44 +161,44 @@ two_tex_triangle_z: ; jg .loop23_done ; { - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make - mov ecx,6 - xor edx,edx + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx12_done + push edx ;dword 0 + loop @b + jmp .bt_dx12_done .bt_dx12_make: - mov ax,.x2 - sub ax,.x1 + mov ax,.x2 + sub ax,.x1 cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dx12,eax - push eax + push eax if Ext=SSE - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps ; mov eax,255 cvtsi2ss xmm4,[i255d] ;eax - divss xmm3,xmm4 - rcpss 
xmm3,xmm3 + divss xmm3,xmm4 + rcpss xmm3,xmm3 ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 + shufps xmm3,xmm3,0 - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] + movd mm0,[.b_x1] + movd mm1,[.b_x2] + movd mm2,[.e_x1] + movd mm3,[.e_x2] ; psubsw mm3,mm2 ; psubsw mm1,mm0 - pxor mm4,mm4 + pxor mm4,mm4 punpcklwd mm0,mm4 punpcklwd mm1,mm4 punpcklwd mm2,mm4 @@ -212,12 +208,12 @@ if Ext=SSE ; pslld mm2,ROUND ; pslld mm3,ROUND cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 + movlhps xmm0,xmm0 cvtpi2ps xmm0,mm2 cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 + movlhps xmm1,xmm1 cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 + subps xmm1,xmm0 ; pxor mm4,mm4 ; movq mm5,mm1 @@ -245,13 +241,13 @@ if Ext=SSE ; movlhps xmm0,xmm0 ; cvtpi2ps xmm0,mm3 ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .dey12q,mm0 - movq .dby12q,mm1 + movq .dey12q,mm0 + movq .dby12q,mm1 ; movd .dex12,mm0 ; psrlq mm0,32 @@ -263,152 +259,152 @@ if Ext=SSE ; movd .dby12,mm0 else - mov ax,word[.b_x2] - sub ax,word[.b_x1] + mov ax,word[.b_x2] + sub ax,word[.b_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dbx12,eax - push eax + push eax - mov ax,word[.b_y2] - sub ax,word[.b_y1] + mov ax,word[.b_y2] + sub ax,word[.b_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby12,eax - push eax + push eax ; mov eax,.dbx12 ; mov ebx,.dby12 ; int3 - mov ax,word[.e_x2] - sub ax,word[.e_x1] + mov ax,word[.e_x2] + sub ax,word[.e_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex12,eax - push eax + push eax - mov ax,word[.e_y2] - sub ax,word[.e_y1] + mov ax,word[.e_y2] + sub ax,word[.e_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey12,eax - push eax + push eax end if - mov ax,.z2 - sub ax,.z1 - cwde - shl 
eax,CATMULL_SHIFT - cdq - idiv ebx - push eax + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax .bt_dx12_done: - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,6 - xor edx,edx + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done + push edx ;dword 0 + loop @b + jmp .bt_dx13_done .bt_dx13_make: - mov ax,.x3 - sub ax,.x1 + mov ax,.x3 + sub ax,.x1 cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dx13,eax - push eax + push eax if Ext=SSE cvtsi2ss xmm3,ebx ; mov eax,255 cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 + divss xmm3,xmm4 + rcpss xmm3,xmm3 ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 + shufps xmm3,xmm3,0 + sub esp,16 - movd mm0,[.b_x1] - movd mm1,[.b_x3] - movd mm2,[.e_x1] - movd mm3,[.e_x3] + movd mm0,[.b_x1] + movd mm1,[.b_x3] + movd mm2,[.e_x1] + movd mm3,[.e_x3] - pxor mm4,mm4 + pxor mm4,mm4 punpcklwd mm0,mm4 punpcklwd mm1,mm4 punpcklwd mm2,mm4 punpcklwd mm3,mm4 cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 + movlhps xmm0,xmm0 cvtpi2ps xmm0,mm2 cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 + movlhps xmm1,xmm1 cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 + subps xmm1,xmm0 ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .dey13q,mm0 - movq .dby13q,mm1 + movq .dey13q,mm0 + movq .dby13q,mm1 else - mov ax,word[.b_x3] - sub ax,word[.b_x1] + mov ax,word[.b_x3] + sub ax,word[.b_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dbx13,eax - push eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y1] + mov ax,word[.b_y3] + sub ax,word[.b_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby13,eax - push eax + push eax - 
mov ax,word[.e_x3] - sub ax,word[.e_x1] + mov ax,word[.e_x3] + sub ax,word[.e_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex13,eax - push eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y1] + mov ax,word[.e_y3] + sub ax,word[.e_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey13,eax - push eax + push eax end if @@ -422,99 +418,99 @@ end if push eax .bt_dx13_done: - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,6 - xor edx,edx + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done + push edx ;dword 0 + loop @b + jmp .bt_dx23_done .bt_dx23_make: - mov ax,.x3 - sub ax,.x2 + mov ax,.x3 + sub ax,.x2 cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dx23,eax - push eax + push eax if Ext=SSE cvtsi2ss xmm3,ebx ; mov eax,255 cvtsi2ss xmm4,[i255d] ;eax - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 + divss xmm3,xmm4 + shufps xmm3,xmm3,0 + sub esp,16 - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] + movd mm0,[.b_x2] + movd mm1,[.b_x3] + movd mm2,[.e_x2] + movd mm3,[.e_x3] - pxor mm4,mm4 + pxor mm4,mm4 punpcklwd mm0,mm4 punpcklwd mm1,mm4 punpcklwd mm2,mm4 punpcklwd mm3,mm4 cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 + movlhps xmm0,xmm0 cvtpi2ps xmm0,mm2 cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 + movlhps xmm1,xmm1 cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 + subps xmm1,xmm0 - divps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 + divps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .dey23q,mm0 - movq .dby23q,mm1 + movq .dey23q,mm0 + movq .dby23q,mm1 else - mov ax,word[.b_x3] - sub ax,word[.b_x2] + mov ax,word[.b_x3] + sub ax,word[.b_x2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov 
.dbx23,eax - push eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y2] + mov ax,word[.b_y3] + sub ax,word[.b_y2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby23,eax - push eax + push eax - mov ax,word[.e_x3] - sub ax,word[.e_x2] + mov ax,word[.e_x3] + sub ax,word[.e_x2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex23,eax - push eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y2] + mov ax,word[.e_y3] + sub ax,word[.e_y2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey23,eax - push eax + push eax end if mov ax,.z3 sub ax,.z2 @@ -526,214 +522,230 @@ end if push eax ; sub esp,40 .bt_dx23_done: - movsx eax,.x1 - shl eax,ROUND + movsx eax,.x1 + shl eax,ROUND ; mov .cx1,eax ; mov .cx2,eax - push eax eax + push eax eax ; push eax - movsx eax,word[.b_x1] - shl eax,ROUND - mov .cbx1,eax - mov .cbx2,eax + movsx eax,word[.b_x1] + shl eax,ROUND + mov .cbx1,eax + mov .cbx2,eax ; push eax eax ; push eax - movsx eax,word[.b_y1] - shl eax,ROUND - mov .cby1,eax - mov .cby2,eax + movsx eax,word[.b_y1] + shl eax,ROUND + mov .cby1,eax + mov .cby2,eax ; push eax eax ; push eax - movsx eax,word[.e_x1] - shl eax,ROUND - mov .cex1,eax - mov .cex2,eax + movsx eax,word[.e_x1] + shl eax,ROUND + mov .cex1,eax + mov .cex2,eax ; push eax eax ;push eax - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax - sub esp,32 + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax + sub esp,32 ; push eax eax ;push eax - movsx eax,.z1 - shl eax,CATMULL_SHIFT + movsx eax,.z1 + shl eax,CATMULL_SHIFT ; mov .cz1,eax ; mov .cz2,eax push eax eax ;push eax - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done .loop12: - call .call_line + call .call_line - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx - -if Ext >= MMX - movq mm0,.cby2 ; with this optimization object - movq mm1,.cby1 ; looks bit annoying - movq mm2,.cey2 - movq 
mm3,.cey1 - paddd mm0,.dby12q - paddd mm1,.dby13q - paddd mm2,.dey12q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx +if Ext>= SSE2 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey12q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 +else if (Ext = MMX) | (Ext=SSE) + movq mm0,.cby2 ; with this optimization object + movq mm1,.cby1 ; looks bit annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby12q + paddd mm1,.dby13q + paddd mm2,.dey12q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax end if - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx - inc ecx - cmp cx,.y2 - jl .loop12 + inc ecx + cmp cx,.y2 + jl .loop12 .loop12_done: - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax - movsx eax,.x2 - shl eax,ROUND - mov .cx2,eax + movsx eax,.x2 + shl eax,ROUND + mov .cx2,eax - movzx eax,word[.b_x2] - shl eax,ROUND - mov .cbx2,eax + movzx eax,word[.b_x2] + shl eax,ROUND + mov .cbx2,eax - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax + movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax - movzx eax,word[.e_x2] - 
shl eax,ROUND - mov .cex2,eax + movzx eax,word[.e_x2] + shl eax,ROUND + mov .cex2,eax - movzx eax,word[.e_y2] - shl eax,ROUND - mov .cey2,eax + movzx eax,word[.e_y2] + shl eax,ROUND + mov .cey2,eax .loop23: - call .call_line + call .call_line ;if Ext = NON - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx - -if Ext >= MMX - movq mm0,.cby2 ; with this mmx optimization object looks bit - movq mm1,.cby1 ; annoying - movq mm2,.cey2 - movq mm3,.cey1 - paddd mm0,.dby23q - paddd mm1,.dby13q - paddd mm2,.dey23q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey2,mm2 - movq .cey1,mm3 + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx +if Ext>= SSE2 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey23q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 +else if (Ext = MMX) | ( Ext = SSE) + movq mm0,.cby2 ; with this mmx optimization object looks bit + movq mm1,.cby1 ; annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby23q + paddd mm1,.dby13q + paddd mm2,.dey23q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey2,mm2 + movq .cey1,mm3 else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby23 - add .cby2,edx + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax end if - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx ;else ; movq mm0,.db13q ; movq mm1,.cbx1q - inc ecx - cmp cx,.y3 - jl .loop23 + inc ecx + cmp cx,.y3 + jl .loop23 .loop23_done: - mov esp,ebp + mov esp,ebp ret 34 .call_line: 
pushad - push .cz1 - push .cz2 - push .z_buff - push .t_bmap - push .t_emap - push dword .cey2 - push .cex2 - push dword .cey1 - push .cex1 - push dword .cby2 - push .cbx2 - push dword .cby1 - push .cbx1 - push ecx + push .cz1 + push .cz2 + push .z_buff + push .t_bmap + push .t_emap + push dword .cey2 + push .cex2 + push dword .cey1 + push .cex1 + push dword .cby2 + push .cbx2 + push dword .cby1 + push .cbx1 + push ecx - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND - call two_tex_line_z + call two_tex_line_z popad ret @@ -742,92 +754,92 @@ two_tex_line_z: ;-------------- ebx - x2 ;-------------- edi - pointer to screen buffer ;stack - another parameters : -.y equ dword [ebp+4] -.bx1 equ [ebp+8] ; --- -.by1 equ [ebp+12] ; | -.bx2 equ [ebp+16] ; | -.by2 equ [ebp+20] ; |> b. texture and e. texture coords -.ex1 equ [ebp+24] ; |> shifted shl ROUND -.ey1 equ [ebp+28] ; | -.ex2 equ [ebp+32] ; | -.ey2 equ [ebp+36] ; --- -.emap equ [ebp+40] ; b texture offset -.bmap equ [ebp+44] ; e texture offset +.y equ dword [ebp+4] +.bx1 equ [ebp+8] ; --- +.by1 equ [ebp+12] ; | +.bx2 equ [ebp+16] ; | +.by2 equ [ebp+20] ; |> b. texture and e. 
texture coords +.ex1 equ [ebp+24] ; |> shifted shl ROUND +.ey1 equ [ebp+28] ; | +.ex2 equ [ebp+32] ; | +.ey2 equ [ebp+36] ; --- +.emap equ [ebp+40] ; b texture offset +.bmap equ [ebp+44] ; e texture offset .z_buff equ dword [ebp+48] -.z2 equ dword [ebp+52] ; -- |> z coords shifted -.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT +.z2 equ dword [ebp+52] ; -- |> z coords shifted +.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT -.x1 equ dword [ebp-4] -.x2 equ dword [ebp-8] -.dbx equ [ebp-12] -.dex equ [ebp-16] -.dby equ [ebp-20] -.dey equ [ebp-24] -.dz equ dword [ebp-28] -.cbx equ [ebp-32] -.cex equ [ebp-36] -.cby equ [ebp-40] -.cey equ [ebp-44] -.cz equ dword [ebp-48] +.x1 equ dword [ebp-4] +.x2 equ dword [ebp-8] +.dbx equ [ebp-12] +.dex equ [ebp-16] +.dby equ [ebp-20] +.dey equ [ebp-24] +.dz equ dword [ebp-28] +.cbx equ [ebp-32] +.cex equ [ebp-36] +.cby equ [ebp-40] +.cey equ [ebp-44] +.cz equ dword [ebp-48] .czbuff equ dword [ebp-52] - mov ebp,esp + mov ebp,esp - mov ecx,.y - or ecx,ecx - jl .bl_end - cmp ecx,SIZE_Y - jge .bl_end + mov ecx,.y + or ecx,ecx + jl .bl_end + cmp ecx,SIZE_Y + jge .bl_end - cmp eax,ebx - jl @f - je .bl_end + cmp eax,ebx + jl @f + je .bl_end - xchg eax,ebx + xchg eax,ebx if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx - mov edx,.ex1 - xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx + mov edx,.ex1 + xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx else - movq mm0,.bx1 - movq mm1,.ex1 - movq mm2,.bx2 - movq mm3,.ex2 - movq .bx2,mm0 - movq .ex2,mm1 - movq .bx1,mm2 - movq .ex1,mm3 + movq mm0,.bx1 + movq mm1,.ex1 + movq mm2,.bx2 + movq mm3,.ex2 + movq .bx2,mm0 + movq .ex2,mm1 + movq .bx1,mm2 + movq .ex1,mm3 end if - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx @@: - push eax ebx + push eax ebx ; push ebx ;store 
x1, x2 - cmp .x1,SIZE_X - jge .bl_end - cmp .x2,0 - jle .bl_end + cmp .x1,SIZE_X + jge .bl_end + cmp .x2,0 + jle .bl_end - mov ebx,.x2 - sub ebx,.x1 + mov ebx,.x2 + sub ebx,.x1 if Ext>=SSE - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + shufps xmm3,xmm3,0 ; movq mm0,.bx1q ; movq mm1,.bx2q @@ -840,107 +852,107 @@ if Ext>=SSE ; cvtpi2ps xmm1,mm3 cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 - movlhps xmm0,xmm0 + movlhps xmm0,xmm0 cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 - movlhps xmm1,xmm1 + movlhps xmm1,xmm1 cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 - subps xmm1,xmm0 - ; hi lo - divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex + subps xmm1,xmm0 + ; hi lo + divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex - shufps xmm1,xmm1,11011000b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 + shufps xmm1,xmm1,11011000b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .dex,mm0 ; hi - lo -> dbx, dex - movq .dey,mm1 ; hi - lo -> dby, dey + movq .dex,mm0 ; hi - lo -> dbx, dex + movq .dey,mm1 ; hi - lo -> dby, dey else - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax + mov eax,.bx2 ; calc .dbx + sub eax,.bx1 + cdq + idiv ebx + push eax - mov eax,.ex2 ; calc .dby - sub eax,.ex1 - cdq - idiv ebx - push eax + mov eax,.ex2 ; calc .dby + sub eax,.ex1 + cdq + idiv ebx + push eax - mov eax,.by2 ; calc .dex - sub eax,.by1 - cdq - idiv ebx - push eax + mov eax,.by2 ; calc .dex + sub eax,.by1 + cdq + idiv ebx + push eax - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax + mov eax,.ey2 ; calc .dey + sub eax,.ey1 + cdq + idiv ebx + push eax end if - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax - cmp .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; cutting triangle exceedes screen - mov ebx,.x1 - neg ebx - imul ebx ; eax = .dz * 
abs(.x1) - add .z1,eax - mov .x1,0 + cmp .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; cutting triangle exceedes screen + mov ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 - mov eax,.dbx - imul ebx - add .bx1,eax + mov eax,.dbx + imul ebx + add .bx1,eax - mov eax,.dby - imul ebx - add .by1,eax + mov eax,.dby + imul ebx + add .by1,eax - mov eax,.dex - imul ebx - add .ex1,eax + mov eax,.dex + imul ebx + add .ex1,eax - mov eax,.dey - imul ebx - add .ey1,eax + mov eax,.dey + imul ebx + add .ey1,eax @@: - cmp .x2,SIZE_X - jl @f - mov .x2,SIZE_X + cmp .x2,SIZE_X + jl @f + mov .x2,SIZE_X @@: - mov eax,SIZE_X ;calc memory begin in buffers - mov ebx,.y - mul ebx - mov ebx,.x1 - add eax,ebx - mov ebx,eax - lea eax,[eax*3] - add edi,eax ; edi - screen - mov esi,.z_buff ; z-buffer filled with dd variables - shl ebx,2 - add esi,ebx ; esi - Z buffer + mov eax,SIZE_X ;calc memory begin in buffers + mov ebx,.y + mul ebx + mov ebx,.x1 + add eax,ebx + mov ebx,eax + lea eax,[eax*3] + add edi,eax ; edi - screen + mov esi,.z_buff ; z-buffer filled with dd variables + shl ebx,2 + add esi,ebx ; esi - Z buffer - mov ecx,.x2 - sub ecx,.x1 - ; init current variables - push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi - push dword .ex1 - push dword .by1 - push dword .ey1 + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi + push dword .ex1 + push dword .by1 + push dword .ey1 - push .z1 ; current z shl CATMULL_SHIFT - push esi + push .z1 ; current z shl CATMULL_SHIFT + push esi if Ext >= MMX pxor mm0,mm0 @@ -959,104 +971,104 @@ end if .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode if Ext=NON - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] else - mov ebx,.cz - cmp ebx,dword[edx] + mov ebx,.cz + cmp ebx,dword[edx] 
end if - jge .skip + jge .skip if Ext=NON - mov eax,.cby - mov esi,.cbx - sar eax,ROUND - sar esi,ROUND - shl eax,TEX_SHIFT ;- - add esi,eax - lea esi,[esi*3] ;- ; esi - current b. texture addres - add esi,.bmap + mov eax,.cby + mov esi,.cbx + sar eax,ROUND + sar esi,ROUND + shl eax,TEX_SHIFT ;- + add esi,eax + lea esi,[esi*3] ;- ; esi - current b. texture addres + add esi,.bmap - mov ebx,.cex ;.cex - current env map X - mov eax,.cey ;.cey - current env map y - sar ebx,ROUND - sar eax,ROUND + mov ebx,.cex ;.cex - current env map X + mov eax,.cey ;.cey - current env map y + sar ebx,ROUND + sar eax,ROUND - shl eax,TEX_SHIFT - add ebx,eax - lea ebx,[ebx*3] - add ebx,.emap + shl eax,TEX_SHIFT + add ebx,eax + lea ebx,[ebx*3] + add ebx,.emap else - movq mm5,mm4 ;.cey - psrad mm5,ROUND - pslld mm5,TEX_SHIFT - movq mm6,mm3 ;.cex - psrad mm6,ROUND - paddd mm5,mm6 - movq mm6,mm5 - paddd mm5,mm5 - paddd mm5,mm6 - paddd mm5,.emap - movd esi,mm5 - psrlq mm5,32 - movd ebx,mm5 + movq mm5,mm4 ;.cey + psrad mm5,ROUND + pslld mm5,TEX_SHIFT + movq mm6,mm3 ;.cex + psrad mm6,ROUND + paddd mm5,mm6 + movq mm6,mm5 + paddd mm5,mm5 + paddd mm5,mm6 + paddd mm5,.emap + movd esi,mm5 + psrlq mm5,32 + movd ebx,mm5 end if if Ext>=MMX - movd mm1,[esi] - movd mm2,[ebx] - punpcklbw mm1,mm0 - punpcklbw mm2,mm0 - pmullw mm1,mm2 - psrlw mm1,8 - packuswb mm1,mm0 - movd [edi],mm1 - mov ebx,.cz - mov dword[edx],ebx + movd mm1,[esi] + movd mm2,[ebx] + punpcklbw mm1,mm0 + punpcklbw mm2,mm0 + pmullw mm1,mm2 + psrlw mm1,8 + packuswb mm1,mm0 + movd [edi],mm1 + mov ebx,.cz + mov dword[edx],ebx else - cld ; esi - tex e. - lodsb ; ebx - tex b. - mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - mov ebx,.cz - mov esi,.czbuff - mov dword[esi],ebx - jmp .no_skip + cld ; esi - tex e. + lodsb ; ebx - tex b. 
+ mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + mov ebx,.cz + mov esi,.czbuff + mov dword[esi],ebx + jmp .no_skip end if .skip: - add edi,3 + add edi,3 if Ext = NON .no_skip: - add .czbuff,4 - mov eax,.dbx - add .cbx,eax - mov eax,.dby - add .cby,eax - mov eax,.dex - add .cex,eax - mov eax,.dey - add .cey,eax + add .czbuff,4 + mov eax,.dbx + add .cbx,eax + mov eax,.dby + add .cby,eax + mov eax,.dex + add .cex,eax + mov eax,.dey + add .cey,eax else - add edx,4 - paddd mm3,.dex - paddd mm4,.dey + add edx,4 + paddd mm3,.dex + paddd mm4,.dey ; movq mm5,mm3 ; movq mm6,mm4 ; psrad mm5,ROUND @@ -1064,16 +1076,16 @@ end if ; movq .cex,mm3 ; movq .cey,mm4 end if - mov eax,.dz - add .cz,eax + mov eax,.dz + add .cz,eax if Ext = NON - dec ecx - jnz .draw + dec ecx + jnz .draw else - loop .draw + loop .draw end if .bl_end: - mov esp,ebp + mov esp,ebp ret 56 diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM index 05860b2ccc..84ae2d4532 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/3DS/VIEW3DS.ASM @@ -1,5 +1,5 @@ -; application : View3ds ver. 0.059 - tiny .3ds files viewer. +; application : View3ds ver. 0.060 - tiny .3ds files viewer. ; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba @@ -35,7 +35,7 @@ NON = 0 ; -/ \- MMX = 1 SSE = 2 SSE2 = 3 -Ext = MMX ;Ext={ NON | MMX | SSE | SSE2 } +Ext = MMX ;Ext={ NON | MMX | SSE | SSE2 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) USE_LFN = 1 diff --git a/programs/demos/3DS/readme.txt b/programs/demos/3DS/readme.txt index e701010755..36f0ad39aa 100644 --- a/programs/demos/3DS/readme.txt +++ b/programs/demos/3DS/readme.txt @@ -1,11 +1,8 @@ -View3ds 0.059 - tiny viewer to .3ds files. +View3ds 0.060 - tiny viewer to .3ds files. What's new? -1. Bump and pararell two texture mapping functions optimizations. 
- (files bump_cat.inc & two_tex.inc) - On my P4 changes are rather non visable, but on dual core in KlbrInWin - optimizations runs preety nice. - +1. Header fix by Leency. +2. SSE2 optimizations by me. (Most visible in BUMP_TEX mode.) Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -33,4 +30,4 @@ Buttons description: 18. re-map tex -> re-map texture and bump map coordinates, to change spherical mapping around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button. - Macgub Jun 2011 + Macgub Aug 2011