diff --git a/programs/demos/3DS/BUMP3.INC b/programs/demos/3DS/BUMP3.INC index 4db1fed748..c86f068c64 100644 --- a/programs/demos/3DS/BUMP3.INC +++ b/programs/demos/3DS/BUMP3.INC @@ -436,13 +436,13 @@ bump_line: ;-------------- edi - pointer to screen buffer ;stack - another parameters : .y equ dword [ebp+4] -.bx1 equ dword [ebp+8] ; --- +.bx1 equ [ebp+8] ; --- .by1 equ dword [ebp+12] ; | -.bx2 equ dword [ebp+16] ; | +.bx2 equ [ebp+16] ; | .by2 equ dword [ebp+20] ; |> bump and env coords -.ex1 equ dword [ebp+24] ; |> shifted shl ROUND +.ex1 equ [ebp+24] ; |> shifted shl ROUND .ey1 equ dword [ebp+28] ; | -.ex2 equ dword [ebp+32] ; | +.ex2 equ [ebp+32] ; | .ey2 equ dword [ebp+36] ; --- .bmap equ dword [ebp+40] .emap equ dword [ebp+44] @@ -450,13 +450,13 @@ bump_line: .x1 equ dword [ebp-4] .x2 equ dword [ebp-8] .dbx equ dword [ebp-12] -.dby equ dword [ebp-16] +.dby equ [ebp-16] .dex equ dword [ebp-20] -.dey equ dword [ebp-24] +.dey equ [ebp-24] .cbx equ dword [ebp-28] -.cby equ dword [ebp-32] +.cby equ [ebp-32] .cex equ dword [ebp-36] -.cey equ dword [ebp-40] +.cey equ [ebp-40] mov ebp,esp mov ecx,.y @@ -470,7 +470,7 @@ bump_line: je .bl_end xchg eax,ebx - +if Ext = NON mov edx,.bx1 xchg edx,.bx2 mov .bx1,edx @@ -484,6 +484,16 @@ bump_line: mov edx,.ey1 xchg edx,.ey2 mov .ey1,edx +else + movq mm0,.bx1 + movq mm1,.bx2 + movq mm2,.ex1 + movq mm3,.ex2 + movq .bx2,mm0 + movq .bx1,mm1 + movq .ex1,mm3 + movq .ex2,mm2 +end if .bl_ok: push eax push ebx ;store x1, x2 @@ -523,9 +533,9 @@ bump_line: idiv ecx push eax - push .bx1 + push dword .bx1 push .by1 - push .ex1 + push dword .ex1 push .ey1 .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode @@ -591,7 +601,14 @@ bump_line: .put_pixel: stosd dec edi - +;if Ext >= MMX +; movq mm0,.cby +; movq mm1,.cey +; paddd mm0,.dby +; paddd mm1,.dey +; movq .cby,mm0 +; movq .cey,mm1 +;else mov eax,.dbx add .cbx,eax mov eax,.dby @@ -600,7 +617,7 @@ bump_line: add .cex,eax mov eax,.dey add .cey,eax - +;end if dec ecx jnz .draw ; 
end if diff --git a/programs/demos/3DS/BUMP_CAT.INC b/programs/demos/3DS/BUMP_CAT.INC index aa0eb34ce6..0ab609f52e 100644 --- a/programs/demos/3DS/BUMP_CAT.INC +++ b/programs/demos/3DS/BUMP_CAT.INC @@ -710,20 +710,14 @@ bump_line_z: ;-------------- edi - pointer to screen buffer ;stack - another parameters : .y equ dword [ebp+4] -.bx1q equ [ebp+8] -.bx2q equ [ebp+16] -.ex1q equ [ebp+24] -.ex2q equ [ebp+32] -.bx1 equ dword [ebp+8] ; --- +.bx1 equ [ebp+8] ; --- .by1 equ dword [ebp+12] ; | -.bx2 equ dword [ebp+16] ; | +.bx2 equ [ebp+16] ; | .by2 equ dword [ebp+20] ; |> bump and env coords -.ex1 equ dword [ebp+24] ; |> shifted shl ROUND +.ex1 equ [ebp+24] ; |> shifted shl ROUND .ey1 equ dword [ebp+28] ; | -.ex2 equ dword [ebp+32] ; | +.ex2 equ [ebp+32] ; | .ey2 equ dword [ebp+36] ; --- -;.bx1q equ qword [ebp+8] ; - new -;.ex1q equ qword [ebp+24] ; - new .bmap equ dword [ebp+40] .emap equ dword [ebp+44] .z_buff equ dword [ebp+48] @@ -733,18 +727,14 @@ bump_line_z: .x1 equ dword [ebp-4] .x2 equ dword [ebp-8] .dbx equ dword [ebp-12] -.dby equ dword [ebp-16] -.dbyq equ qword [ebp-16] ; - new +.dby equ [ebp-16] .dex equ dword [ebp-20] -.dey equ dword [ebp-24] -.deyq equ qword [ebp-24] ; - new +.dey equ [ebp-24] .dz equ dword [ebp-28] .cbx equ dword [ebp-32] -.cby equ dword [ebp-36] -.cbyq equ qword [ebp-36] ; - new +.cby equ [ebp-36] .cex equ dword [ebp-40] -.cey equ dword [ebp-44] -.ceyq equ qword [ebp-44] ; - new +.cey equ [ebp-44] .cz equ dword [ebp-48] .czbuff equ dword [ebp-52] .temp1 equ ebp-60 @@ -782,14 +772,14 @@ if Ext=NON mov .ey1,edx else - movq mm0,.bx1q - movq mm1,.ex1q - movq mm2,.bx2q - movq mm3,.ex2q - movq .bx2q,mm0 - movq .ex2q,mm1 - movq .bx1q,mm2 - movq .ex1q,mm3 + movq mm0,.bx1 + movq mm1,.ex1 + movq mm2,.bx2 + movq mm3,.ex2 + movq .bx2,mm0 + movq .ex2,mm1 + movq .bx1,mm2 + movq .ex1,mm3 end if @@ -816,12 +806,12 @@ if Ext >= SSE cvtsi2ss xmm3,ebx ;rcps shufps xmm3,xmm3,0 - cvtpi2ps xmm0,.bx1q ;mm0 + cvtpi2ps xmm0,.bx1 ;mm0 movlhps xmm0,xmm0 - cvtpi2ps 
xmm0,.ex1q ;mm2 - cvtpi2ps xmm1,.bx2q ;mm1 + cvtpi2ps xmm0,.ex1 ;mm2 + cvtpi2ps xmm1,.bx2 ;mm1 movlhps xmm1,xmm1 - cvtpi2ps xmm1,.ex2q ;mm3 + cvtpi2ps xmm1,.ex2 ;mm3 subps xmm1,xmm0 divps xmm1,xmm3 @@ -830,8 +820,8 @@ if Ext >= SSE cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .deyq,mm0 - movq .dbyq,mm1 + movq .dey,mm0 + movq .dby,mm1 else @@ -911,9 +901,9 @@ end if mov ecx,.x2 sub ecx,.x1 ; init current variables - push .bx1 + push dword .bx1 push .by1 - push .ex1 + push dword .ex1 push .ey1 push .z1 ; current z shl CATMULL_SHIFT @@ -1072,6 +1062,14 @@ end if .no_skip: add .czbuff,4 ;; if Ext = NON +if Ext >= MMX + movq mm0,.cby + movq mm1,.cey + paddd mm0,.dby + paddd mm1,.dey + movq .cby,mm0 + movq .cey,mm1 +else mov eax,.dbx add .cbx,eax mov eax,.dby @@ -1080,6 +1078,7 @@ end if add .cex,eax mov eax,.dey add .cey,eax +end if ;; else ;; paddd mm0,mm2 ;; paddd mm1,mm3 diff --git a/programs/demos/3DS/BUMP_TEX.INC b/programs/demos/3DS/BUMP_TEX.INC index 3d80ff5574..7cfefeae72 100644 --- a/programs/demos/3DS/BUMP_TEX.INC +++ b/programs/demos/3DS/BUMP_TEX.INC @@ -178,28 +178,20 @@ bump_tex_triangle_z: loop @b jmp .bt_dx12_done .bt_dx12_make: + movsx ebx,bx mov ax,.x2 sub ax,.x1 cwde - movsx ebx,bx shl eax,ROUND cdq idiv ebx ; mov .dx12,eax push eax - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - if Ext>=SSE - sub esp,16 + sub esp,28 ; mov eax,256 cvtsi2ss xmm4,[i255d] cvtsi2ss xmm3,ebx ;rcps @@ -228,12 +220,54 @@ if Ext>=SSE shufps xmm1,xmm1,10110001b ;xmm1--> | dbx | dby | dex | dey | - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 + cvtps2pi mm1,xmm1 ;mm1,xmm1 movq .dey12,mm0 movq .dby12,mm1 ;------------- + ; pxor mm0,mm0 + ; pxor mm1,mm1 + ;/ pinsrw mm0,.z1,1 + ;/ pinsrw mm0,.x1,0 + ;/ pinsrw mm1,.z2,1 + ;/ pinsrw mm1,.x2,0 + mov ax,.z2 + sub ax,.z1 + cwde + ;/ movd mm1,eax + + ;/ punpcklwd mm0,mm4 + 
;/ punpcklwd mm1,mm4 + + ; cvtpi2ps xmm1,mm1 + ; cvtpi2ps xmm2,mm0 + ; subps xmm1,xmm2 + + ;/ psubd mm1,mm0 + + movd mm2,[.t_x1] + movd mm3,[.t_x2] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + ;/ cvtpi2ps xmm1,mm1 + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | xxx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | xxx | dz | dtx | dty | + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtss2si eax,xmm1 ; eax --> 1 delta dword | dz | + movq .dty12,mm0 + mov .dz12,eax +;---- ; mov ax,.z2 ; sub ax,.z1 ; cwde @@ -243,6 +277,7 @@ if Ext>=SSE ; movd mm1,eax ; psllq mm1,32 ; movd mm1,ebx + ;; push ebx ;; push eax ;; movq mm1,[esp] @@ -282,6 +317,13 @@ if Ext>=SSE ; movq .dty12,mm0 ; movq .dz12,mm1 else + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax mov ax,word[.b_x2] sub ax,word[.b_x1] @@ -319,7 +361,7 @@ else ; mov .dey12,eax push eax -end if +;end if mov ax,word[.t_x2] sub ax,word[.t_x1] @@ -338,7 +380,7 @@ end if idiv ebx ; mov .dty12,eax push eax - +end if .bt_dx12_done: mov bx,.y3 ; calc delta13 @@ -361,18 +403,9 @@ end if ; mov .dx13,eax push eax - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz13,eax - push eax - if Ext>=SSE - sub esp,16 + sub esp,28 ; mov eax,255 cvtsi2ss xmm4,[i255d] cvtsi2ss xmm3,ebx ;rcps @@ -406,7 +439,42 @@ if Ext>=SSE cvtps2pi mm1,xmm1 movq .dey13,mm0 movq .dby13,mm1 +;---------- + mov ax,.z3 + sub ax,.z1 + cwde + + movd mm2,[.t_x1] + movd mm3,[.t_x3] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | xxx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | xxx | dz | dtx | dty | + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtss2si eax,xmm1 ; eax --> 1 delta dword | dz | + movq .dty13,mm0 + mov .dz13,eax else + mov 
ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz13,eax + push eax + + mov ax,word[.b_x3] sub ax,word[.b_x1] cwde @@ -442,7 +510,6 @@ else idiv ebx ; mov .dey13,eax push eax -end if mov ax,word[.t_x3] sub ax,word[.t_x1] @@ -461,7 +528,7 @@ end if idiv ebx ; mov .dty13,eax push eax - +end if .bt_dx13_done: mov bx,.y3 ; calc delta23 @@ -484,18 +551,9 @@ end if ; mov .dx23,eax push eax - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz23,eax - push eax - if Ext>=SSE - sub esp,16 + sub esp,28 ; mov eax,255 cvtsi2ss xmm4,[i255d] cvtsi2ss xmm3,ebx ;rcps @@ -529,7 +587,42 @@ if Ext>=SSE cvtps2pi mm1,xmm1 movq .dey23,mm0 movq .dby23,mm1 +;---------- + mov ax,.z3 + sub ax,.z2 + cwde + + movd mm2,[.t_x2] + movd mm3,[.t_x3] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | xxx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | xxx | dz | dtx | dty | + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtss2si eax,xmm1 ; eax --> 1 delta dword | dz | + movq .dty23,mm0 + mov .dz23,eax else + + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz23,eax + push eax + mov ax,word[.b_x3] sub ax,word[.b_x2] cwde @@ -565,7 +658,7 @@ else idiv ebx ; mov .dey23,eax push eax -end if + mov ax,word[.t_x3] sub ax,word[.t_x2] @@ -584,7 +677,7 @@ end if idiv ebx ; mov .dty23,eax push eax - +end if ; sub esp,40 .bt_dx23_done: sub esp,64 diff --git a/programs/demos/3DS/GRD3.INC b/programs/demos/3DS/GRD3.INC index ade6e1be47..1cbe2d00ac 100644 --- a/programs/demos/3DS/GRD3.INC +++ b/programs/demos/3DS/GRD3.INC @@ -22,27 +22,25 @@ gouraud_triangle: .x3 equ word[ebp-10] .y3 equ word[ebp-12] -.dc12r equ dword[ebp-16] -.dc12g equ dword[ebp-20] -.dc12b equ dword[ebp-24] -.dc13r equ dword[ebp-28] -.dc13g equ dword[ebp-32] -.dc13b equ dword[ebp-36] -.dc23r equ 
dword[ebp-40] -.dc23g equ dword[ebp-44] -.dc23b equ dword[ebp-48] - -.c1r equ dword[ebp-52] -.c1g equ dword[ebp-56] -.c1b equ dword[ebp-60] -.c2r equ dword[ebp-64] -.c2g equ dword[ebp-68] -.c2b equ dword[ebp-72] - -.dx12 equ dword[ebp-76] -.dx13 equ dword[ebp-80] -.dx23 equ dword[ebp-84] +.dx12 equ dword[ebp-16] +.dc12r equ dword[ebp-20] +.dc12g equ [ebp-24] +.dc12b equ dword[ebp-28] +.dx13 equ dword[ebp-32] +.dc13r equ dword[ebp-36] +.dc13g equ [ebp-40] +.dc13b equ dword[ebp-44] +.dx23 equ dword[ebp-48] +.dc23r equ dword[ebp-52] +.dc23g equ [ebp-56] +.dc23b equ dword[ebp-60] +.c1r equ dword[ebp-64] +.c1g equ [ebp-68] +.c1b equ dword[ebp-72] +.c2r equ dword[ebp-76] +.c2g equ [ebp-80] +.c2b equ dword[ebp-84] mov ebp,esp @@ -73,7 +71,7 @@ gouraud_triangle: push eax ;store triangle coordinates in user friendly variables push ebx push ecx - sub esp,72 ; set correctly value of esp + ; sub esp,72 ; set correctly value of esp mov edx,eax ; check only X triangle coordinate or edx,ebx @@ -94,10 +92,15 @@ gouraud_triangle: mov bx,.y2 ; calc deltas sub bx,.y1 jnz .gt_dx12_make - mov .dx12,0 - mov .dc12r,0 - mov .dc12g,0 - mov .dc12b,0 + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + ; mov .dx12,0 + ; mov .dc12r,0 + ; mov .dc12g,0 + ; mov .dc12b,0 jmp .gt_dx12_done .gt_dx12_make: @@ -108,7 +111,8 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dx12,eax + ; mov .dx12,eax + push eax mov ax,word[.col2r] sub ax,word[.col1r] @@ -116,30 +120,39 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dc12r,eax + ; mov .dc12r,eax + push eax + mov ax,word[.col2g] sub ax,word[.col1g] cwde shl eax,ROUND cdq idiv ebx - mov .dc12g,eax + ; mov .dc12g,eax + push eax + mov ax,word[.col2b] sub ax,word[.col1b] cwde shl eax,ROUND cdq idiv ebx - mov .dc12b,eax + ; mov .dc12b,eax + push eax .gt_dx12_done: - mov bx,.y3 sub bx,.y1 jnz .gt_dx13_make - mov .dx13,0 - mov .dc13r,0 - mov .dc13g,0 - mov .dc13b,0 + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + ; mov .dx13,0 + ; mov 
.dc13r,0 + ; mov .dc13g,0 + ; mov .dc13b,0 jmp .gt_dx13_done .gt_dx13_make: mov ax,.x3 @@ -149,7 +162,8 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dx13,eax + ; mov .dx13,eax + push eax mov ax,word[.col3r] sub ax,word[.col1r] @@ -157,30 +171,38 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dc13r,eax + ; mov .dc13r,eax + push eax + mov ax,word[.col3g] sub ax,word[.col1g] cwde shl eax,ROUND cdq idiv ebx - mov .dc13g,eax + ; mov .dc13g,eax + push eax mov ax,word[.col3b] sub ax,word[.col1b] cwde shl eax,ROUND cdq idiv ebx - mov .dc13b,eax + ; mov .dc13b,eax + push eax .gt_dx13_done: - mov bx,.y3 sub bx,.y2 jnz .gt_dx23_make - mov .dx23,0 - mov .dc23r,0 - mov .dc23g,0 - mov .dc23b,0 + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + ; mov .dx23,0 + ; mov .dc23r,0 + ; mov .dc23g,0 + ; mov .dc23b,0 jmp .gt_dx23_done .gt_dx23_make: mov ax,.x3 @@ -190,7 +212,8 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dx23,eax + ; mov .dx23,eax + push eax mov ax,word[.col3r] sub ax,word[.col2r] @@ -198,23 +221,30 @@ gouraud_triangle: shl eax,ROUND cdq idiv ebx - mov .dc23r,eax + ; mov .dc23r,eax + push eax + + mov ax,word[.col3g] sub ax,word[.col2g] cwde shl eax,ROUND cdq idiv ebx - mov .dc23g,eax + ; mov .dc23g,eax + push eax + mov ax,word[.col3b] sub ax,word[.col2b] cwde shl eax,ROUND cdq idiv ebx - mov .dc23b,eax -.gt_dx23_done: + ; mov .dc23b,eax + push eax +.gt_dx23_done: + sub esp,24 movsx eax,.x1 shl eax,ROUND mov ebx,eax @@ -240,6 +270,8 @@ gouraud_triangle: push edi push ebp + sar ebx,ROUND + push bx mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors sar edx,ROUND push dx @@ -249,6 +281,9 @@ gouraud_triangle: mov edx,.c2b sar edx,ROUND push dx + + sar eax,ROUND + push ax mov edx,.c1r sar edx,ROUND push dx @@ -259,10 +294,6 @@ gouraud_triangle: sar edx,ROUND push dx push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax call gouraud_line pop ebp @@ -271,16 +302,28 @@ gouraud_triangle: pop ebx pop eax +if Ext >= MMX + movq mm0,.c1g + 
paddd mm0,.dc13g + movq .c1g,mm0 +else mov edx,.dc13r add .c1r,edx mov edx,.dc13g add .c1g,edx +end if mov edx,.dc13b add .c1b,edx +if Ext >= MMX + movq mm0,.c2g + paddd mm0,.dc12g + movq .c2g,mm0 +else mov edx,.dc12r add .c2r,edx mov edx,.dc12g add .c2g,edx +end if mov edx,.dc12b add .c2b,edx @@ -313,6 +356,8 @@ gouraud_triangle: push edi push ebp + sar ebx,ROUND + push bx mov edx,.c2r sar edx,ROUND push dx @@ -322,6 +367,9 @@ gouraud_triangle: mov edx,.c2b sar edx,ROUND push dx + + sar eax,ROUND + push ax mov edx,.c1r sar edx,ROUND push dx @@ -332,10 +380,6 @@ gouraud_triangle: sar edx,ROUND push dx push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax call gouraud_line pop ebp @@ -344,16 +388,28 @@ gouraud_triangle: pop ebx pop eax +if Ext >= MMX + movq mm0,.c1g + paddd mm0,.dc13g + movq .c1g,mm0 +else mov edx,.dc13r add .c1r,edx mov edx,.dc13g add .c1g,edx +end if mov edx,.dc13b add .c1b,edx +if Ext >= MMX + movq mm0,.c2g + paddd mm0,.dc23g + movq .c2g,mm0 +else mov edx,.dc23r add .c2r,edx mov edx,.dc23g add .c2g,edx +end if mov edx,.dc23b add .c2b,edx @@ -370,15 +426,15 @@ ret 18 gouraud_line: ;-------------in - edi - pointer to screen buffer ;----------------- stack - another parameters -.x1 equ word [ebp+4] -.x2 equ word [ebp+6] -.y equ word [ebp+8] -.col1b equ ebp+10 -.col1g equ ebp+12 -.col1r equ ebp+14 -.col2b equ ebp+16 -.col2g equ ebp+18 -.col2r equ ebp+20 +.y equ word [ebp+4] +.col1b equ ebp+6 +.col1g equ ebp+8 +.col1r equ ebp+10 +.x1 equ [ebp+12] +.col2b equ ebp+14 +.col2g equ ebp+16 +.col2r equ ebp+18 +.x2 equ [ebp+20] .dc_r equ dword[ebp-4] .dc_g equ dword[ebp-8] .dc_b equ dword[ebp-12] @@ -395,14 +451,19 @@ gouraud_line: je .gl_quit jl .gl_ok - xchg ax,.x2 - mov .x1,ax - mov eax,dword[.col1b] - xchg eax,dword[.col2b] - mov dword[.col1b],eax - mov ax,word[.col1r] - xchg ax,word[.col2r] - mov word[.col1r],ax +if Ext >= MMX + movq mm0,[.col1b] + movq mm1,[.col2b] + movq [.col1b],mm1 + movq [.col2b],mm0 +else + mov eax,[.col1b] + xchg 
eax,[.col2b] + mov [.col1b],eax + mov eax,[.col1r] + xchg eax,[.col2r] + mov [.col1r],eax +end if .gl_ok: ; cmp .x1,SIZE_X-1 ;check ; jg .gl_quit @@ -420,7 +481,7 @@ gouraud_line: movsx ecx,.y mov eax,SIZE_X*3 mul ecx - movsx ebx,.x1 + movsx ebx,word .x1 lea ecx,[ebx*2+eax] add edi,ecx add edi,ebx @@ -478,4 +539,152 @@ gouraud_line: .gl_quit: ; add esp,12 mov esp,ebp -ret 18 \ No newline at end of file +ret 18 +if 0 +gouraud_line_SSE: ; new +;-------------in - edi - pointer to screen buffer +;----------------- stack - another parameters +.y equ word [ebp+4] +.col1b equ ebp+6 +.col1g equ ebp+8 +.col1r equ ebp+10 +.x1 equ [ebp+12] +.col2b equ ebp+14 +.col2g equ ebp+16 +.col2r equ ebp+18 +.x2 equ [ebp+20] +.dc_r equ dword[ebp-4] +.dc_g equ dword[ebp-8] +.dc_b equ dword[ebp-12] +.lenght equ [ebp-16] +.factor equ [ebp-24] ;new + mov ebp,esp + + mov ax,.y + or ax,ax + jl .gl_quit + cmp ax,SIZE_Y-1 + jg .gl_quit + + mov ax,.x1 + cmp ax,.x2 + je .gl_quit + jl .gl_ok + +if Ext >= MMX + movq mm0,[.col1b] + movq mm1,[.col2b] + movq [.col1b],mm1 + movq [.col2b],mm0 +else + mov eax,[.col1b] + xchg eax,[.col2b] + mov [.col1b],eax + mov eax,[.col1r] + xchg eax,[.col2r] + mov [.col1r],eax +end if +.gl_ok: + ; cmp .x1,SIZE_X-1 ;check + ; jg .gl_quit + ; cmp .x2,SIZE_X-1 + ; jl @f + ; mov .x2,SIZE_X-1 + ; @@: + ; cmp .x1,0 + ; jg @f + ; mov .x1,0 + ; @@: + ; cmp .x2,0 + ; jl .gl_quit + + movsx ecx,.y + mov eax,SIZE_X*3 + mul ecx + movsx ebx,word .x1 + lea ecx,[ebx*2+eax] + add edi,ecx + add edi,ebx + + mov ax,word[.col2r] + sub ax,word[.col1r] + cwde + shl eax,ROUND + cdq + mov cx,.x2 + sub cx,.x1 + movsx ecx,cx + idiv ecx + ;mov .dc_r,eax ;first delta + push eax + + mov ax,word[.col2g] + sub ax,word[.col1g] + cwde + shl eax,ROUND + cdq + idiv ecx + ;mov .dc_g,eax + push eax + + mov ax,word[.col2b] + sub ax,word[.col1b] + cwde + shl eax,ROUND + cdq + idiv ecx + ; mov .dc_b,eax + push eax + + movsx ebx,word[.col1r] + shl ebx,ROUND + movsx edx,word[.col1g] + shl edx,ROUND + movsx 
esi,word[.col1b] + shl esi,ROUND + + push ecx ; store line lenght + movd mm3,.dc_r + psrlq mm3,16 ; load dr to lowest word of mm3 + pxor mm2,mm2 ; clear mm2 + movd mm4,.dc_g + punpcklwd mm3,mm3 ; unpack dr to lower 2 words in in mm3 + psrlq mm4,16 ; load dg to lowest word of mm4 + movd mm5,.dc_b + psrlq mm5,16 ; load db to lowest word of mm5 + punpcklwd mm4,mm4 ; unpack dg to lower 2 words in in mm3 + lea ecx,[factor] + punpckldq mm3,mm3 + punpcklwd mm5,mm5 ; unpack db to lower 2 words in in mm5 + movq mm6,[.col1b] + xor eax,eax + pinsrw mm6,eax,3 ; clear the highest word in mm6 + mov eax,010000h + punpckldq mm4,mm4 ; unpack dg to 4 words in mm4 + mov [ecx],eax + mov eax,030002h + punpckldq mm5,mm5 ; unpack db to 4 words in mm5 + movq mm7,mm6 ; load r1r1,g1g1,b1b1 to the first three + ; words of mm7 + pxor mm1,mm1 ; clear mm1 + + +.gl_draw: + mov eax,ebx + sar eax,ROUND + stosb + mov eax,edx + sar eax,ROUND + stosb + mov eax,esi + sar eax,ROUND + stosb + add ebx,.dc_r + add edx,.dc_g + add esi,.dc_b + loop .gl_draw +.gl_quit: + ; add esp,12 + mov esp,ebp +ret 18 +end if diff --git a/programs/demos/3DS/GRD_TEX.INC b/programs/demos/3DS/GRD_TEX.INC index 191c2016b0..4577110c32 100644 --- a/programs/demos/3DS/GRD_TEX.INC +++ b/programs/demos/3DS/GRD_TEX.INC @@ -65,46 +65,44 @@ tex_plus_grd_triangle: .dx12 equ dword[ebp-28] ;dd ? .tex_dx12 equ dword[ebp-32] ;dd ? - .tex_dy12 equ dword[ebp-36] ;dd ? + .tex_dy12 equ [ebp-36] ;dd ? .dz12 equ dword[ebp-40] ;dd ? - .dc12r equ dword[ebp-44] ;dd ? + .dc12r equ [ebp-44] ;dd ? .dc12g equ dword[ebp-48] ;dd ? - .dc12b equ dword[ebp-52] ;dd ? + .dc12b equ [ebp-52] ;dd ? .dx23 equ dword[ebp-56] ;dd ? .tex_dx23 equ dword[ebp-60] ;dd ? - .tex_dy23 equ dword[ebp-64] ;dd ? + .tex_dy23 equ [ebp-64] ;dd ? .dz23 equ dword[ebp-68] ;dd ? - .dc23r equ dword[ebp-72] ;dd ? + .dc23r equ [ebp-72] ;dd ? .dc23g equ dword[ebp-76] ;dd ? - .dc23b equ dword[ebp-80] ;dword[ebp-8]dd ? + .dc23b equ [ebp-80] ;dword[ebp-8]dd ? .dx13 equ dword[ebp-84] ;dd ? 
.tex_dx13 equ dword[ebp-88] ;dd ? - .tex_dy13 equ dword[ebp-92] ;dd ? + .tex_dy13 equ [ebp-92] ;dd ? .dz13 equ dword[ebp-96] ;dd ? - .dc13r equ dword[ebp-100] ;dd ? + .dc13r equ [ebp-100] ;dd ? .dc13g equ dword[ebp-104] ;dd ? - .dc13b equ dword[ebp-108] ;dd ? + .dc13b equ [ebp-108] ;dd ? - .zz1 equ dword[ebp-112] ;dw ? - .zz2 equ dword[ebp-116] ;dw ? - .cur1r equ dword[ebp-120] ;dw ? - .cur1g equ dword[ebp-124] ;dw ? - .cur1b equ dword[ebp-128] ;dw ? - .cur2r equ dword[ebp-132] ;dw ? - .cur2g equ dword[ebp-136] ;dw ? - .cur2b equ dword[ebp-140] ;dw ? - .scan_x1 equ dword[ebp-144] ;dd ? - .scan_x2 equ dword[ebp-148] ;dd ? - .scan_y1 equ dword[ebp-152] ;dd ? - .scan_y2 equ dword[ebp-156] ;dd ? + .scan_x1 equ dword[ebp-112] ;dd ? + .scan_y1 equ [ebp-116] ;dd ? + .zz1 equ dword[ebp-120] ;dw ? + .cur1r equ [ebp-124] ;dw ? + .cur1g equ dword[ebp-128] ;dw ? + .cur1b equ [ebp-132] ;dw ? + + .scan_x2 equ dword[ebp-136] ;dd ? + .scan_y2 equ [ebp-140] ;dd ? + .zz2 equ dword[ebp-144] ;dw ? + .cur2r equ [ebp-148] ;dw ? + .cur2g equ dword[ebp-152] ;dw ? + .cur2b equ [ebp-156] ;dw ? 
mov ebp,esp -if Ext>=MMX - emms -end if ; mov .tex_ptr,edx ; mov .z_ptr,esi @@ -468,18 +466,18 @@ end if push .zz2 push .scan_x2 - push .scan_y2 - push .cur2r + push dword .scan_y2 + push dword .cur2r push .cur2g - push .cur2b + push dword .cur2b push .zz1 push .scan_x1 - push .scan_y1 - push .cur1r + push dword .scan_y1 + push dword .cur1r push .cur1g - push .cur1b + push dword .cur1b sar eax,ROUND sar ebx,ROUND @@ -487,18 +485,39 @@ end if ; pop ebp ebx eax popad + +if Ext >= MMX + movq mm0,.cur1b + movq mm1,.cur1r + movq mm2,.scan_y1 + movq mm3,.cur2b + movq mm4,.cur2r + movq mm5,.scan_y2 + paddd mm0,.dc13b + paddd mm1,.dc13r + paddd mm2,.tex_dy13 + paddd mm3,.dc12b + paddd mm4,.dc12r + paddd mm5,.tex_dy12 + movq .cur1b,mm0 + movq .cur1r,mm1 + movq .scan_y1,mm2 + movq .cur2b,mm3 + movq .cur2r,mm4 + movq .scan_y2,mm5 +else mov edx,.dc13b add .cur1b,edx mov esi,.dc13g add .cur1g,esi mov edi,.dc13r add .cur1r,edi + mov edx,.dz13 + add .zz1,edx mov edx,.tex_dx13 add .scan_x1,edx mov esi,.tex_dy13 add .scan_y1,esi - mov edx,.dz13 - add .zz1,edx mov edi,.dc12b add .cur2b,edi @@ -512,6 +531,7 @@ end if add .scan_y2,esi mov edx,.dz12 add .zz2,edx +end if add eax,.dx13 add ebx,.dx12 inc cx @@ -563,24 +583,45 @@ end if push .zz2 push .scan_x2 - push .scan_y2 - push .cur2r + push dword .scan_y2 + push dword .cur2r push .cur2g - push .cur2b + push dword .cur2b push .zz1 push .scan_x1 - push .scan_y1 - push .cur1r + push dword .scan_y1 + push dword .cur1r push .cur1g - push .cur1b + push dword .cur1b sar eax,ROUND sar ebx,ROUND call horizontal_tex_grd_line popad + +if Ext >= MMX + movq mm0,.cur1b + movq mm1,.cur1r + movq mm2,.scan_y1 + movq mm3,.cur2b + movq mm4,.cur2r + movq mm5,.scan_y2 + paddd mm0,.dc13b + paddd mm1,.dc13r + paddd mm2,.tex_dy13 + paddd mm3,.dc23b + paddd mm4,.dc23r + paddd mm5,.tex_dy23 + movq .cur1b,mm0 + movq .cur1r,mm1 + movq .scan_y1,mm2 + movq .cur2b,mm3 + movq .cur2r,mm4 + movq .scan_y2,mm5 +else mov edx,.dc13b add .cur1b,edx mov esi,.dc13g @@ 
-606,6 +647,7 @@ end if add .scan_y2,esi mov edx,.dz23 add .zz2,edx +end if add eax,.dx13 add ebx,.dx23 inc cx diff --git a/programs/demos/3DS/TEX_CAT.INC b/programs/demos/3DS/TEX_CAT.INC index 68a3ee87d1..8fb9db5df9 100644 --- a/programs/demos/3DS/TEX_CAT.INC +++ b/programs/demos/3DS/TEX_CAT.INC @@ -59,9 +59,6 @@ tex_triangle_z: .cz1 equ dword[ebp-88] .cz2 equ dword[ebp-92] -if Ext >= MMX - emms -end if mov ebp,esp push esi ; store memory pointers push edx diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM index 7a4751c306..f0c331d022 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/3DS/VIEW3DS.ASM @@ -1,6 +1,6 @@ -; application : View3ds ver. 0.053 - tiny .3ds files viewer. -; compiler : FASM 1.65.13 +; application : View3ds ver. 0.054 - tiny .3ds files viewer. +; compiler : FASM 1.67.11 ; system : KolibriOS/MenuetOS ; author : Macgub aka Maciej Guba ; email : macgub3@wp.pl @@ -215,6 +215,13 @@ still: .next_m: cmp ah,18 jne .next_m2 + if USE_LFN + mov [re_alloc_flag],1 ; reallocate memory + mov [triangles_count_var],1000 + mov [points_count_var],1000 + call alloc_mem_for_tp + mov [re_alloc_flag],0 + end if mov bl,[generator_flag] or bl,bl jz .next_m2 @@ -244,8 +251,11 @@ still: @@: mov edi,bumpmap call calc_bumpmap - .next_m3: - + .next_m3: + cmp ah,21 ; re map bumps, texture coordinates + jne @f + call calc_bumpmap_coords + @@: jmp noclose @@ -595,14 +605,16 @@ calc_bumpmap_coords: ; map texture, bump ; mov ebp,esp ; sub esp,4 - fninit ; spherical mapping around y axle + fninit fldpi fadd st,st -; fstp .Pi2 mov esi,[points_ptr] mov edi,tex_points movzx ecx,[points_count_var] inc ecx +; cmp [map_tex_flag],1 +; jne .cylindric + ; spherical mapping around y axle @@: fld dword[esi] ; x coord @@ -635,8 +647,27 @@ calc_bumpmap_coords: ; map texture, bump add edi,4 loop @b ffree st0 +; jmp .end_map +; .cylindric: +; fld dword[esi] ; around y axle +; fld dword[esi+8] +; fpatan +; fdiv st0,st1 +; fimul [tex_x_div2] +; fiadd 
[tex_x_div2] +; fistp word[edi] -; mov esp,ebp +; fld dword[esi+4] +; fimul [tex_y_div2] +; fiadd [tex_y_div2] +; fistp word[edi+2] + +; add esi,12 +; add edi,4 +; loop .cylindric +; ffree st0 +;; mov esp,ebp +; .end_map: ret @@ -2399,11 +2430,18 @@ ret if USE_LFN alloc_mem_for_tp: mov eax, 68 + cmp [re_alloc_flag],1 + jz @f mov ebx, 12 + jmp .alloc + @@: + mov ebx,20 + .alloc: movzx ecx, [triangles_count_var] inc ecx lea ecx, [ecx*3] add ecx, ecx + mov edx,[triangles_ptr] int 0x40 ; -> allocate memory to triangles mov [triangles_ptr], eax ; -> eax = pointer to allocated mem @@ -2419,6 +2457,7 @@ alloc_mem_for_tp: movzx ecx, [triangles_count_var] lea ecx, [3+ecx*3] shl ecx, 2 + mov edx,[triangles_normals_ptr] int 0x40 ; -> allocate memory for triangles normals mov [triangles_normals_ptr], eax ; -> eax = pointer to allocated mem @@ -2426,22 +2465,26 @@ alloc_mem_for_tp: movzx ecx, [points_count_var] lea ecx,[3+ecx*3] shl ecx, 2 + mov edx,[points_normals_ptr] int 0x40 mov [points_normals_ptr], eax mov eax, 68 - mov ebx, 12 + ; mov ebx, 12 movzx ecx, [points_count_var] lea ecx,[3+ecx*3] shl ecx, 2 + mov edx,[points_normals_rot_ptr] int 0x40 mov [points_normals_rot_ptr], eax mov eax, 68 + mov edx,[points_ptr] int 0x40 mov [points_ptr], eax mov eax, 68 + mov edx,[points_rotated_ptr] int 0x40 mov [points_rotated_ptr], eax @@ -2449,6 +2492,7 @@ alloc_mem_for_tp: movzx ecx, [points_count_var] inc ecx shl ecx, 3 + mov edx,[points_translated_ptr] int 0x40 mov [points_translated_ptr], eax ret @@ -2592,7 +2636,7 @@ ret add bx,110 mov cx,[size_y] add cx,30 - mov edx,0x02000000 ; color of work area RRGGBB,8->color gl + mov edx,0x14000000 ; color of work area RRGGBB,8->color gl mov esi,0x805080d0 ; color of grab bar RRGGBB,8->color gl mov edi,0x005080d0 ; color of frames RRGGBB int 0x40 @@ -2606,15 +2650,15 @@ ret int 0x40 ; CLOSE BUTTON - mov eax,8 ; function 8 : define and draw button - movzx ebx,[size_x] - shl ebx,16 - add ebx, 91 shl 16 + 12 -; mov 
ebx,(SIZE_X+80+30-19)*65536+12 ; [x start] *65536 + [x size] - mov ecx,5*65536+12 ; [y start] *65536 + [y size] - mov edx,1 ; button id - mov esi,0x6688dd ; button color RRGGBB - int 0x40 +; mov eax,8 ; function 8 : define and draw button +; movzx ebx,[size_x] +; shl ebx,16 +; add ebx, 91 shl 16 + 12 +;; mov ebx,(SIZE_X+80+30-19)*65536+12 ; [x start] *65536 + [x size] +; mov ecx,5*65536+12 ; [y start] *65536 + [y size] +; mov edx,1 ; button id +; mov esi,0x6688dd ; button color RRGGBB +; int 0x40 call buttons ; more buttons diff --git a/programs/demos/3DS/data.inc b/programs/demos/3DS/data.inc index e6352b799e..0c753d684b 100644 --- a/programs/demos/3DS/data.inc +++ b/programs/demos/3DS/data.inc @@ -15,6 +15,7 @@ size_x dw SIZE_X size_y dw SIZE_Y + re_alloc_flag db 0 angle_counter dw 0 piD180 dd 0.017453292519943295769236907684886 piD128 dd 0.024544 @@ -146,6 +147,11 @@ bumps_flag db 0 bumps_deep_flag db 3 dd bumps_d_f + db 21 + db 're-map tex' + db 255 +map_tex_flag db ? ;1 + dd ? ;bumps_d_f ; db 21 ; db 'light No. ' ; db 3 @@ -262,7 +268,7 @@ base_vector: if Ext=SSE db ' (SSE)' end if - db ' 0.053' + db ' 0.054' labellen: STRdata db '-1 ' @@ -277,7 +283,7 @@ if USE_LFN fsize dd 0 ;180000 ; sizeof(workarea) fptr dd 0 ;workarea file_name: - db '/rd/1/3d/teapot.3ds',0 + db '/rd/1/3d/house.3ds',0 else @@ -288,7 +294,7 @@ else dd workarea dd hash_table file_name: - db '/rd/1/teapot.3ds',0 + db '/rd/1/house.3ds',0 end if I_END: @@ -390,7 +396,7 @@ align 8 rand_seed dw ? align 8 buffer dq ? - err_ dd ? + err dd ? drr dd ? xx dd ? yy dd ? diff --git a/programs/demos/3DS/readme.txt b/programs/demos/3DS/readme.txt index 09818815f8..6c93385b82 100644 --- a/programs/demos/3DS/readme.txt +++ b/programs/demos/3DS/readme.txt @@ -1,8 +1,11 @@ -View3ds 0.053 - tiny viewer to .3ds files. +View3ds 0.054 - tiny viewer to .3ds files. What's new? -1. Optimizations. - +1. Skinned window by Leency. +2. Optimizations. +3. 
Re map texture, bumps option - allows spherical mapping around each axle (X,Y,Z). +4. Problem with too small memory to generate object fixed. (Problem occurred with + house.3ds object and other objects containing fewer than 1000 faces and points). Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -14,8 +17,8 @@ Buttons description: mapping). 3. speed: idle, full. 4,5. zoom in, out: no comment. -6. catmull: on( use z buffer ( z coordinate interpolation), off( depth sorting, painters alghoritm). - txgrd and 2tex models only with catmull = on. +6. catmull: on( use z buffer ( z coordinate interpolation), off( depth sorting, painters + algorithm). Txgrd and 2tex models only with catmull = on. 7. culling: backface culling on/ off. 8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). 9. Blur: blur N times; N=0,1,2,3,4,5 @@ -26,6 +29,9 @@ Buttons description: 14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving camera. 15. generate: Generates some objects: node, Thorn Crown, heart... 16. bumps: random, according to texture. -15. bumps deep -> create bumps deeper or lighter. +17. bumps deep -> create bumps deeper or lighter. +18. re-map tex -> re-map texture and bump map coordinates, to change spherical mapping + around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button. - Macgub X 2009 \ No newline at end of file + Macgub december 2009 + Last edited Jan 2010 \ No newline at end of file