diff --git a/programs/demos/view3ds/3dmath.inc b/programs/demos/view3ds/3dmath.inc index af467e4ebc..b144f91dd1 100644 --- a/programs/demos/view3ds/3dmath.inc +++ b/programs/demos/view3ds/3dmath.inc @@ -245,7 +245,7 @@ ret ;---------------------- out : none ------------------------ make_vector_r: - if Ext < SSE2 + if Ext < SSE fninit fld dword[edi] ;edi+x3d fsub dword[esi] ;esi+x3d @@ -297,6 +297,7 @@ cross_product: fstp dword [ebx+vec_z] ret cross_aligned: +; params as above cross_p movaps xmm0,[esi] movaps xmm1,[esi] movaps xmm2,[edi] @@ -607,6 +608,19 @@ ret ;---------------------------------------------- ; esi - pointer to 3x3 matrix add_scale_to_matrix: + if Ext>SSE + movss xmm0,[rsscale] + shufps xmm0,xmm0,0 + movups xmm1,[esi] + movups xmm2,[esi+16] + movss xmm3,[esi+32] + mulps xmm1,xmm0 + mulps xmm2,xmm0 + mulss xmm3,xmm0 + movups [esi],xmm1 + movups [esi+16],xmm2 + movss [esi+32],xmm3 + else fninit fld [rsscale] fld dword[esi] ;----- @@ -639,14 +653,19 @@ add_scale_to_matrix: fld dword[esi+32] fmulp st1,st fstp dword[esi+32] ;------ - + end if ret ;in esi - offset to 3d points (point as 3 dwords float) ; edi - offset to 2d points ( as 3 words integer) ; ecx - number of points translate_points: ; just convert into integer; z coord still needed + if Ext < SSE fninit + else + ; movaps xmm1,[vect_x] + end if + .again: if 0 fld dword[esi+8] @@ -676,7 +695,18 @@ translate_points: ; just convert into integer; z coord still needed fiadd [vect_y] fistp word[edi+2] end if - ; movups xmm0,[esi] + if Ext>=SSE + movups xmm0,[esi] + cvtps2dq xmm0,xmm0 + packssdw xmm0,xmm0 + paddw xmm0,[vect_x] + movd [edi],xmm0 + ; psrldq xmm0,4 + ; movd eax,xmm0 + pextrw eax,xmm0,6 + mov [edi+4],ax + else + ; cvtps2dq xmm0,xmm0 ; packsdw xmm0,xmm0 ; movq [edi] @@ -688,9 +718,12 @@ translate_points: ; just convert into integer; z coord still needed fistp word[edi+2] fld dword[esi+8] fistp word[edi+4] + end if + add esi,12 add edi,6 - dec ecx - jnz .again + ; dec ecx + ; jnz .again + 
loop .again ret diff --git a/programs/demos/view3ds/3r_phg.inc b/programs/demos/view3ds/3r_phg.inc index b0a5f20908..a3ed7c554e 100644 --- a/programs/demos/view3ds/3r_phg.inc +++ b/programs/demos/view3ds/3r_phg.inc @@ -341,7 +341,6 @@ end if pop ebp ret -align 16 real_phong_line_z: ; in: ; xmm0 - normal vector 1 @@ -456,7 +455,7 @@ real_phong_line_z: sub ecx,.lx1 movaps xmm0,.n1 movss xmm2,.z1 -align 16 + .ddraw: movss xmm7,xmm2 cmpnltss xmm7,dword[esi] diff --git a/programs/demos/view3ds/3ray_shd.inc b/programs/demos/view3ds/3ray_shd.inc index f769141f2a..7da685927e 100644 --- a/programs/demos/view3ds/3ray_shd.inc +++ b/programs/demos/view3ds/3ray_shd.inc @@ -353,7 +353,7 @@ end if ret -align 16 + ray_shd_l: ; in: ; xmm0 - normal vector 1 @@ -392,7 +392,7 @@ ray_shd_l: .dn equ [ebp-96] .dz equ [ebp-100] .y equ [ebp-104] -; .cur_tri equ [ebp-108] + .startx equ [ebp-108] .cnv equ [ebp-128] .Rlen equ [ebp-128-16] .r1 equ [ebp-128-32] @@ -431,6 +431,7 @@ ray_shd_l: movaps .n1,xmm0 movaps .n2,xmm1 mov .lx1,eax +; mov .startx,eax mov .lx2,ebx movlps .z1,xmm3 @@ -546,9 +547,15 @@ end if mov edi,[triangles_ptr] xor ecx,ecx .nx_tri: ; next triangle + ; mov eax,.lx1 + ; cmp eax,.startx + ; je @f ; prevent artifact borders on tri + ; cmp eax,.lx2 ; NOT work as I want !! 
+; je @f cmp ecx,.cur_tri ; prevent self shadowing je .skipp + @@: if 0 mov edi,ecx imul edi,[i12] diff --git a/programs/demos/view3ds/3stencil.inc b/programs/demos/view3ds/3stencil.inc index 9b2576de40..9207856db9 100644 --- a/programs/demos/view3ds/3stencil.inc +++ b/programs/demos/view3ds/3stencil.inc @@ -16,22 +16,22 @@ stencil_tri: .y3 equ [ebp-12] .dx12 equ dword[ebp-20] -.dx13 equ dword[ebp-24] -.dx23 equ dword[ebp-28] -.dz12 equ dword[ebp-32] -.dz13 equ dword[ebp-36] +.dz12 equ dword[ebp-24] +.dx13 equ dword[ebp-28] +.dz13 equ dword[ebp-32] +.dx23 equ dword[ebp-36] .dz23 equ dword[ebp-40] .zz2 equ [ebp-44] .zz1 equ [ebp-48] .z3 equ [ebp-56] .z2 equ [ebp-60] .z1 equ [ebp-64] -.s_buff equ [ebp-68] +;.s_buff equ [ebp-68] push ebp mov ebp,esp - sub esp,128 - and ebp,0xfffffff0 + ; sub esp,128 + ; and ebp,0xfffffff0 .sort2: cmp ax,bx jle .sort1 @@ -44,19 +44,24 @@ stencil_tri: shufps xmm0,xmm0,11011000b jmp .sort2 .sort3: - mov .y1,eax ; store triangle coordinates in user friendly variables - mov .y2,ebx - mov .y3,ecx + ; mov .y1,eax ; store triangle coordinates in user friendly variables + ; mov .y2,ebx + ; mov .y3,ecx + push eax + push ebx + push ecx + sub esp,60 + ; mov edx,100.11 ; movd xmm0,edx ; shufps xmm0,xmm0,11100000b - movaps .z1,xmm0 + movups .z1,xmm0 ; mov dword .z1,edx ; mov .z2,edx ; mov .z3,edx - mov .s_buff,esi + ; mov .s_buff,esi mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that and edx,ebx ; if *all* of them are negative a sign flag is raised @@ -157,7 +162,7 @@ stencil_tri: ; mov edx,0.11 ; movd xmm0,edx ; shufps xmm0,xmm0,11100000b - mov esi,.s_buff +; mov esi,.s_buff call stencil_line @@ -192,7 +197,7 @@ stencil_tri: sar ebx,ROUND2 sar eax,ROUND2 movlps xmm0,.zz1 - mov esi,.s_buff +; mov esi,.s_buff call stencil_line @@ -215,7 +220,7 @@ stencil_tri: jl .loop2 .loop2_end: - add esp,128 + mov esp,ebp pop ebp ret @@ -312,17 +317,19 @@ stencil_line: sub ecx,.x1 movss xmm2,.z1 ; cz .ccalc: - movss xmm1,xmm2 - cmpltss 
xmm1,dword[esi] - movd eax,xmm1 - cmp eax,-1 - jnz @f + ; movss xmm1,xmm2 + ; cmpltss xmm1,dword[esi] + ; movd eax,xmm1 + ; cmp eax,-1 + comiss xmm2,[esi] + ja @f movss dword[esi],xmm2 @@: add esi,4 addss xmm2,.dz - sub ecx,1 - jnz .ccalc +; sub ecx,1 +; jnz .ccalc + loop .ccalc .l_quit: mov esp,ebp pop ebp diff --git a/programs/demos/view3ds/a_procs.inc b/programs/demos/view3ds/a_procs.inc index 55a96e41a0..05799e390e 100644 --- a/programs/demos/view3ds/a_procs.inc +++ b/programs/demos/view3ds/a_procs.inc @@ -1,4 +1,57 @@
+;=============================================================
+; remove_dead_tri - drop degenerate triangles from the triangle list.
+; A triangle is three dword vertex indices (12 bytes); it is degenerate
+; when any two of its indices are equal.
+; in:  [triangles_ptr]       - pointer to the triangle array
+;      [triangles_count_var] - number of triangles in the array
+; out: surviving triangles packed at the start of the array, original
+;      order kept; [triangles_count_var] = number of survivors
+; clobbers: eax, ecx, edx, esi, edi, flags
+; Integer instructions only, so it does not depend on the Ext level.
+remove_dead_tri:
+        mov     ecx,[triangles_count_var]
+        test    ecx,ecx
+        jz      .done           ; empty list: 'loop' with ecx = 0 would run 2^32 times
+        mov     esi,[triangles_ptr]
+  .chck:                        ; pass 1: mark degenerate triangles with -1
+        mov     eax,[esi]
+        mov     edx,[esi+4]
+        cmp     eax,edx
+        je      .tri_fail
+        cmp     eax,[esi+8]
+        je      .tri_fail
+        cmp     edx,[esi+8]
+        jne     .tri_ok
+  .tri_fail:
+        mov     dword[esi],-1
+        mov     dword[esi+4],-1
+        mov     dword[esi+8],-1
+  .tri_ok:
+        add     esi,12
+        loop    .chck
+
+        mov     esi,[triangles_ptr] ; pass 2: compact, esi = read, edi = write
+        mov     edi,esi
+        mov     ecx,[triangles_count_var]
+        xor     edx,edx         ; edx = number of triangles kept
+  .cp:
+        cmp     dword[esi],-1
+        je      @f              ; marked dead - skip it
+        mov     eax,[esi]       ; copy exactly 12 bytes; a 16-byte movdqu would
+        mov     [edi],eax       ; read 4 bytes past the last triangle
+        mov     eax,[esi+4]
+        mov     [edi+4],eax
+        mov     eax,[esi+8]
+        mov     [edi+8],eax
+        add     edi,12
+        inc     edx
+    @@:
+        add     esi,12
+        loop    .cp
+        mov     [triangles_count_var],edx
+  .done:
+        ret
+;========================================================
 if Ext > SSE2 ;-------------------------------------------------------------------- diff --git a/programs/demos/view3ds/bump_tex.inc b/programs/demos/view3ds/bump_tex.inc index 3b325d04e3..12c696b6a7 100644 --- a/programs/demos/view3ds/bump_tex.inc +++ b/programs/demos/view3ds/bump_tex.inc @@ -25,192 +25,171 @@ bump_tex_triangle_z: ;---------------------- texture coordinates----- ;-- Z-buffer - filled with coordinates as dword 
-------- ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! -.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 ; b - bump map coords -.b_x3 equ ebp+12 ; e - env map coords -.b_y3 equ ebp+14 -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer -.tex_ptr equ dword[ebp+38] ; ptr to texture -.t_x1 equ ebp+42 ; texture coords -.t_y1 equ ebp+44 -.t_x2 equ ebp+46 -.t_y2 equ ebp+48 -.t_x3 equ ebp+50 -.t_y3 equ ebp+52 -.t_bmap equ dword[ebp-4] ; pointer to bump map -.t_emap equ dword[ebp-8] ; pointer to env map -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] +.t_y1 equ ebp+4 ; procedure don't save registers !!! +.t_x1 equ ebp+6 ; each coordinate as word +.e_y1 equ ebp+8 ; texture coords +.e_x1 equ ebp+10 +.b_y1 equ ebp+12 +.b_x1 equ ebp+14 -if 0 ;Ext <= SSE2 -.dx12 equ dword[edi-4] -.dz12 equ [edi-8] -.dbx12 equ dword[edi-12] -.dby12 equ [edi-16] -.dex12 equ dword[edi-20] -.dey12 equ [edi-24] -.dtx12 equ dword[edi-28] -.dty12 equ [edi-32] + +.t_y2 equ ebp+16 +.t_x2 equ ebp+18 ; b - bump map coords +.e_y2 equ ebp+20 ; texture coords +.e_x2 equ ebp+22 +.b_y2 equ ebp+24 +.b_x2 equ ebp+26 + + + + +.t_y3 equ ebp+28 ; e - env map coords +.t_x3 equ ebp+30 +.e_y3 equ ebp+32 ; texture coords +.e_x3 equ ebp+34 +.b_y3 equ ebp+36 +.b_x3 equ ebp+38 + +.z1 equ word[ebp+40] +.z2 equ word[ebp+42] +.z3 equ word[ebp+44] +.z_buff equ dword[ebp+46] ; pointer to Z-buffer +.tex_ptr equ dword[ebp+50] ; ptr to texture + + + +.t_bmap equ dword[ebp-4] ; pointer to bump map +.t_emap equ dword[ebp-8] ; pointer to env map +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] + + + + +.dx12 
equ dword[ebp-24] +.dz12 equ [ebp-28] +.dbx12 equ dword[ebp-32] +.dby12 equ [ebp-36] +.dex12 equ dword[ebp-40] +.dey12 equ [ebp-44] +.dtx12 equ dword[ebp-48] +.dty12 equ [ebp-52] .dx13 equ dword[ebp-52-4*1] -.dz13 equ [ebp-52-4*2] +.dz13 equ [ebp-52-4*2] .dbx13 equ dword[ebp-52-4*3] -.dby13 equ [ebp-52-4*4] +.dby13 equ [ebp-52-4*4] .dex13 equ dword[ebp-52-4*5] -.dey13 equ [ebp-52-4*6] +.dey13 equ [ebp-52-4*6] .dtx13 equ dword[ebp-52-4*7] -.dty13 equ [ebp-52-4*8] +.dty13 equ [ebp-52-4*8] .dx23 equ dword[ebp-(52+4*9)] -.dz23 equ [ebp-(52+4*10)] +.dz23 equ [ebp-(52+4*10)] .dbx23 equ dword[ebp-(52+4*11)] -.dby23 equ [ebp-(52+4*12)] +.dby23 equ [ebp-(52+4*12)] .dex23 equ dword[ebp-(52+4*13)] -.dey23 equ [ebp-(52+4*14)] +.dey23 equ [ebp-(52+4*14)] .dtx23 equ dword[ebp-(52+4*15)] -.dty23 equ [ebp-(52+4*16)] - -else - -.dx12 equ dword[ebp-24] -.dz12 equ [ebp-28] -.dbx12 equ dword[ebp-32] -.dby12 equ [ebp-36] -.dex12 equ dword[ebp-40] -.dey12 equ [ebp-44] -.dtx12 equ dword[ebp-48] -.dty12 equ [ebp-52] - -.dx13 equ dword[ebp-52-4*1] -.dz13 equ [ebp-52-4*2] -.dbx13 equ dword[ebp-52-4*3] -.dby13 equ [ebp-52-4*4] -.dex13 equ dword[ebp-52-4*5] -.dey13 equ [ebp-52-4*6] -.dtx13 equ dword[ebp-52-4*7] -.dty13 equ [ebp-52-4*8] +.dty23 equ [ebp-(52+4*16)] -.dx23 equ dword[ebp-(52+4*9)] -.dz23 equ [ebp-(52+4*10)] -.dbx23 equ dword[ebp-(52+4*11)] -.dby23 equ [ebp-(52+4*12)] -.dex23 equ dword[ebp-(52+4*13)] -.dey23 equ [ebp-(52+4*14)] -.dtx23 equ dword[ebp-(52+4*15)] -.dty23 equ [ebp-(52+4*16)] -end if - -if Ext < SSE - -.cx1 equ dword[ebp-(52+4*17)] ; current variables -.cz1 equ [ebp-(52+4*18)] -.cx2 equ dword[ebp-(52+4*19)] -.cz2 equ [ebp-(52+4*20)] -.cbx1 equ dword[ebp-(52+4*21)] -.cby1 equ [ebp-(52+4*22)] -.cbx2 equ dword[ebp-(52+4*23)] -.cby2 equ [ebp-(52+4*24)] -.cex1 equ dword[ebp-(52+4*25)] -.cey1 equ [ebp-(52+4*26)] -.cex2 equ dword[ebp-(52+4*27)] -.cey2 equ [ebp-(52+4*28)] - -.ctx1 equ dword[ebp-(52+4*29)] -.cty1 equ [ebp-(52+4*30)] -.ctx2 equ dword[ebp-(52+4*31)] -.cty2 equ 
[ebp-(52+4*32)] - -else - -.cx1 equ dword[ebp-(52+4*17)] ; current variables -.cz1 equ [ebp-(52+4*18)] +.cx1 equ dword[ebp-(52+4*17)] ; current variables +.cz1 equ [ebp-(52+4*18)] .cbx1 equ dword[ebp-(52+4*19)] -.cby1 equ [ebp-(52+4*20)] +.cby1 equ [ebp-(52+4*20)] .cex1 equ dword[ebp-(52+4*21)] -.cey1 equ [ebp-(52+4*22)] +.cey1 equ [ebp-(52+4*22)] .ctx1 equ dword[ebp-(52+4*23)] -.cty1 equ [ebp-(52+4*24)] +.cty1 equ [ebp-(52+4*24)] .cx2 equ dword[ebp-(52+4*25)] -.cz2 equ [ebp-(52+4*26)] +.cz2 equ [ebp-(52+4*26)] .cbx2 equ dword[ebp-(52+4*27)] -.cby2 equ [ebp-(52+4*28)] +.cby2 equ [ebp-(52+4*28)] .cex2 equ dword[ebp-(52+4*29)] -.cey2 equ [ebp-(52+4*30)] +.cey2 equ [ebp-(52+4*30)] .ctx2 equ dword[ebp-(52+4*31)] -.cty2 equ [ebp-(52+4*32)] +.cty2 equ [ebp-(52+4*32)] +if Ext >+ MMX + emms end if + cld mov ebp,esp - push edx ; store bump map - push esi ; store e. map + push edx ; store bump map + push esi ; store e. map ; sub esp,120 - .sort3: ; sort triangle coordinates... + .sort3: ; sort triangle coordinates... 
cmp ax,bx jle .sort1 xchg eax,ebx - mov edx,dword[.b_x1] - xchg edx,dword[.b_x2] - mov dword[.b_x1],edx - mov edx,dword[.e_x1] - xchg edx,dword[.e_x2] - mov dword[.e_x1],edx - mov edx,dword[.t_x1] - xchg edx,dword[.t_x2] - mov dword[.t_x1],edx + if Ext >= MMX + movq mm0,[.t_y1] + movq mm1,[.t_y2] + movq [.t_y1],mm1 + movq [.t_y2],mm0 + end if + mov edx,dword[.b_y1] + xchg edx,dword[.b_y2] + mov dword[.b_y1],edx + if Ext = NON + mov edx,dword[.e_y1] + xchg edx,dword[.e_y2] + mov dword[.e_y1],edx + mov edx,dword[.t_y1] + xchg edx,dword[.t_y2] + mov dword[.t_y1],edx + end if mov dx,.z1 xchg dx,.z2 mov .z1,dx .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx - mov edx,dword[.t_x2] - xchg edx,dword[.t_x3] - mov dword[.t_x2],edx + cmp bx,cx + jle .sort2 + xchg ebx,ecx + if Ext >= MMX + movq mm0,[.t_y3] + movq mm1,[.t_y2] + movq [.t_y3],mm1 + movq [.t_y2],mm0 + end if + mov edx,dword[.b_y2] + xchg edx,dword[.b_y3] + mov dword[.b_y2],edx + if Ext = NON + mov edx,dword[.e_y2] + xchg edx,dword[.e_y3] + mov dword[.e_y2],edx + mov edx,dword[.t_y2] + xchg edx,dword[.t_y3] + mov dword[.t_y2],edx + end if mov dx,.z2 xchg dx,.z3 mov .z2,dx - jmp .sort3 + jmp .sort3 .sort2: - push eax ; store triangle coords in variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done + push eax ; store triangle coords in variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means 
that ; or edx,ebx ; if any *one* of them is negative a sign flag is raised ; or edx,ecx @@ -225,177 +204,68 @@ end if ; jg .loop23_done ; { - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make -if 0 ;Ext >= SSE2 - pxor xmm0,xmm0 - movups .dty12,xmm0 - movups .dey12,xmm0 - sub esp,16 -else - mov ecx,8 - xor edx,edx + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make + + mov ecx,8 + xor edx,edx @@: - push edx ;dword 0 - loop @b -end if - jmp .bt_dx12_done + push edx + loop @b + + jmp .bt_dx12_done .bt_dx12_make: - movsx ebx,bx - - -if Ext>=SSE - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps -if 0 ;Ext >= SSE2 - mov edi,ebp - sub edi,512 - or edi,0x0000000f -end if - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | dbx | dby | dex | dey | -;1 movups .dey12,xmm1 - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey12,mm0 - movq .dby12,mm1 -;------------- - ; pxor mm0,mm0 - ; pxor mm1,mm1 - ;/ pinsrw mm0,.z1,1 - ;/ pinsrw mm0,.x1,0 - ;/ pinsrw mm1,.z2,1 - ;/ pinsrw mm1,.x2,0 - mov ax,.z2 - sub ax,.z1 - cwde - - mov dx,.x2 - sub dx,.x1 - movsx edx,dx - - ;/ movd mm1,eax - - ;/ punpcklwd mm0,mm4 - ;/ punpcklwd mm1,mm4 - - ; cvtpi2ps xmm1,mm1 - ; cvtpi2ps xmm2,mm0 - ; subps xmm1,xmm2 - - ;/ psubd mm1,mm0 - - movd mm2,[.t_x1] - movd mm3,[.t_x2] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - ;/ cvtpi2ps xmm1,mm1 - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - ; movss xmm1,xmm4 - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | - - 
shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -;1 movlps .dty12,xmm1 -;1 movhps .dz12,xmm1 - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dty12,mm0 - movq .dz12,mm1 -;---- -; mov ax,.z2 -; sub ax,.z1 -; cwde -; mov bx,.x2 -; sub bx,.x1 -; movsx ebx,bx -; movd mm1,eax -; psllq mm1,32 -; movd mm1,ebx - -;; push ebx -;; push eax -;; movq mm1,[esp] -;; add esp,8 -;;; mov ax,.z1 -;;; mov bx,.z2 -;;; shl eax,16 -;;; shl ebx,16 -;;; mov ax,.x1 -;;; mov bx,.x2 -; movd mm2,[.t_x1] -; movd mm3,[.t_x2] -;; movd mm0,eax -;; movd mm1,ebx - -; pxor mm4,mm4 -;; punpcklwd mm0,mm4 -;; punpcklwd mm1,mm4 -; punpcklwd mm2,mm4 -; punpcklwd mm3,mm4 - -;; psubd mm1,mm0 -; psubd mm3,mm2 - - -; cvtpi2ps xmm1,mm1 -; movlhps xmm1,xmm1 -; cvtpi2ps xmm1,mm3 - -; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | - -; shufps xmm1,xmm1,10110001b - ; xmm1--> | dx | dz | dtx | dty | -; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | -; movhlps xmm1,xmm1 -; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | -; movq .dty12,mm0 -; movq .dz12,mm1 -else - mov ax,.x2 - sub ax,.x1 - cwde - shl eax,ROUND + movsx ebx,bx +if Ext >= SSE2 + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + mov ebx,eax + + + mov ax,.x2 + sub ax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + + mov ax,.z2 + sub ax,.z1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + + sub esp,4*6 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movdqu xmm1,[.t_y1] + movdqu xmm2,[.t_y2] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + movdqu .dty12,xmm2 + movq .dby12,xmm4 +else + + mov ax,.x2 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx ; mov .dx12,eax - push eax + push eax mov ax,.z2 sub ax,.z1 @@ -405,157 +275,124 @@ else idiv ebx push eax - mov ax,word[.b_x2] - sub 
ax,word[.b_x1] + mov ax,word[.b_x2] + sub ax,word[.b_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dbx12,eax - push eax + push eax - mov ax,word[.b_y2] - sub ax,word[.b_y1] + mov ax,word[.b_y2] + sub ax,word[.b_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby12,eax - push eax + push eax - mov ax,word[.e_x2] - sub ax,word[.e_x1] + mov ax,word[.e_x2] + sub ax,word[.e_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex12,eax - push eax + push eax - mov ax,word[.e_y2] - sub ax,word[.e_y1] + mov ax,word[.e_y2] + sub ax,word[.e_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey12,eax - push eax + push eax - mov ax,word[.t_x2] - sub ax,word[.t_x1] + mov ax,word[.t_x2] + sub ax,word[.t_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dtx12,eax - push eax + push eax - mov ax,word[.t_y2] - sub ax,word[.t_y1] + mov ax,word[.t_y2] + sub ax,word[.t_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dty12,eax - push eax + push eax end if .bt_dx12_done: - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,8 - xor edx,edx + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,8 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done + push edx ;dword 0 + loop @b + jmp .bt_dx13_done .bt_dx13_make: - movsx ebx,bx + movsx ebx,bx -if Ext>=SSE +if Ext >= SSE2 + mov eax,1 shl 15 + cdq + idiv ebx + mov ebx,eax - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - movd mm0,[.b_x1] - movd mm1,[.b_x3] - movd mm2,[.e_x1] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | 
dbx | dby | dex | dey | -;1 movups .dey13,xmm1 - - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey13,mm0 - movq .dby13,mm1 - - mov ax,.z3 - sub ax,.z1 + mov ax,.x3 + sub ax,.x1 cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx12,eax - mov dx,.x3 - sub dx,.x1 - movsx edx,dx - - movd mm2,[.t_x1] - movd mm3,[.t_x3] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | - - shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -;1 movlps .dty13,xmm1 -;1 movhps .dz13,xmm1 - - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dty13,mm0 - movq .dz13,mm1 + mov ax,.z3 + sub ax,.z1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + sub esp,4*6 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movdqu xmm1,[.t_y1] + movdqu xmm2,[.t_y3] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + movdqu .dty13,xmm2 + movq .dby13,xmm4 else - mov ax,.x3 - sub ax,.x1 + mov ax,.x3 + sub ax,.x1 cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dx13,eax - push eax + push eax mov ax,.z3 sub ax,.z1 @@ -567,156 +404,124 @@ else push eax - mov ax,word[.b_x3] - sub ax,word[.b_x1] + mov ax,word[.b_x3] + sub ax,word[.b_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dbx13,eax - push eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y1] + mov ax,word[.b_y3] + sub ax,word[.b_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby13,eax - push eax + push eax - mov ax,word[.e_x3] - sub ax,word[.e_x1] + mov ax,word[.e_x3] + sub ax,word[.e_x1] cwde - shl eax,ROUND + shl 
eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex13,eax - push eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y1] + mov ax,word[.e_y3] + sub ax,word[.e_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey13,eax - push eax + push eax - mov ax,word[.t_x3] - sub ax,word[.t_x1] + mov ax,word[.t_x3] + sub ax,word[.t_x1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dtx13,eax - push eax + push eax - mov ax,word[.t_y3] - sub ax,word[.t_y1] + mov ax,word[.t_y3] + sub ax,word[.t_y1] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dty13,eax - push eax + push eax end if .bt_dx13_done: - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,8 - xor edx,edx + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,8 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done + push edx ;dword 0 + loop @b + jmp .bt_dx23_done .bt_dx23_make: - movsx ebx,bx + movsx ebx,bx +if Ext >= SSE2 -if Ext>=SSE - - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | dbx | dby | dex | dey | -;1 movups .dey23,xmm1 - - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey23,mm0 - movq .dby23,mm1 - - mov ax,.z3 - sub ax,.z2 - cwde - - mov dx,.x3 - sub dx,.x2 - movsx edx,dx - - movd mm2,[.t_x2] - movd mm3,[.t_x3] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; 
xmm1--> | dx | dz | dty | dtx | - - shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -; movlps .dty23,xmm1 -; movhps .dz23,xmm1 - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | - movq .dty23,mm0 - movq .dz23,mm1 - - -else - mov ax,.x3 - sub ax,.x2 - cwde - shl eax,ROUND + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + ; push eax + mov ebx,eax + + + mov ax,.x3 + sub ax,.x2 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx12,eax + + mov ax,.z3 + sub ax,.z2 + cwde + imul ebx + sar eax,15 - ROUND + push eax + + sub esp,4*6 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movdqu xmm1,[.t_y2] + movdqu xmm2,[.t_y3] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + movdqu .dty23,xmm2 + movq .dby23,xmm4 +else + mov ax,.x3 + sub ax,.x2 + cwde + shl eax,ROUND + cdq + idiv ebx ; mov .dx23,eax - push eax + push eax mov ax,.z3 sub ax,.z2 @@ -727,123 +532,125 @@ else ; mov .dz23,eax push eax - mov ax,word[.b_x3] - sub ax,word[.b_x2] + mov ax,word[.b_x3] + sub ax,word[.b_x2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dbx23,eax - push eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y2] + mov ax,word[.b_y3] + sub ax,word[.b_y2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dby23,eax - push eax + push eax - mov ax,word[.e_x3] - sub ax,word[.e_x2] + mov ax,word[.e_x3] + sub ax,word[.e_x2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dex23,eax - push eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y2] + mov ax,word[.e_y3] + sub ax,word[.e_y2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dey23,eax - push eax + push eax - mov ax,word[.t_x3] - sub ax,word[.t_x2] + mov ax,word[.t_x3] + sub ax,word[.t_x2] 
cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dtx23,eax - push eax + push eax - mov ax,word[.t_y3] - sub ax,word[.t_y2] + mov ax,word[.t_y3] + sub ax,word[.t_y2] cwde - shl eax,ROUND + shl eax,ROUND cdq - idiv ebx + idiv ebx ; mov .dty23,eax - push eax + push eax end if - ; sub esp,40 + + .bt_dx23_done: - sub esp,64 - movsx eax,.x1 - shl eax,ROUND - mov .cx1,eax - mov .cx2,eax + sub esp,64 + + movsx eax,.x1 + shl eax,ROUND + mov .cx1,eax + mov .cx2,eax ; push eax ; push eax - movsx ebx,word[.b_x1] - shl ebx,ROUND - mov .cbx1,ebx - mov .cbx2,ebx + movsx ebx,word[.b_x1] + shl ebx,ROUND + mov .cbx1,ebx + mov .cbx2,ebx ; push ebx ; push ebx - movsx ecx,word[.b_y1] - shl ecx,ROUND - mov .cby1,ecx - mov .cby2,ecx + movsx ecx,word[.b_y1] + shl ecx,ROUND + mov .cby1,ecx + mov .cby2,ecx ; push ecx ; push ecx - movsx edx,word[.e_x1] - shl edx,ROUND - mov .cex1,edx - mov .cex2,edx + movsx edx,word[.e_x1] + shl edx,ROUND + mov .cex1,edx + mov .cex2,edx ; push edx ; push edx - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax ; push eax ; push eax - movsx ebx,.z1 - shl ebx,CATMULL_SHIFT - mov .cz1,ebx - mov .cz2,ebx + movsx ebx,.z1 + shl ebx,CATMULL_SHIFT + mov .cz1,ebx + mov .cz2,ebx ; push ebx ; push ebx ; sub esp,16 - movsx ecx,word[.t_x1] - shl ecx,ROUND - mov .ctx1,ecx - mov .ctx2,ecx + movsx ecx,word[.t_x1] + shl ecx,ROUND + mov .ctx1,ecx + mov .ctx2,ecx ;push ecx ;push ecx - movsx edx,word[.t_y1] - shl edx,ROUND - mov .cty1,edx - mov .cty2,edx + movsx edx,word[.t_y1] + shl edx,ROUND + mov .cty1,edx + mov .cty2,edx ; push edx ; push edx -if Ext >= SSE2 +if 0 ;Ext >= SSE2 movups xmm0,.cby1 movups xmm1,.cty1 movups xmm2,.cby2 @@ -852,15 +659,15 @@ if Ext >= SSE2 movups xmm5,.dty13 movups xmm6,.dby12 movups xmm7,.dty12 - .scby1 equ [edi] - .scty1 equ [edi+16] - .scby2 equ [edi+32] - .scty2 equ [edi+48] - .sdby13 equ [edi+64] - .sdty13 equ [edi+80] - .sdby12 equ 
[edi+96] - .sdty12 equ [edi+128] - push edi + ; .scby1 equ [edi] + ; .scty1 equ [edi+16] + ; .scby2 equ [edi+32] + ; .scty2 equ [edi+48] + ; .sdby13 equ [edi+64] + ; .sdty13 equ [edi+80] + ; .sdby12 equ [edi+96] + ; .sdty12 equ [edi+128] + ; push edi mov edi,sse_repository movaps .scby1,xmm0 movaps .scty1,xmm1 @@ -873,626 +680,483 @@ if Ext >= SSE2 pop edi end if - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done + + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done .loop12: -;if Ext >= SSE2 -; fxsave [sse_repository] -;end if - call .call_line -if Ext >= SSE2 -; fxrstor [sse_repository] + call .call_line + +if Ext >= SSE2 + movups xmm0,.cby1 movups xmm1,.cty1 movups xmm2,.cby2 movups xmm3,.cty2 - ; movups xmm4,.dby13 - ; movups xmm5,.dty13 - ; movups xmm6,.dby12 - ; movups xmm7,.dty12 - ; paddd xmm0,xmm4 - ; paddd xmm1,xmm5 - ; paddd xmm2,xmm6 - ; paddd xmm3,xmm7 - push edi - mov edi,sse_repository - paddd xmm0,.sdby13 - paddd xmm1,.sdty13 - paddd xmm2,.sdby12 - paddd xmm3,.sdty12 - pop edi + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby12 + movups xmm7,.dty12 + paddd xmm0,xmm4 + paddd xmm1,xmm5 + paddd xmm2,xmm6 + paddd xmm3,xmm7 movups .cby1,xmm0 movups .cty1,xmm1 movups .cby2,xmm2 movups .cty2,xmm3 end if -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - movq mm1,.cby1 - movq mm2,.cey2 - movq mm3,.cey1 - movq mm4,.cty1 - movq mm5,.cty2 - movq mm6,.cz1 - movq mm7,.cz2 - paddd mm0,.dby12 - paddd mm1,.dby13 - paddd mm2,.dey12 - paddd mm3,.dey13 - paddd mm4,.dty13 - paddd mm5,.dty12 - paddd mm6,.dz13 - paddd mm7,.dz12 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 - movq .cty1,mm4 - movq .cty2,mm5 - movq .cz1,mm6 - movq .cz2,mm7 +if (Ext = MMX) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cty1 + movq mm5,.cty2 + movq mm6,.cz1 + movq mm7,.cz2 + paddd mm0,.dby12 + paddd mm1,.dby13 + paddd mm2,.dey12 + paddd mm3,.dey13 + paddd mm4,.dty13 + paddd mm5,.dty12 + paddd mm6,.dz13 + paddd mm7,.dz12 + movq .cby2,mm0 
+ movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 + movq .cty1,mm4 + movq .cty2,mm5 + movq .cz1,mm6 + movq .cz2,mm7 end if if Ext = NON - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax - mov eax,.dtx13 - add .ctx1,eax - mov ebx,.dtx12 - add .ctx2,ebx - mov edx,.dty13 - add .cty1,edx - mov eax,.dty12 - add .cty2,eax + mov eax,.dtx13 + add .ctx1,eax + mov ebx,.dtx12 + add .ctx2,ebx + mov edx,.dty13 + add .cty1,edx + mov eax,.dty12 + add .cty2,eax - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx end if - inc ecx - cmp cx,.y2 - jl .loop12 + inc ecx + cmp cx,.y2 + jl .loop12 .loop12_done: - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax - movsx ebx,.x2 - shl ebx,ROUND - mov .cx2,ebx + movsx ebx,.x2 + shl ebx,ROUND + mov .cx2,ebx - movzx edx,word[.b_x2] - shl edx,ROUND - mov .cbx2,edx + movzx edx,word[.b_x2] + shl edx,ROUND + mov .cbx2,edx - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax + movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax - movzx ebx,word[.e_x2] - shl ebx,ROUND - mov .cex2,ebx + movzx ebx,word[.e_x2] + shl ebx,ROUND + mov .cex2,ebx - movzx edx,word[.e_y2] - shl edx,ROUND - mov .cey2,edx + movzx edx,word[.e_y2] + shl 
edx,ROUND + mov .cey2,edx - movzx eax,word[.t_x2] - shl eax,ROUND - mov .ctx2,eax + movzx eax,word[.t_x2] + shl eax,ROUND + mov .ctx2,eax - movzx ebx,word[.t_y2] - shl ebx,ROUND - mov .cty2,ebx -if Ext >= SSE2 - movups xmm2,.cby2 - movups xmm3,.cty2 - ; movups xmm4,.dby13 - ; movups xmm5,.dty13 - movups xmm6,.dby23 - movups xmm7,.dty23 -; .scby1 equ [edi] -; .scty1 equ [edi+16] -; .scby2 equ [edi+32] -; .scty2 equ [edi+48] -; .sdby13 equ [edi+64] -; .sdty13 equ [edi+80] - .sdby23 equ [edi+160] - .sdty23 equ [edi+192] - push edi - mov edi,sse_repository -; movaps .scby1,xmm0 -; movaps .scty1,xmm1 - movaps .scby2,xmm2 - movaps .scty2,xmm3 -; movaps .sdby13,xmm4 -; movaps .sdty13,xmm5 - movaps .sdby23,xmm6 - movaps .sdty23,xmm7 - pop edi - -end if + movzx ebx,word[.t_y2] + shl ebx,ROUND + mov .cty2,ebx .loop23: -;if Ext >= SSE2 -; fxsave [sse_repository] -;end if - call .call_line - -if Ext >= SSE2 + call .call_line +if Ext >= SSE2 +; fxrstor [sse_repository] movups xmm0,.cby1 movups xmm1,.cty1 movups xmm2,.cby2 movups xmm3,.cty2 - - - push edi - mov edi,sse_repository - paddd xmm0,.sdby13 - paddd xmm1,.sdty13 - paddd xmm2,.sdby23 - paddd xmm3,.sdty23 - pop edi + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby23 + movups xmm7,.dty23 + paddd xmm0,xmm4 + paddd xmm1,xmm5 + paddd xmm2,xmm6 + paddd xmm3,xmm7 + ; push edi + ; mov edi,sse_repository + ; paddd xmm0,.sdby13 + ; paddd xmm1,.sdty13 + ; paddd xmm2,.sdby12 + ; paddd xmm3,.sdty12 + ; pop edi movups .cby1,xmm0 movups .cty1,xmm1 movups .cby2,xmm2 movups .cty2,xmm3 - - - - -; fxrstor [sse_repository] -; movups xmm0,.cby1 -; movups xmm1,.cty1 -; movups xmm2,.cby2 -; movups xmm3,.cty2 -; movups xmm4,.dby13 -; movups xmm5,.dty13 -; movups xmm6,.dby23 -; movups xmm7,.dty23 -; paddd xmm0,xmm4 -; paddd xmm1,xmm5 -; paddd xmm2,xmm6 - ; paddd xmm3,xmm7 - ; movups .cby1,xmm0 - ; movups .cty1,xmm1 - ; movups .cby2,xmm2 - ; movups .cty2,xmm3 -; end if -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - movq mm1,.cby1 - 
movq mm2,.cey2 - movq mm3,.cey1 - movq mm4,.cty1 - movq mm5,.cty2 - movq mm6,.cz1 - movq mm7,.cz2 - paddd mm0,.dby23 - paddd mm1,.dby13 - paddd mm2,.dey23 - paddd mm3,.dey13 - paddd mm4,.dty13 - paddd mm5,.dty23 - paddd mm6,.dz13 - paddd mm7,.dz23 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey2,mm2 - movq .cey1,mm3 - movq .cty1,mm4 - movq .cty2,mm5 - movq .cz1,mm6 - movq .cz2,mm7 + + +if (Ext = MMX) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cty1 + movq mm5,.cty2 + movq mm6,.cz1 + movq mm7,.cz2 + paddd mm0,.dby23 + paddd mm1,.dby13 + paddd mm2,.dey23 + paddd mm3,.dey13 + paddd mm4,.dty13 + paddd mm5,.dty23 + paddd mm6,.dz13 + paddd mm7,.dz23 + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey2,mm2 + movq .cey1,mm3 + movq .cty1,mm4 + movq .cty2,mm5 + movq .cz1,mm6 + movq .cz2,mm7 end if -If Ext = NON - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby23 - add .cby2,edx +If Ext = NON + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx - mov eax,.dtx13 - add .ctx1,eax - mov ebx,.dtx23 - add .ctx2,ebx - mov edx,.dty13 - add .cty1,edx - mov eax,.dty23 - add .cty2,eax + mov eax,.dtx13 + add .ctx1,eax + mov ebx,.dtx23 + add .ctx2,ebx + mov edx,.dty13 + add .cty1,edx + mov eax,.dty23 + add .cty2,eax end if - inc ecx - cmp cx,.y3 - jl .loop23 + inc ecx + cmp cx,.y3 + jl .loop23 .loop23_done: - mov 
esp,ebp + mov esp,ebp ret 50 .call_line: pushad - ; xmm0= cby1,cbx1,cz1,cx1 - ; xmm1= cty1,ctx1,cey1,cex1 -if Ext >= SSE2 - sub esp,8 - shufps xmm1,xmm1,10110001b - shufps xmm3,xmm3,10110001b - movlps [esp],xmm1 -else - push dword .cty1 - push .ctx1 -end if - push dword .cz1 -if Ext>=SSE2 - sub esp,8 - movlps [esp],xmm3 -else - push dword .cty2 - push .ctx2 -end if - push dword .cz2 -if Ext>=SSE2 - sub esp,32 - movhps [esp+24],xmm3 - shufps xmm2,xmm2,10110001b - movlps [esp+16],xmm2 - movhps [esp+8],xmm1 - shufps xmm0,xmm0,10110001b - movlps [esp],xmm0 ;================================ + push dword .cty1 + push .ctx1 -else - push dword .cey2 - push .cex2 - push dword .cby2 - push .cbx2 - push dword .cey1 - push .cex1 - push dword .cby1 - push .cbx1 -end if + push dword .cz1 + push dword .cty2 + push .ctx2 - push .tex_ptr - push .z_buff - push .t_emap - push .t_bmap + push dword .cz2 - push ecx + push dword .cey2 + push .cex2 + push dword .cby2 + push .cbx2 + push dword .cey1 + push .cex1 + push dword .cby1 + push .cbx1 - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND - call bump_tex_line_z + push .tex_ptr + push .z_buff + push .t_emap + push .t_bmap + + push ecx + + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND + + call bump_tex_line_z popad -;end if + ret bump_tex_line_z: ;--------------in: eax - x1 ;-------------- ebx - x2 ;-------------- edi - pointer to screen buffer ;stack - another parameters : -.y equ dword [ebp+4] -.bmap equ dword [ebp+8] ; bump map pointer -.emap equ dword [ebp+12] ; env map pointer -.z_buff equ dword [ebp+16] ; z buffer -.tex_map equ dword [ebp+20] ; texture pointer +.y equ dword [ebp+4] +.bmap equ dword [ebp+8] ; bump map pointer +.emap equ dword [ebp+12] ; env map pointer +.z_buff equ dword [ebp+16] ; z buffer +.tex_map equ dword [ebp+20] ; texture pointer -.bx1 equ [ebp+24] ; --- -.by1 equ [ebp+28] ; | -.ex1 equ [ebp+32] ; | -.ey1 equ [ebp+36] ; | -.bx2 equ [ebp+40] ; | -.by2 equ [ebp+44] ; |> b. map and e. 
map coords -.ex2 equ [ebp+48] ; |> shifted shl ROUND -.ey2 equ [ebp+52] ; --- -.z2 equ [ebp+56] -.tx2 equ [ebp+60] -.ty2 equ [ebp+64] -.z1 equ [ebp+68] -.tx1 equ [ebp+72] -.ty1 equ [ebp+76] +.bx1 equ [ebp+24] ; --- +.by1 equ [ebp+28] ; | +.ex1 equ [ebp+32] ; | +.ey1 equ [ebp+36] ; | +.bx2 equ [ebp+40] ; | +.by2 equ [ebp+44] ; |> b. map and e. map coords +.ex2 equ [ebp+48] ; |> shifted shl ROUND +.ey2 equ [ebp+52] ; --- +.z2 equ [ebp+56] +.tx2 equ [ebp+60] +.ty2 equ [ebp+64] +.z1 equ [ebp+68] +.tx1 equ [ebp+72] +.ty1 equ [ebp+76] -.x1 equ [ebp-4] -.x2 equ [ebp-8] -.dbx equ [ebp-12] -.dby equ [ebp-16] -.dex equ [ebp-20] -.dey equ [ebp-24] -.dz equ [ebp-28] -.dtx equ [ebp-32] -.dty equ [ebp-36] +.x1 equ [ebp-4] +.x2 equ [ebp-8] +.dbx equ [ebp-12] +.dby equ [ebp-16] +.dex equ [ebp-20] +.dey equ [ebp-24] +.dz equ [ebp-28] +.dtx equ [ebp-32] +.dty equ [ebp-36] -.cbx equ [ebp-40] -.cby equ [ebp-44] -.cex equ [ebp-48] -.cey equ [ebp-52] -.cz equ [ebp-56] +.cbx equ [ebp-40] +.cby equ [ebp-44] +.cex equ [ebp-48] +.cey equ [ebp-52] +.cz equ [ebp-56] .czbuff equ [ebp-60] -.ctx equ [ebp-64] -.cty equ [ebp-68] -.c_scr equ [ebp-72] +.ctx equ [ebp-64] +.cty equ [ebp-68] +.c_scr equ [ebp-72] -.temp1 equ ebp-80 -.temp2 equ ebp-88 -.temp3 equ ebp-76 -.temp4 equ ebp-84 -.temp5 equ ebp-92 +.temp1 equ ebp-80 +.temp2 equ ebp-88 +.temp3 equ ebp-76 +.temp4 equ ebp-84 +.temp5 equ ebp-92 - mov ebp,esp + mov ebp,esp - mov ecx,.y - or ecx,ecx - jl .bl_end - movzx edx,word[size_y_var] - cmp ecx,edx ;SIZE_Y - jge .bl_end + mov ecx,.y + or ecx,ecx + jl .bl_end + movzx edx,word[size_y_var] + cmp ecx,edx ;SIZE_Y + jge .bl_end - cmp eax,ebx - jl .bl_ok - je .bl_end + cmp eax,ebx + jl .bl_ok + je .bl_end if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx - mov edx,.ex1 - xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx + mov edx,.ex1 
+ xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx - mov edx,.tx1 - xchg edx,.tx2 - mov .tx1,edx - mov edx,.ty1 - xchg edx,.ty2 - mov .ty1,edx + mov edx,.tx1 + xchg edx,.tx2 + mov .tx1,edx + mov edx,.ty1 + xchg edx,.ty2 + mov .ty1,edx end if if Ext = MMX - movq mm0,.bx1 - movq mm1,.bx2 - movq mm2,.ex1 - movq mm3,.ex2 - movq mm4,.tx1 - movq mm5,.tx2 - movq .bx2,mm0 - movq .bx1,mm1 - movq .ex1,mm3 - movq .ex2,mm2 - movq .tx1,mm5 - movq .tx2,mm4 + movq mm0,.bx1 + movq mm1,.bx2 + movq mm2,.ex1 + movq mm3,.ex2 + movq mm4,.tx1 + movq mm5,.tx2 + movq .bx2,mm0 + movq .bx1,mm1 + movq .ex1,mm3 + movq .ex2,mm2 + movq .tx1,mm5 + movq .tx2,mm4 end if if Ext>=SSE - movups xmm0,.bx1 - movups xmm1,.bx2 - movups .bx1,xmm1 - movups .bx2,xmm0 - movq mm0,.tx1 - movq mm1,.tx2 - movq .tx1,mm1 - movq .tx2,mm0 + movups xmm0,.bx1 + movups xmm1,.bx2 + movups .bx1,xmm1 + movups .bx2,xmm0 + movq mm0,.tx1 + movq mm1,.tx2 + movq .tx1,mm1 + movq .tx2,mm0 end if -;if Ext>=SSE2 -; movaps xmm4,xmm0 -; movaps xmm0,xmm2 -; movaps xmm2,xmm4 -; movaps xmm5,xmm1 -; movaps xmm1,xmm3 -; movaps xmm3,xmm5 -;else - xchg eax,ebx - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx -;end if + xchg eax,ebx + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx + .bl_ok: -;if Ext >= SSE2 -; shufps xmm0,xmm0,11100001b -; shufps xmm2,xmm2,11100001b -; movlps .bx1,xmm0 -; movlps .bx2,xmm2 - -; shufps xmm0,xmm0,00011011b -; shufps xmm2,xmm2,00011011b -; movd eax,xmm0 -; movd ebx,xmm2 -; shufps xmm0,xmm0,11000110b -; shufps xmm2,xmm2,11000110b -; movd .z1,xmm0 -; movd .z2,xmm2 -; shufps xmm1,xmm1,10110001b -; shufps xmm3,xmm3,10110001b -; movlps .ex1,xmm1 -; movlps .ex2,xmm2 -; movhps .tx1,xmm1 -; movhps .tx2,xmm2 - -; xchg eax,ebx -; mov edx,.z1 -; xchg edx,.z2 -; mov .z1,edx - - -;end if - - push eax - push ebx ;store x1, x2 - movzx ebx,word[size_x_var] + push eax + push ebx ;store x1, x2 + movzx ebx,word[size_x_var] ; mov eax,.x1 - cmp dword .x1,ebx ;dword .x1,SIZE_X - jge .bl_end - cmp dword .x2,0 - jle 
.bl_end + cmp dword .x1,ebx ;dword .x1,SIZE_X + jge .bl_end + cmp dword .x2,0 + jle .bl_end - mov ebx,.x2 - sub ebx,.x1 + mov ebx,.x2 + sub ebx,.x1 -if Ext>=SSE - - sub esp,28 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 -; float using SSE variant ::--> -; movups xmm0,.bx1 ; new -; movups xmm1,.bx2 ; new - - cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point - movlhps xmm0,xmm0 - cvtpi2ps xmm0,.ex1 ;mm2 - cvtpi2ps xmm1,.bx2 ;mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,.ex2 ;mm3 - subps xmm1,xmm0 - - divps xmm1,xmm3 - - shufps xmm1,xmm1,10110001b -; movups .dey,xmm1 ; new - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey,mm0 - movq .dby,mm1 - - movd mm2,.z1 - movd mm3,.z2 - - cvtpi2ps xmm0,.tx1 ;mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,.tx2 ;mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 -; movups xmm0,,z1 ; new -; movups xmm1,.z2 ; new - subps xmm1,xmm0 - - divps xmm1,xmm3 - -; movups .dz,xmm1 ;new - - shufps xmm1,xmm1,10110100b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movd .dz,mm0 - movq .dty,mm1 - -else - - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax - - mov eax,.by2 ; calc .dby - sub eax,.by1 - cdq - idiv ebx - push eax - - mov eax,.ex2 ; calc .dex - sub eax,.ex1 - cdq - idiv ebx - push eax - - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax + mov eax,1 shl 15 + cdq + idiv ebx + mov ebx,eax - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax + mov eax,.bx2 + sub eax,.bx1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax - mov eax,.tx2 ; calc .dtx - sub eax,.tx1 - cdq - idiv ebx - push eax - mov eax,.ty2 ; calc .dty - sub eax,.ty1 - cdq - idiv ebx - push eax -end if - cmp dword .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; cutting triangle exceedes screen - mov ebx,.x1 - neg ebx + + mov eax,.by2 + sub eax,.by1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + 
+ + mov eax,.ex2 + sub eax,.ex1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + + + mov eax,.ey2 + sub eax,.ey1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + + mov eax,.z2 + sub eax,.z1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + mov eax,.tx2 + sub eax,.tx1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + + mov eax,.ty2 + sub eax,.ty1 + sar eax,ROUND + imul ebx + sar eax,15 - ROUND + push eax + + + cmp dword .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; cutting triangle exceedes screen + mov ebx,.x1 + neg ebx ;if Ext >= SSE @@ -1505,305 +1169,305 @@ end if ; addps xmm2,xmm1 ; movups .bx1,xmm2 - mov eax,.dz - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov dword .x1,0 + mov eax,.dz + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov dword .x1,0 - mov eax,.dbx - imul ebx - add .bx1,eax + mov eax,.dbx + imul ebx + add .bx1,eax - mov eax,.dby - imul ebx - add .by1,eax + mov eax,.dby + imul ebx + add .by1,eax - mov eax,.dex - imul ebx - add .ex1,eax + mov eax,.dex + imul ebx + add .ex1,eax - mov eax,.dey - imul ebx - add .ey1,eax + mov eax,.dey + imul ebx + add .ey1,eax - mov eax,.dtx - imul ebx - add .tx1,eax + mov eax,.dtx + imul ebx + add .tx1,eax - mov eax,.dty - imul ebx - add .ty1,eax + mov eax,.dty + imul ebx + add .ty1,eax @@: ; mov ebx,.x2 - movzx eax,word[size_x_var] + movzx eax,word[size_x_var] ; cmp dword .x2,SIZE_X - cmp dword .x2,eax ; eax,ebx - jl @f - mov dword .x2,eax ;SIZE_X + cmp dword .x2,eax ; eax,ebx + jl @f + mov dword .x2,eax ;SIZE_X @@: - movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers - mul .y - add eax,.x1 - lea esi,[4*eax] - add esi,.z_buff ; z-buffer filled with dd variables - lea eax,[eax*3] - add edi,eax + movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers + mul .y + add eax,.x1 + lea esi,[4*eax] + add esi,.z_buff ; z-buffer filled with dd variables + lea eax,[eax*3] + add edi,eax - mov ecx,.x2 - sub ecx,.x1 - ; 
init current variables - push dword .bx1 ; current b, e and t shifted shl ROUND .cbx - push dword .by1 ; .cby - push dword .ex1 ; .cex - push dword .ey1 ; .cey + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 ; current b, e and t shifted shl ROUND .cbx + push dword .by1 ; .cby + push dword .ex1 ; .cex + push dword .ey1 ; .cey - push dword .z1 ; current z shl CATMULL_SHIFT ; .cz - push esi ; .czbuff + push dword .z1 ; current z shl CATMULL_SHIFT ; .cz + push esi ; .czbuff - push dword .tx1 ; .ctx - push dword .ty1 ; .cty - push edi ; .c_scr + push dword .tx1 ; .ctx + push dword .ty1 ; .cty + push edi ; .c_scr if Ext = SSE2 - mov eax,TEXTURE_SIZE - movd xmm1,eax - shufps xmm1,xmm1,0 - push dword TEX_X - push dword -TEX_X - push dword 1 - push dword -1 - movups xmm2,[esp] - movd xmm3,.bmap - shufps xmm3,xmm3,0 + mov eax,TEXTURE_SIZE + movd xmm1,eax + shufps xmm1,xmm1,0 + push dword TEX_X + push dword -TEX_X + push dword 1 + push dword -1 + movups xmm2,[esp] + movd xmm3,.bmap + shufps xmm3,xmm3,0 end if if Ext>=MMX - movq mm7,.cty - movq mm6,.cby - movq mm5,.cey + movq mm7,.cty + movq mm6,.cby + movq mm5,.cey ; movq mm4,.dtyq ; movq mm3,.dbyq end if .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] - jge .skip + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] + jge .skip if Ext=NON - mov eax,.cby - shr eax,ROUND - mov esi,.cbx - shr esi,ROUND + mov eax,.cby + shr eax,ROUND + mov esi,.cbx + shr esi,ROUND else - movq mm1,mm6 - psrld mm1,ROUND - movd eax,mm1 - psrlq mm1,32 - movd esi,mm1 + movq mm1,mm6 + psrld mm1,ROUND + movd eax,mm1 + psrlq mm1,32 + movd esi,mm1 end if - shl eax,TEX_SHIFT - add esi,eax ;- ; esi - current bump map index + shl eax,TEX_SHIFT + add esi,eax ;- ; esi - current bump map index if Ext = SSE2 - movd xmm0,esi - shufps xmm0,xmm0,0 - paddd 
xmm0,xmm2 - pand xmm0,xmm1 - paddd xmm0,xmm3 + movd xmm0,esi + shufps xmm0,xmm0,0 + paddd xmm0,xmm2 + pand xmm0,xmm1 + paddd xmm0,xmm3 - movd ebx,xmm0 - movzx eax,byte[ebx] + movd ebx,xmm0 + movzx eax,byte[ebx] ; ; shufps xmm0,xmm0,11100001b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx,byte[ebx] - sub eax,ebx + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx,byte[ebx] + sub eax,ebx ; ; shufps xmm0,xmm0,11111110b - psrldq xmm0,4 - movd ebx,xmm0 - movzx edx, byte [ebx] + psrldq xmm0,4 + movd ebx,xmm0 + movzx edx, byte [ebx] ; ; shufps xmm0,xmm0,11111111b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx, byte [ebx] - sub edx,ebx + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx, byte [ebx] + sub edx,ebx ; else ; mov ebx,esi ; dec ebx - lea ebx,[esi-1] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx eax,byte [ebx] + lea ebx,[esi-1] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx eax,byte [ebx] ; mov ebx,esi ; inc ebx - lea ebx,[esi+1] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - sub eax,ebx + lea ebx,[esi+1] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + sub eax,ebx ; mov ebx,esi ; sub ebx,TEX_X - lea ebx,[esi-TEX_X] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx edx,byte [ebx] + lea ebx,[esi-TEX_X] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx edx,byte [ebx] ; mov ebx,esi ; add ebx,TEX_X - lea ebx,[esi+TEX_X] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - sub edx,ebx + lea ebx,[esi+TEX_X] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + sub edx,ebx end if ; eax - horizontal sub modificated x coord ; edx - vertical sub modificated y coord if Ext=NON - mov ebx,.cex ;.cex - current env map X - shr ebx,ROUND - add eax,ebx + mov ebx,.cex ;.cex - current env map X + shr ebx,ROUND + add eax,ebx - mov ebx,.cey ;.cey - current env map y - shr ebx,ROUND - add edx,ebx + mov ebx,.cey ;.cey - current env map y + shr ebx,ROUND + add edx,ebx else - movq mm1,mm5 ; mm5 - copy of cur env coords - psrld mm1,ROUND - movd ebx,mm1 - psrlq 
mm1,32 - add eax,ebx - movd ebx,mm1 - add edx,ebx + movq mm1,mm5 ; mm5 - copy of cur env coords + psrld mm1,ROUND + movd ebx,mm1 + psrlq mm1,32 + add eax,ebx + movd ebx,mm1 + add edx,ebx ; movq qword[.temp1],mm3 ; add eax,dword [.temp1] ; add edx,dword [.temp1+4] end if - or eax,eax - jl .black - cmp eax,TEX_X - jg .black - or edx,edx - jl .black - cmp edx,TEX_Y - jg .black + or eax,eax + jl .black + cmp eax,TEX_X + jg .black + or edx,edx + jl .black + cmp edx,TEX_Y + jg .black - shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze - add edx,eax ; proponuje nie stawiac czarnego pixela tylko - lea esi,[edx*3] ; niezaburzony. - add esi,.emap ; - lodsd + shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze + add edx,eax ; proponuje nie stawiac czarnego pixela tylko + lea esi,[edx*3] ; niezaburzony. + add esi,.emap ; + lodsd if Ext=NON - mov edx,.cty - shr edx,ROUND ; sar + mov edx,.cty + shr edx,ROUND ; sar - mov edi,.ctx - shr edi,ROUND ; sar + mov edi,.ctx + shr edi,ROUND ; sar else - movq mm1,mm7 - psrld mm1,ROUND - movd edx,mm1 - psrlq mm1,32 - movd edi,mm1 + movq mm1,mm7 + psrld mm1,ROUND + movd edx,mm1 + psrlq mm1,32 + movd edi,mm1 end if - shl edx,TEX_SHIFT - add edi,edx - and edi,TEXTURE_SIZE - lea esi,[edi*3] - add esi,.tex_map + shl edx,TEX_SHIFT + add edi,edx + and edi,TEXTURE_SIZE + lea esi,[edi*3] + add esi,.tex_map if Ext=NON - mov edx,eax - lodsd - push ax - mul dl - mov dl,ah - pop ax - shr ax,8 - mul dh - mov al,dl - mov edi,.c_scr - stosw - shr edx,16 - shr eax,16 - mul dl - shr ax,8 - stosb + mov edx,eax + lodsd + push ax + mul dl + mov dl,ah + pop ax + shr ax,8 + mul dh + mov al,dl + mov edi,.c_scr + stosw + shr edx,16 + shr eax,16 + mul dl + shr ax,8 + stosb else - movd mm0,eax - pxor mm1,mm1 - punpcklbw mm0,mm1 - movd mm2,[esi] - punpcklbw mm2,mm1 - pmullw mm0,mm2 - psrlw mm0,8 - packuswb mm0,mm1 - mov edi,.c_scr - movd [edi],mm0 + movd mm0,eax + pxor mm1,mm1 + punpcklbw mm0,mm1 + movd mm2,[esi] + punpcklbw mm2,mm1 + 
pmullw mm0,mm2 + psrlw mm0,8 + packuswb mm0,mm1 + mov edi,.c_scr + movd [edi],mm0 end if - jmp .actual_zbuff ; actualize z buffer + jmp .actual_zbuff ; actualize z buffer @@: .black: - xor eax,eax - mov edi,.c_scr - stosd + xor eax,eax + mov edi,.c_scr + stosd .actual_zbuff: - mov eax,.cz - mov edi,.czbuff - stosd + mov eax,.cz + mov edi,.czbuff + stosd .skip: - add dword .czbuff,4 - add dword .c_scr,3 + add dword .czbuff,4 + add dword .c_scr,3 if Ext=NON - mov eax,.dbx - add .cbx,eax - mov ebx,.dby - add .cby,ebx + mov eax,.dbx + add .cbx,eax + mov ebx,.dby + add .cby,ebx - mov edx,.dex - add .cex,edx - mov eax,.dey - add .cey,eax + mov edx,.dex + add .cex,edx + mov eax,.dey + add .cey,eax - mov ebx,.dtx - add .ctx,ebx - mov edx,.dty - add .cty,edx + mov ebx,.dtx + add .ctx,ebx + mov edx,.dty + add .cty,edx else - paddd mm7,.dty - paddd mm6,.dby - paddd mm5,.dey + paddd mm7,.dty + paddd mm6,.dby + paddd mm5,.dey end if - mov eax,.dz - add .cz,eax + mov eax,.dz + add .cz,eax - dec ecx - jnz .draw + dec ecx + jnz .draw .bl_end: - mov esp,ebp + mov esp,ebp ret 76 ;Ext = MMX diff --git a/programs/demos/view3ds/chunks.inc b/programs/demos/view3ds/chunks.inc new file mode 100644 index 0000000000..051c1a8a1b --- /dev/null +++ b/programs/demos/view3ds/chunks.inc @@ -0,0 +1,417 @@ +;========================================================================= +detect_chunks: +; make pivot table, sort, remove unused vertices, find chunks... +; in - some global variables +; out: +; ebx - chunks list ptr, every chunk as word +; ecx - chunks number +; esi - tri_ch ; vertices with triangles list +; edi - t_ptr ; pointers to tri_ch list + + push ebp + mov ebp,esp + sub esp,60 + + + .tri_ch equ dword[ebp-4] ; tri chunks list ptr + .chunks equ dword[ebp-8] ; chunks ptreach tri chunk No. 
as word + .endt equ dword[ebp-12] ; + .t_ptr equ dword[ebp-16] ; pointers list + .tri_ch1 equ dword[ebp-20] ; + .up equ dword[ebp-24] ; upload ptr + .chmr equ dword[ebp-28] ; bit mark list if tri stored + .str equ dword[ebp-32] ; store ptr + .ltch1 equ dword[ebp-36] ; end of tri_ch1 ptr + .ch_cnt equ dword[ebp-40] + .cntt equ dword[ebp-44] + .cc equ dword[ebp-48] + .lsparam equ dword[ebp-52] + .fix_mark equ dword[ebp-56] + .endVptr equ dword[ebp-60] + + ; some triangles have repeated indices of vertices + ; check and remove such triangles + call remove_dead_tri + + mov ecx,[triangles_count_var] + shl ecx,3 + lea ecx,[ecx*3] + add ecx,100 + mov eax,68 + mov ebx,12 + int 0x40 + mov .tri_ch,eax + + + mov ecx,[triangles_count_var] + imul ecx,[i12] + add ecx,32 + mov eax,68 + mov ebx,12 + int 0x40 + mov .tri_ch1,eax + + mov ecx,[points_count_var] + shl ecx,2 + add ecx,1120 + mov eax,68 + mov ebx,12 + int 0x40 + mov .t_ptr,eax + + mov ecx,[triangles_count_var] + shl ecx,1 + add ecx,20 + mov eax,68 + mov ebx,12 + int 0x40 + mov .chunks,eax + + mov ecx,[triangles_count_var] + shr ecx,3 + add ecx,20 + mov eax,68 + mov ebx,12 + int 0x40 + mov .chmr,eax ; chunks mark if bit is set - tri was used + + mov edi,eax + pxor xmm0,xmm0 + mov ecx,[triangles_count_var] + shr ecx,7 + inc ecx + @@: + movdqa [edi],xmm0 + add edi,16 + loop @b + + + mov eax,[points_count_var] + imul eax,[i12] + add eax,[points_ptr] + mov .endVptr,eax + +; make pivot table + + mov edi,.tri_ch + mov esi,[triangles_ptr] + xor ecx,ecx + @@: + movd xmm1,ecx + movq xmm0,[esi] + pshufd xmm1,xmm1,0 + movd xmm2,[esi+8] + punpckldq xmm0,xmm1 + punpckldq xmm2,xmm1 + movdqu [edi],xmm0 + movq [edi+16],xmm2 + add esi,12 + add edi,24 + inc ecx + cmp ecx,[triangles_count_var] + jnz @b + +;sort + + mov ebx,.tri_ch + mov ecx,[triangles_count_var] + lea ecx,[ecx*3] + + mov esi,ecx + shl esi,3 + add esi,ebx + mov .endt,esi + + .ccc: ; ebx - vert index + mov eax,[ebx+8] ; ebx+4 - tri index + cmp eax,[ebx] + jge .g + movq 
xmm0,[ebx+8] + push ebx + .c: + cmp ebx,esi + jae .done + cmp ebx,.tri_ch + jb .done + cmp eax,[ebx] + jae .done + movq xmm7,[ebx] + movq [ebx+8],xmm7 + sub ebx,8 + jnc .c + add ebx,8 + .done: + movq [ebx+8],xmm0 + .p: + pop ebx + .g: + add ebx,8 + dec ecx + cmp ecx,1 + jnz .ccc + + + + + + mov ecx,[points_count_var] + mov esi,.tri_ch + dec ecx + .ptC: + mov eax,[esi] + add esi,8 + .ptCff: + cmp esi,.endt + jae .dnC + cmp eax,[esi] + je @f + lea ebx,[eax+1] + cmp ebx,[esi] + jne .movt + dec ecx + jz .dnC ; check done + @@: + jmp .ptC + +; jmp .dnC + + .movt: + + movd xmm5,esi + movd xmm7,ebx + mov edi,[esi] + sub edi,ebx + movd xmm6,edi + + @@: + cmp esi,.endt + jnb @f + sub [esi],edi ; fix .tri_ch pivot table list + add esi,8 + jmp @b + @@: + + + ;shrink vert + lea ebx,[ebx*3] + shl ebx,2 + add ebx,[points_ptr] + imul edi,[i12] + add edi,ebx + + cmp edi,.endVptr ; fix points_r list + ja .dnV + @@: + movq xmm0,[edi] + movd xmm1,[edi+8] + movq [ebx],xmm0 + movd [ebx+8],xmm1 + add edi,12 + add ebx,12 + cmp edi,.endVptr ; fix point_r list + jna @b + + .dnV: + +; recalc tri all indices above ebx - sub edi + push ecx + + mov esi,[triangles_ptr] + mov ecx,[triangles_count_var] + lea ecx,[ecx*3] + movd edi,xmm6 + movd ebx,xmm7 + .agT: + cmp [esi],ebx + jb @f + sub [esi],edi + @@: + add esi,4 + loop .agT + + pop ecx + + movd esi,xmm5 + + sub [points_count_var],edi + + dec ecx + + jmp .ptCff ; again check sth found + + + .dnC: ; check done + + + + .do_ch: + + +;make t_ptr - table with pointers/adresses + + + mov ecx,[points_count_var] + mov esi,.tri_ch + mov edi,.t_ptr + mov ebx,ecx + + mov [edi],esi + add edi,4 + dec ecx + jz .dn + .pt: + mov eax,[esi] ; [esi] - vert ind + add esi,8 + cmp eax,[esi] ; [esi+4] - tri ind + je @f + mov [edi],esi + add edi,4 + dec ecx + jz .dn + @@: + cmp esi,.endt + jb .pt + + + .dn: + + + ; each dword position in .t_ptr list - adress of corresponding + ; triangles indices, each triangles from such index contains this + ; vertice + + + mov 
eax,[triangles_count_var] + mov .cntt,eax ; temp help cnt + xor ecx,ecx + mov .cc,ecx + mov esi,[triangles_ptr] + mov edi,.tri_ch1 + imul eax,[i12] + add eax,edi + mov .ltch1,eax ; last + + mov .up,esi + mov .str,edi + .lb1: ; nx chunk + cmp edi,.ltch1 + jnb .endl + mov edi,.tri_ch1 + mov .str,edi + mov eax,.cc + mov edx,.cc + inc .cc + cmp edx,[triangles_count_var] + jz .endl + shr eax,3 + and edx,111b + add eax,.chmr + + xor ebx,ebx + bt [eax],edx ; mark + jc @f ; tri was stored + inc ecx + or ebx,1b + mov esi,.up + movdqu xmm0,[esi] + movdqu [edi],xmm0 + add .str,12 + @@: + add .up,12 + or ebx,ebx + jz .lb1 + + .lb2: + mov eax,[edi] + mov edx,[edi] ; edx - vert ind + shl eax,2 + add eax,.t_ptr + mov eax,[eax] ; [eax] - t ptr + or eax,eax + jz .endl + .nxt: + + mov esi,[eax+4] + mov ebx,[eax+4] + shr esi,3 + and ebx,111b + add esi,.chmr + bts [esi],ebx ; mark + jc @f ; tri was stored + dec .cntt + je .endl + mov esi,[eax+4] ; [eax+4] - tri ind + add esi,esi + add esi,.chunks + mov [esi],cx + mov esi,[eax+4] + + imul esi,[i12] + add esi,[triangles_ptr] + movups xmm0,[esi] + mov esi,.str + movups [esi],xmm0 + add .str,12 + @@: + add eax,8 + cmp edx,[eax] + je .nxt + add edi,4 + + cmp edi,.str + jne .lb2 + jmp .lb1 + + .endl: + + mov .ch_cnt,ecx + + + .end: + + + +; mov eax,68 +; mov ebx,13 +; mov ecx,.t_ptr +; int 0x40 + +; mov eax,68 +; mov ebx,13 +; mov ecx,.tri_ch +; int 0x40 + + mov eax,68 + mov ebx,13 + mov ecx,.tri_ch1 + int 0x40 + + mov eax,68 + mov ebx,13 + mov ecx,.chmr + int 0x40 +; for now free mem - cunks list - unused + +; mov eax,68 +; mov ebx,13 +; mov ecx,.chunks +; int 0x40 + + + +; mov ebx,.chunks + mov ecx,.ch_cnt + + mov esi,.tri_ch + mov edi,.t_ptr + + + mov esp,ebp + pop ebp +ret + + + + + + diff --git a/programs/demos/view3ds/data.inc b/programs/demos/view3ds/data.inc index 4a7d99d894..9b3f2eef7c 100644 --- a/programs/demos/view3ds/data.inc +++ b/programs/demos/view3ds/data.inc @@ -1,7 +1,7 @@ ; DATA AREA ************************************ 
- if Ext > SSE2 + ; if Ext > SSE2 isSSE3 db 1 - end if + ; end if i3 dw 3 i6 dd 6 i12 dd 12 @@ -28,14 +28,15 @@ y_offset dw SIZE_Y / 2 z_offset dw 0 rsscale dd 175.0 ; next real scale - vect_x: dw SIZE_X / 2 - vect_y dw SIZE_Y / 2 - vect_z dw 0 - size_y_var: - yres_var dw SIZE_Y - size_x_var: - xres_var dw SIZE_X +; vect_x: dw SIZE_X / 2 +; vect_y dw SIZE_Y / 2 +; vect_z dw 0 +; size_y_var: +; yres_var dw SIZE_Y +; +; size_x_var: +; xres_var dw SIZE_X angle_x dw 0 angle_y dw 0 @@ -64,7 +65,7 @@ screen_ptr dd 0 Zbuffer_ptr dd 0 vertices_index_ptr dd 0 - vertex_edit_no dw 0 + vertex_edit_no dd -1 edit_start_x: dw 0 edit_start_y dw 0 @@ -86,10 +87,11 @@ db 3 db 'shd. model' + max_dr_flg: if Ext >= SSE3 - max_dr_flg db 15 + db 15 else - db 12 + db 12 end if dr_flag db 0 ; 6 - dots dd shd_f @@ -290,6 +292,7 @@ flags: ; flags description db 'x+y ' db ' x ' db 'keys' + onoff_f: db 'off ' db 'on ' @@ -354,7 +357,7 @@ base_vector: if Ext=SSE3 db ' (SSE3)' end if - db ' 0.075',0 + db ' 0.076',0 labellen: STRdata db '-1 ' lab_vert: @@ -367,6 +370,9 @@ base_vector: db 'Edges count: ' lab_ed_end: + db 'Chunks detected:' + + all_lights_size dw lightsend-lights @@ -483,6 +489,17 @@ end if times 4 dd 1.0 eps: times 4 dd 0.00000 + + vect_x: dw SIZE_X / 2 + vect_y dw SIZE_Y / 2 + vect_z dw 0 + size_y_var: + yres_var dw SIZE_Y + + size_x_var: + xres_var dw SIZE_X + + epsone dd 1.0001 aprox dd 0.0001 epsminus dd -0.0001 @@ -495,8 +512,8 @@ end if fsize dd 0 ;180000 ; sizeof(workarea) fptr dd 0 ;workarea file_name: - db '/rd/1/3d/house.3ds',0 - ; db '/tmp0/1/ant.3ds',0 + db '/rd/1/3d/house.3ds',0 + ; db '/tmp0/1/sc.3ds',0 rb 256 @@ -521,7 +538,9 @@ align 8 points_count_var dd ? ; triangles_count_var dd ? ; dont change order edges_count dd ? ; + chunks_number dd ? tex_points_ptr dd ? + chunks_ptr dd ? temp_col dw ? high dd ? 
diff --git a/programs/demos/view3ds/flat_cat.inc b/programs/demos/view3ds/flat_cat.inc index b51f653c7e..3bd2842886 100644 --- a/programs/demos/view3ds/flat_cat.inc +++ b/programs/demos/view3ds/flat_cat.inc @@ -13,63 +13,55 @@ flat_triangle_z: ; -------------------- stack : z coordinates ; -------------------- Z-buffer : each z variable as dword ; -------------------- (Z coor. as word) shl CATMULL_SHIFT -.z1 equ word[ebp+4] -.z2 equ word[ebp+6] ; each z coordinate as word integer -.z3 equ word[ebp+8] +.z1 equ word[ebp+4] +.z2 equ word[ebp+6] ; each z coordinate as word integer +.z3 equ word[ebp+8] -.col equ dword[ebp-4] -.x1 equ word[ebp-6] -.y1 equ word[ebp-8] -.x2 equ word[ebp-10] -.y2 equ word[ebp-12] -.x3 equ word[ebp-14] -.y3 equ word[ebp-16] +.col equ dword[ebp-4] +.x1 equ word[ebp-6] +.y1 equ word[ebp-8] +.x2 equ word[ebp-10] +.y2 equ word[ebp-12] +.x3 equ word[ebp-14] +.y3 equ word[ebp-16] -.dx12 equ dword[ebp-20] -;.dz12 equ dword[ebp-24] -.dx13 equ dword[ebp-24] -.dz13 equ dword[ebp-28] -.dz12 equ dword[ebp-32] -;.dz13 equ dword[ebp-32] -.dx23 equ dword[ebp-36] -.dz13M equ [ebp-40] -.dz23 equ dword[ebp-44] -.zz1 equ dword[ebp-48] -.zz2 equ dword[ebp-52] -.zz2M equ qword[ebp-52] -.dz12M equ qword[ebp-32] -.dz23M equ qword[ebp-44] -;if Ext>=MMX -; emms -;end if - mov ebp,esp +.dx12 equ dword[ebp-20] +.dz12 equ dword[ebp-24] +.dx13 equ dword[ebp-28] +.dz13 equ dword[ebp-32] +.dx23 equ dword[ebp-36] +.dz23 equ dword[ebp-40] +.zz1 equ dword[ebp-44] +.zz2 equ dword[ebp-48] - push edx ; store edx in variable .col + mov ebp,esp + + push edx ; store edx in variable .col .sort2: - cmp ax,bx - jle .sort1 - xchg eax,ebx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx + cmp ax,bx + jle .sort1 + xchg eax,ebx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx .sort1: - cmp bx,cx - jle .sort3 - xchg ebx,ecx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .sort2 + cmp bx,cx + jle .sort3 + xchg ebx,ecx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .sort2 .sort3: - push eax ; store triangle 
coordinates in user friendly variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .ft_loop2_end + push eax ; store triangle coordinates in user friendly variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .ft_loop2_end ; cmp ax,SIZE_Y ; jle @f ; cmp bx,SIZE_Y @@ -87,189 +79,167 @@ flat_triangle_z: ; jle @f ; jmp .ft_loop2_end ;@@: - sub esp,52-12 + ; sub esp,52-12 - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .ft_dx12_make - mov .dx12,0 - mov .dz12,0 - jmp .ft_dx12_done + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .ft_dx12_make + push dword 0 + push dword 0 + jmp .ft_dx12_done .ft_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx12,eax + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + push eax - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - mov .dz12,eax + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax .ft_dx12_done: - mov bx,.y3 ; calc delta 13 - sub bx,.y1 - jnz .ft_dx13_make - mov .dx13,0 - mov .dz13,0 - mov dword .dz13M,0 - jmp .ft_dx13_done + mov bx,.y3 ; calc delta 13 + sub bx,.y1 + jnz .ft_dx13_make + push dword 0 + push dword 0 + jmp .ft_dx13_done .ft_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx13,eax + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + push eax + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv 
ebx - mov .dz13,eax - mov dword .dz13M,eax .ft_dx13_done: - mov bx,.y3 ; calc delta 23 - sub bx,.y2 - jnz .gt_dx23_make - mov .dx23,0 - mov .dz23,0 - jmp .gt_dx23_done + ; sub esp,48 + mov bx,.y3 ; calc delta 23 + sub bx,.y2 + jnz .gt_dx23_make + push dword 0 + push dword 0 + ; mov .dx23,0 + ; mov .dz23,0 + jmp .gt_dx23_done .gt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx23,eax + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + push eax - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - mov .dz23,eax + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + ; mov .dz23,eax .gt_dx23_done: - movsx edx,.z1 - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx - movsx eax,.x1 - shl eax,ROUND ; eax - x1 - mov ebx,eax ; ebx - x2 -;if Ext>=MMX -; movq mm0,.zz2M -;end if - mov cx,.y1 - cmp cx,.y2 - jge .ft_loop1_end + movsx edx,.z1 + shl edx,CATMULL_SHIFT + push edx + push edx + + movsx eax,.x1 + shl eax,ROUND ; eax - x1 + mov ebx,eax ; ebx - x2 + mov cx,.y1 + cmp cx,.y2 + jge .ft_loop1_end .ft_loop1: - pushad + pushad - push .col - push cx ; y - sar ebx,ROUND - push bx ; x2 - sar eax,ROUND - push ax ; x1 -;if Ext>=MMX -; sub esp,8 -; movq [esp],mm0 -;else - push .zz2 ; z2 shl CATMULL_SHIFT - push .zz1 ; z1 shl CATMULL_SHIFT + push .col + push cx ; y + sar ebx,ROUND + push bx ; x2 + sar eax,ROUND + push ax ; x1 + push .zz2 ; z2 shl CATMULL_SHIFT + push .zz1 ; z1 shl CATMULL_SHIFT + + call flat_line_z + + popad + + add eax,.dx13 + add ebx,.dx12 + + mov edx,.dz13 + add .zz1,edx + mov edx,.dz12 + add .zz2,edx ;end if - call flat_line_z - - popad - - add eax,.dx13 - add ebx,.dx12 -;if Ext>=MMX -; paddd mm0,.dz12M -;else - - mov edx,.dz13 - add .zz1,edx - mov edx,.dz12 - add .zz2,edx -;end if - inc cx - cmp cx,.y2 - jl .ft_loop1 + inc cx + cmp cx,.y2 + jl .ft_loop1 .ft_loop1_end: - movsx edx,.z2 - shl edx,CATMULL_SHIFT - mov .zz2,edx - 
movsx ebx,.x2 - shl ebx,ROUND -;if Ext>=MMX -; movq mm0,.zz2M -;; push .dz13 ; exchange -;; pop .dz12 -;; push .dz23 ; exchange -;; pop .dz13 -;end if - mov cx,.y2 - cmp cx,.y3 - jge .ft_loop2_end + movsx edx,.z2 + shl edx,CATMULL_SHIFT + mov .zz2,edx + movsx ebx,.x2 + shl ebx,ROUND + + mov cx,.y2 + cmp cx,.y3 + jge .ft_loop2_end .ft_loop2: - pushad + pushad - push .col - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax ; x1 -;if Ext>=MMX -; sub esp,8 -; movq [esp],mm0 -;else - push .zz2 ; z2 shl CATMULL_SHIFT - push .zz1 ; z1 shl CATMULL_SHIFT -;end if - call flat_line_z + push .col + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax ; x1 - popad + push .zz2 ; z2 shl CATMULL_SHIFT + push .zz1 ; z1 shl CATMULL_SHIFT - add eax,.dx13 - add ebx,.dx23 -;if Ext>=MMX -; paddd mm0,.dz23M -;else - mov edx,.dz13 - add .zz1,edx - mov edx,.dz23 - add .zz2,edx + call flat_line_z -; mov edx,.dz13 -; add .zz1,edx -; mov edx,.dz12 -; add .zz2,edx -;end if - inc cx - cmp cx,.y3 - jl .ft_loop2 + popad + + add eax,.dx13 + add ebx,.dx23 + + mov edx,.dz13 + add .zz1,edx + mov edx,.dz23 + add .zz2,edx + + inc cx + cmp cx,.y3 + jl .ft_loop2 .ft_loop2_end: - mov esp,ebp + mov esp,ebp ret 6 flat_line_z: @@ -286,16 +256,16 @@ flat_line_z: .dz equ dword [ebp-4] - mov ebp,esp + mov ebp,esp ;; sub esp,4 - mov ax,.y - or ax,ax - jl .fl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;[size_y_var] + mov ax,.y + or ax,ax + jl .fl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;[size_y_var] ; cmp ax,SIZE_Y-1 - jg .fl_quit + jg .fl_quit ; cmp .x1,0 ; jge .fl_ok1 @@ -307,91 +277,93 @@ flat_line_z: ; cmp .x2,SIZE_X ; jg .fl_quit ; .fl_ok2: - mov ax,.x1 - cmp ax,.x2 - je .fl_quit - jl .fl_ok + mov ax,.x1 + cmp ax,.x2 + je .fl_quit + jl .fl_ok - xchg ax,.x2 - mov .x1,ax - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx + xchg ax,.x2 + mov .x1,ax + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx .fl_ok: - mov bx,[size_x_var] - dec bx - cmp .x1,bx ;SIZE_X-1 - jg .fl_quit - cmp .x2,0 - jle 
.fl_quit + mov bx,[size_x_var] + dec bx + cmp .x1,bx ;SIZE_X-1 + jg .fl_quit + cmp .x2,0 + jle .fl_quit - mov eax,.z2 - sub eax,.z1 + mov eax,.z2 + sub eax,.z1 cdq - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx - idiv ebx + mov bx,.x2 + sub bx,.x1 + movsx ebx,bx + idiv ebx ;; mov .dz,eax ; calculated delta - shifted .dz - push eax + push eax - cmp .x1,0 - jge @f - movsx ebx,.x1 - neg ebx - imul ebx - add .z1,eax - mov .x1,0 + cmp .x1,0 + jge @f + movsx ebx,.x1 + neg ebx + imul ebx + add .z1,eax + mov .x1,0 @@: - movzx edx,word[size_x_var] - cmp .x2,dx ;[size_x_var] ;SIZE_X - jl @f - mov .x2,dx ;[size_x_var] ;SIZE_X + movzx edx,word[size_x_var] + cmp .x2,dx ;[size_x_var] ;SIZE_X + jl @f + mov .x2,dx ;[size_x_var] ;SIZE_X @@: ; movzx edx,[size_x_var] ;SIZE_X - movsx eax,.y - mul edx ; edi = edi + (SIZE_X * y + x1)*3 - movsx edx,.x1 - add eax,edx - push eax - lea eax,[eax*3] - add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 - pop eax - shl eax,2 - add esi,eax + movsx eax,.y + mul edx ; edi = edi + (SIZE_X * y + x1)*3 + movsx edx,.x1 + add eax,edx + push eax + lea eax,[eax*3] + add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 + pop eax + shl eax,2 + add esi,eax - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx - mov eax,.col - mov ebx,.z1 ; ebx : curr. z - mov edx,.dz - dec ecx - jecxz .draw_last + mov eax,.col + mov ebx,.z1 ; ebx : curr. 
z + mov edx,.dz + dec ecx + jecxz .draw_last .ddraw: - cmp ebx,dword[esi] - ; cmovl [edi],eax - ; cmovl [esi],ebx - jge @f - stosd - dec edi - mov dword[esi],ebx - jmp .no_skip + cmp ebx,dword[esi] + ; cmovl [edi],eax + ; cmovl [esi],ebx + jge @f + mov [edi],eax + mov [esi],ebx + ; stosd ; less branches + ; dec edi + ; mov dword[esi],ebx + ; jmp .no_skip @@: - add edi,3 - .no_skip: - add esi,4 - add ebx,edx - loop .ddraw + add edi,3 + ; .no_skip: + add esi,4 + add ebx,edx + loop .ddraw .draw_last: - cmp ebx,dword[esi] - jge .fl_quit + cmp ebx,dword[esi] + jge .fl_quit stosw - shr eax,16 + shr eax,16 stosb - mov dword[esi],ebx + mov dword[esi],ebx .fl_quit: diff --git a/programs/demos/view3ds/grd_tex.inc b/programs/demos/view3ds/grd_tex.inc index ffa326d39c..b23e1b6eed 100644 --- a/programs/demos/view3ds/grd_tex.inc +++ b/programs/demos/view3ds/grd_tex.inc @@ -4,12 +4,18 @@ CATMULL_SHIFT equ 8 ROUND equ 8 ;NON=0 ;MMX=1 -;Ext=MMX +;SSE=2 +;SSE2=3 + +;Ext=SSE2 + + ;TEX_SIZE=0x3fff ;SIZE_X equ 512 ;SIZE_Y equ 512 ;ROUND = 8 -;TEX_SHIFT equ 6 + ; TEX_SHIFT equ 6 + ; TEXTURE_SIZE = 0xFFFFF ; procedure drawing textured triangle with Gouraud shading ; Z-buffer alghoritm included, Z coord interpolation ---- @@ -35,17 +41,17 @@ tex_plus_grd_triangle: .tex_y1 equ [ebp+30] .tex_x1 equ [ebp+28] - .z3 equ [ebp+26] + .z3 equ [ebp+26] .col3b equ [ebp+24] .col3g equ [ebp+22] .col3r equ [ebp+20] - .z2 equ [ebp+18] + .z2 equ [ebp+18] .col2b equ [ebp+16] .col2g equ [ebp+14] .col2r equ [ebp+12] - .z1 equ [ebp+10] + .z1 equ [ebp+10] .col1b equ [ebp+8] .col1g equ [ebp+6] .col1r equ [ebp+4] @@ -56,73 +62,69 @@ tex_plus_grd_triangle: .z_ptr equ dword[ebp-8] .scr_buff equ dword[ebp-12] - .x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10] - .y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12] - .x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14] - .y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16] - .x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18] - .y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20] + .x1 equ word[ebp-14] ;dw ? 
;equ word[ebp-10] + .y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12] + .x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14] + .y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16] + .x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18] + .y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20] .dx12 equ dword[ebp-28] ;dd ? - .tex_dx12 equ dword[ebp-32] ;dd ? - .tex_dy12 equ [ebp-36] ;dd ? + .tex_dy12 equ [ebp-32] ;dd ? + .tex_dx12 equ [ebp-36] ;dd ? .dz12 equ dword[ebp-40] ;dd ? - .dc12r equ [ebp-44] ;dd ? + .dc12r equ [ebp-44] ;dd ? .dc12g equ dword[ebp-48] ;dd ? - .dc12b equ [ebp-52] ;dd ? + .dc12b equ [ebp-52] ;dd ? .dx23 equ dword[ebp-56] ;dd ? - .tex_dx23 equ dword[ebp-60] ;dd ? - .tex_dy23 equ [ebp-64] ;dd ? + .tex_dy23 equ [ebp-60] ;dd ? + .tex_dx23 equ [ebp-64] ;dd ? .dz23 equ dword[ebp-68] ;dd ? - .dc23r equ [ebp-72] ;dd ? + .dc23r equ [ebp-72] ;dd ? .dc23g equ dword[ebp-76] ;dd ? - .dc23b equ [ebp-80] ;dword[ebp-8]dd ? + .dc23b equ [ebp-80] ;dword[ebp-8]dd ? .dx13 equ dword[ebp-84] ;dd ? - .tex_dx13 equ dword[ebp-88] ;dd ? - .tex_dy13 equ [ebp-92] ;dd ? + .tex_dy13 equ [ebp-88] ;dd ? + .tex_dx13 equ [ebp-92] ;dd ? .dz13 equ dword[ebp-96] ;dd ? - .dc13r equ [ebp-100] ;dd ? + .dc13r equ [ebp-100] ;dd ? .dc13g equ dword[ebp-104] ;dd ? - .dc13b equ [ebp-108] ;dd ? + .dc13b equ [ebp-108] ;dd ? - .scan_x1 equ dword[ebp-112] ;dd ? - .scan_y1 equ [ebp-116] ;dd ? + .scan_y1 equ [ebp-112] ;dd ? + .scan_x1 equ [ebp-116] ;dd ? .zz1 equ dword[ebp-120] ;dw ? - .cur1r equ [ebp-124] ;dw ? - .cur1g equ dword[ebp-128] ;dw ? - .cur1b equ [ebp-132] ;dw ? + .cur1r equ [ebp-124] ;dw ? + .cur1g equ [ebp-128] ;dw ? + .cur1b equ [ebp-132] ;dw ? - .scan_x2 equ dword[ebp-136] ;dd ? - .scan_y2 equ [ebp-140] ;dd ? - .zz2 equ dword[ebp-144] ;dw ? - .cur2r equ [ebp-148] ;dw ? - .cur2g equ dword[ebp-152] ;dw ? - .cur2b equ [ebp-156] ;dw ? + .scan_y2 equ [ebp-136] ;dd ? + .scan_x2 equ [ebp-140] ;dd ? + .zz2 equ [ebp-144] ;dw ? + .cur2r equ [ebp-148] ;dw ? + .cur2g equ [ebp-152] ;dw ? + .cur2b equ [ebp-156] ;dw ? 
mov ebp,esp - ; mov .tex_ptr,edx - ; mov .z_ptr,esi - ; mov .scr_buff,edi - push edx esi edi -; push esi -; push edi - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop2_end + push edx esi edi + + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop2_end .sort3: cmp ax,bx jle .sort1 xchg eax,ebx if Ext>=MMX - movq mm0, .col1r ; exchange r, g, b, z + movq mm0, .col1r ; exchange r, g, b, z movq mm1, .col2r movq .col1r ,mm1 movq .col2r ,mm0 @@ -141,12 +143,12 @@ end if mov dword .tex_x1 ,edx .sort1: - cmp bx,cx - jle .sort2 + cmp bx,cx + jle .sort2 xchg ebx,ecx if Ext>=MMX - movq mm0, .col2r ; exchange r, g, b, z + movq mm0, .col2r ; exchange r, g, b, z movq mm1, .col3r movq .col3r ,mm0 movq .col2r ,mm1 @@ -169,528 +171,352 @@ end if .sort2: - push eax ebx ecx ; store in variables + push eax ebx ecx ; store in variables ; push ebx ; push ecx ;****************** delta computng zone ************** ;+++++++++ first zone - mov bx,.y2 ; calc delta12 - sub bx,.y1 - jnz .dx12_make - mov ecx,7 + mov bx,.y2 ; calc delta12 + sub bx,.y1 + jnz .dx12_make + mov ecx,7 @@: - push dword 0 - loop @b - jmp .dx12_done + push dword 0 + loop @b + jmp .dx12_done .dx12_make: + ; sub esp,7*4 - - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx - ; mov .dx12,eax - push eax + idiv ebx + ; push eax + mov ebx,eax -if 0 ; Ext=SSE - movd mm0,.col1r ; 2 words r, g - pxor mm1,mm1 - punpcklwd mm0,mm1 - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - movd mm0,.col1g ; 2 words b, z - punpcklwd mm0,mm1 - cvtpi2ps xmm0,mm0 - ; xmm0=four float double words - divss xmm0,.pack3 - ;convert and insert mm0 
to lower xmm1 .. -end if - mov ax,word .tex_x2 - sub ax,word .tex_x1 + mov ax,.x2 + sub ax,.x1 cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx12r,eax - push eax + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx12,eax - mov ax,word .tex_y2 - sub ax,word .tex_y1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx12,eax - push eax + sub esp,6*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + ; pshufd xmm0,xmm0,0 + movlhps xmm0,xmm0 + movq xmm1,.col1r + movq xmm2,.col2r + movhps xmm1,.tex_x1 + movhps xmm2,.tex_x2 + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc12b,xmm2 + ; punpcklwd xmm4,xmm5 + ; psrad xmm4,15 - ROUND + movq .tex_dx12,xmm4 - mov ax,word .z2 - sub ax,word .z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz12,eax - push eax ; .dza12 - - mov ax,word .col2r - sub ax,word .col1r - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12r,eax - push eax - - mov ax,word .col2g - sub ax,word .col1g - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12g,eax - push eax - - mov ax,word .col2b ;;--- - sub ax,word .col1b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc12b,eax - push eax ;+++++++++++++++++ second zone +++++++++++++ .dx12_done: - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .dx23_make - mov ecx,7 + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .dx23_make + mov ecx,7 @@: - push dword 0 - loop @b - jmp .dx23_done + push dword 0 + loop @b + jmp .dx23_done .dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx - ; mov .dx23,eax - push eax + idiv ebx + mov ebx,eax - mov ax,word .tex_x3 - sub ax,word .tex_x2 + + mov ax,.x3 + sub ax,.x2 cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx23,eax - push eax + imul ebx + sar eax,15 - ROUND + push eax - mov ax,word .tex_y3 - sub 
ax,word .tex_y2 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dy23,eax - push eax + sub esp,6*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movq xmm1,.col2r + movq xmm2,.col3r + movhps xmm1,.tex_x2 + movhps xmm2,.tex_x3 + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc23b,xmm2 + movq .tex_dx23,xmm4 - mov ax,word .z3 - sub ax,word .z2 - cwde ; - shl eax,CATMULL_SHIFT ; 2222222 - cdq ; 2 2 - idiv ebx ; 2 -; mov .dz23,eax ; 2 - push eax ; .dza12 ; 2 - ; 2 - mov ax,word .col3r ; 2 - sub ax,word .col2r ; 2222222 - cwde ; second delta - shl eax,ROUND ; - cdq ; - idiv ebx ; -; mov .dc23r,eax ; - push eax - - mov ax,word .col3g - sub ax,word .col2g - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc23g,eax - push eax - - mov ax,word .col3b ;;--- - sub ax,word .col2b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc23b,eax - push eax .dx23_done: ;++++++++++++++++++third zone++++++++++++++++++++++++ - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .dx13_make - mov ecx,7 + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .dx13_make + mov ecx,7 @@: - push dword 0 - loop @b - jmp .dx13_done + push dword 0 + loop @b + jmp .dx13_done .dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx -; mov .dx13,eax - push eax + idiv ebx + mov ebx,eax - mov ax,word .tex_x3 ; triangle b - sub ax,word .tex_x1 + + mov ax,.x3 + sub ax,.x1 cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx13r,eax - push eax - - mov ax,word .tex_y3 - sub ax,word .tex_y1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dy13,eax - push eax - - mov ax,word .z3 - sub ax,word .z1 ; 333333333 - cwde ; 3 3 - shl eax,CATMULL_SHIFT ; 3 - cdq ; 3 - idiv ebx ; 3 -; mov .dz13,eax ; 3 - push eax ; .dza12 ; 3 - ; 3 - mov ax,word .col3r ; 
3333333333 - sub ax,word .col1r ; 3 - cwde ; 3 - shl eax,ROUND ; 3 - cdq ; 3 - idiv ebx ; 3 - ; mov .dc13r,eax ; 3 3 - push eax ; 33333333 - - mov ax,word .col3g - sub ax,word .col1g - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc13g,eax - push eax - - mov ax,word .col3b ;;--- - sub ax,word .col1b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc13b,eax - push eax + imul ebx + sar eax,15 - ROUND + push eax + sub esp,6*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movq xmm1,.col1r + movq xmm2,.col3r + movhps xmm1,.tex_x1 + movhps xmm2,.tex_x3 + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad xmm4,15 - ROUND + pshufd xmm2,xmm2,11000110b + movdqu .dc13b,xmm2 + movq .tex_dx13,xmm4 .dx13_done: ; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>> - sub esp,55 ;(12*4) - movsx eax,.x1 ; eax - cur x1 - shl eax,ROUND ; ebx - cur x2 - mov ebx,eax - movsx edx,word .z1 - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx + sub esp,(12*4) - movzx edi,word .col1r - shl edi,ROUND - mov .cur1r,edi - mov .cur2r,edi - movzx esi,word .col1g - shl esi,ROUND - mov .cur1g,esi - mov .cur2g,esi - movzx edx,word .col1b - shl edx,ROUND - mov .cur1b,edx - mov .cur2b,edx + movsx eax,.x1 ; eax - cur x1 + shl eax,ROUND ; ebx - cur x2 + mov ebx,eax - movzx edi,word .tex_x1 - shl edi,ROUND - mov .scan_x1,edi - mov .scan_x2,edi - movzx edx,word .tex_y1 - shl edx,ROUND - mov .scan_y1,edx - mov .scan_y2,edx - mov cx,.y1 - cmp cx,.y2 - jge .loop1_end + movzx edi,word .tex_x1 + shl edi,ROUND + mov .scan_x1,edi + mov .scan_x2,edi + ; push edi + ; push edi + movzx edx,word .tex_y1 + shl edx,ROUND + ; push edx + ; push edx + mov .scan_y1,edx + mov .scan_y2,edx + + movsx edx,word .z1 + shl edx,CATMULL_SHIFT + ; push edx + ; push edx + mov .zz1,edx + mov .zz2,edx + + movzx edi,word .col1r + shl edi,ROUND + mov .cur1r,edi + mov .cur2r,edi + 
movzx esi,word .col1g + shl esi,ROUND + mov .cur1g,esi + mov .cur2g,esi + movzx edx,word .col1b + shl edx,ROUND + mov .cur1b,edx + mov .cur2b,edx + + + mov cx,.y1 + cmp cx,.y2 + jge .loop1_end .loop_1: - ; push eax ebx ebp + pushad - push .tex_ptr - push .scr_buff - push .z_ptr - push cx + push .tex_ptr + push .scr_buff + push .z_ptr + push cx - push .zz2 + push dword .zz2 - push .scan_x2 - push dword .scan_y2 - push dword .cur2r - push .cur2g - push dword .cur2b + push dword .cur2b + push dword .cur2g + push dword .cur2r + push dword .scan_x2 + push dword .scan_y2 - push .zz1 + push .zz1 - push .scan_x1 - push dword .scan_y1 - push dword .cur1r - push .cur1g - push dword .cur1b + push dword .cur1b + push dword .cur1g + push dword .cur1r + push dword .scan_x1 + push dword .scan_y1 + + sar eax,ROUND + sar ebx,ROUND + call horizontal_tex_grd_line - sar eax,ROUND - sar ebx,ROUND - call horizontal_tex_grd_line - ; pop ebp ebx eax popad -if (Ext = MMX)|(Ext=SSE) - movq mm0,.cur1b - movq mm1,.cur1r - movq mm2,.scan_y1 - movq mm3,.cur2b - movq mm4,.cur2r - movq mm5,.scan_y2 - paddd mm0,.dc13b - paddd mm1,.dc13r - paddd mm2,.tex_dy13 - paddd mm3,.dc12b - paddd mm4,.dc12r - paddd mm5,.tex_dy12 - movq .cur1b,mm0 - movq .cur1r,mm1 - movq .scan_y1,mm2 - movq .cur2b,mm3 - movq .cur2r,mm4 - movq .scan_y2,mm5 -end if -if Ext >= SSE2 - movups xmm0,.cur1b - movups xmm1,.dc13b - movups xmm2,.cur2b - movups xmm3,.dc12b - movq mm2,.scan_y1 - movq mm5,.scan_y2 - paddd xmm0,xmm1 - paddd xmm2,xmm3 - paddd mm2,.tex_dy13 - paddd mm5,.tex_dy12 - movq .scan_y1,mm2 - movq .scan_y2,mm5 - movups .cur1b,xmm0 - movups .cur2b,xmm2 -end if -if Ext = NON - mov edx,.dc13b - add .cur1b,edx - mov esi,.dc13g - add .cur1g,esi - mov edi,.dc13r - add .cur1r,edi - mov edx,.dz13 - add .zz1,edx - mov edx,.tex_dx13 - add .scan_x1,edx - mov esi,.tex_dy13 - add .scan_y1,esi + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc12b + movq mm2,.scan_x1 + movq mm5,.scan_x2 + paddd 
xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dx13 + paddd mm5,.tex_dx12 + movq .scan_x1,mm2 + movq .scan_x2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 - mov edi,.dc12b - add .cur2b,edi - mov esi,.dc12g - add .cur2g,esi - mov edx,.dc12r - add .cur2r,edx - mov edi,.tex_dx12 - add .scan_x2,edi - mov esi,.tex_dy12 - add .scan_y2,esi - mov edx,.dz12 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx12 - inc cx - cmp cx,.y2 + + add eax,.dx13 + add ebx,.dx12 + inc cx + cmp cx,.y2 jl .loop_1 .loop1_end: - movzx ecx,.y2 - cmp cx,.y3 - jge .loop2_end + movzx ecx,.y2 + cmp cx,.y3 + jge .loop2_end - movsx ebx,.x2 ; eax - cur x1 - shl ebx,ROUND ; ebx - cur x2 + movsx ebx,.x2 ; eax - cur x1 + shl ebx,ROUND ; ebx - cur x2 - movsx edx,word .z2 - shl edx,CATMULL_SHIFT + movsx edx,word .z2 + shl edx,CATMULL_SHIFT ; mov .zz1,edx - mov .zz2,edx + mov .zz2,edx - movzx edi,word .col2r - shl edi,ROUND + movzx edi,word .col2r + shl edi,ROUND ; mov .cur1r,edi - mov .cur2r,edi - movzx esi,word .col2g - shl esi,ROUND + mov .cur2r,edi + movzx esi,word .col2g + shl esi,ROUND ; mov .cur1g,esi - mov .cur2g,esi - movzx edx,word .col2b - shl edx,ROUND + mov .cur2g,esi + movzx edx,word .col2b + shl edx,ROUND ; mov .cur1b,edx - mov .cur2b,edx + mov .cur2b,edx - movzx edi,word .tex_x2 - shl edi,ROUND + movzx edi,word .tex_x2 + shl edi,ROUND ; mov .scan_x1,edi - mov .scan_x2,edi - movzx edx,word .tex_y2 - shl edx,ROUND + mov .scan_x2,edi + movzx edx,word .tex_y2 + shl edx,ROUND ; mov .scan_y1,edx - mov .scan_y2,edx + mov .scan_y2,edx .loop_2: pushad - push .tex_ptr - push .scr_buff - push .z_ptr - push cx + push .tex_ptr + push .scr_buff + push .z_ptr + push cx - push .zz2 + push dword .zz2 - push .scan_x2 - push dword .scan_y2 - push dword .cur2r - push .cur2g - push dword .cur2b + push dword .cur2b + push dword .cur2g + push dword .cur2r + push dword .scan_x2 + push dword .scan_y2 - push .zz1 + push .zz1 - push .scan_x1 - push dword .scan_y1 - push dword .cur1r - push .cur1g - push dword .cur1b 
+ push dword .cur1b + push dword .cur1g + push dword .cur1r + push dword .scan_x1 + push dword .scan_y1 - sar eax,ROUND - sar ebx,ROUND - call horizontal_tex_grd_line + sar eax,ROUND + sar ebx,ROUND + call horizontal_tex_grd_line popad -if (Ext = MMX)|(Ext=SSE) - movq mm0,.cur1b - movq mm1,.cur1r - movq mm2,.scan_y1 - movq mm3,.cur2b - movq mm4,.cur2r - movq mm5,.scan_y2 - paddd mm0,.dc13b - paddd mm1,.dc13r - paddd mm2,.tex_dy13 - paddd mm3,.dc23b - paddd mm4,.dc23r - paddd mm5,.tex_dy23 - movq .cur1b,mm0 - movq .cur1r,mm1 - movq .scan_y1,mm2 - movq .cur2b,mm3 - movq .cur2r,mm4 - movq .scan_y2,mm5 -end if -if Ext >= SSE2 - movups xmm0,.cur1b - movups xmm1,.dc13b - movups xmm2,.cur2b - movups xmm3,.dc23b - movq mm2,.scan_y1 - movq mm5,.scan_y2 - paddd xmm0,xmm1 - paddd xmm2,xmm3 - paddd mm2,.tex_dy13 - paddd mm5,.tex_dy23 - movq .scan_y1,mm2 - movq .scan_y2,mm5 - movups .cur1b,xmm0 - movups .cur2b,xmm2 -end if -if Ext = NON - mov edx,.dc13b - add .cur1b,edx - mov esi,.dc13g - add .cur1g,esi - mov edi,.dc13r - add .cur1r,edi - mov edx,.tex_dx13 - add .scan_x1,edx - mov esi,.tex_dy13 - add .scan_y1,esi - mov edx,.dz13 - add .zz1,edx - mov edi,.dc23b - add .cur2b,edi - mov esi,.dc23g - add .cur2g,esi - mov edx,.dc23r - add .cur2r,edx - mov edi,.tex_dx23 - add .scan_x2,edi - mov esi,.tex_dy23 - add .scan_y2,esi - mov edx,.dz23 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx23 - inc cx - cmp cx,.y3 - jl .loop_2 + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc23b + movq mm2,.scan_x1 + movq mm5,.scan_x2 + paddd xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dx13 + paddd mm5,.tex_dx23 + movq .scan_x1,mm2 + movq .scan_x2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 + + add eax,.dx13 + add ebx,.dx23 + inc cx + cmp cx,.y3 + jl .loop_2 .loop2_end: - mov esp,ebp + mov esp,ebp ret 36 horizontal_tex_grd_line: ;in: @@ -699,318 +525,254 @@ horizontal_tex_grd_line: .tex_ptr equ [ebp+62] .screen equ [ebp+58] .z_buffer equ [ebp+54] -.y equ [ebp+52] 
+.y equ [ebp+52] -.z2 equ [ebp+48] -.tex_x2 equ [ebp+44] -.tex_y2 equ [ebp+40] -.r2 equ [ebp+36] -.g2 equ [ebp+32] -.b2 equ [ebp+28] +.z2 equ [ebp+48] +.b2 equ [ebp+44] +.g2 equ [ebp+40] +.r2 equ [ebp+36] +.tex_x2 equ [ebp+32] +.tex_y2 equ [ebp+28] + + +.z1 equ [ebp+24] +.b1 equ [ebp+20] +.g1 equ [ebp+16] +.r1 equ [ebp+12] +.tex_x1 equ [ebp+8] +.tex_y1 equ [ebp+4] -.z1 equ [ebp+24] -.tex_x1 equ [ebp+20] -.tex_y1 equ [ebp+16] -.r1 equ [ebp+12] -.g1 equ [ebp+8] -.b1 equ [ebp+4] .x1 equ word[ebp-2] .x2 equ word[ebp-4] .dz equ dword[ebp-8] -.db equ dword[ebp-12] +.db equ [ebp-12] .dg equ dword[ebp-16] -.dr equ dword[ebp-20] +.dr equ [ebp-20] .dtex_x equ dword[ebp-24] -.dtex_y equ dword[ebp-28] +.dtex_y equ [ebp-28] -.c_ty equ [ebp-32] -.c_tx equ [ebp-36] -.cb equ [ebp-40] -.cg equ [ebp-44] -.cr equ [ebp-48] -.t_col equ [ebp-52] + mov ebp,esp -.dtex_yM equ qword[ebp-28] -.drM equ qword[ebp-20] -.dbM equ qword[ebp-12] - mov ebp,esp - ; sub esp,30 + mov cx,word .y + or cx,cx + jl .quit_l - mov cx,word .y - or cx,cx - jl .quit_l + cmp cx,word[size_y_var] ;SIZE_Y + jge .quit_l - cmp cx,word[size_y_var] ;SIZE_Y - jge .quit_l + cmp ax,bx + je .quit_l + jl @f - cmp ax,bx - je .quit_l - jl @f + xchg eax,ebx - xchg eax,ebx -if Ext=NON - mov ecx,dword .r1 - xchg ecx, .r2 - mov dword .r1, ecx + movdqu xmm0,.tex_y1 + movdqu xmm1,.tex_y2 + movdqu .tex_y1,xmm1 + movdqu .tex_y2,xmm0 + movq xmm4,.b1 ; x, z + movq xmm5,.b2 + movq .b1,xmm5 + movq .b2,xmm4 - mov ecx,dword .g1 - xchg ecx, .g2 - mov dword .g1, ecx - - mov ecx,dword .b1 - xchg ecx, .b2 - mov dword .b1, ecx - - mov ecx,dword .tex_x1 - xchg ecx, .tex_x2 - mov dword .tex_x1, ecx - - mov ecx,dword .tex_y1 - xchg ecx, .tex_y2 - mov dword .tex_y1, ecx - - mov ecx,dword .z1 - xchg ecx, .z2 - mov dword .z1, ecx -end if -if (Ext=MMX) - movq mm0,.b1 ; b, g - movq mm1,.b2 - movq .b1, mm1 - movq .b2, mm0 - movq mm2,.r1 ; r, y - movq mm3,.r2 - movq .r1,mm3 - movq .r2,mm2 - movq mm4,.tex_x1 ; x, z - movq mm5,.tex_x2 - movq .tex_x1,mm5 - 
movq .tex_x2,mm4 - -end if -if Ext>=SSE - movups xmm0,.b1 - movups xmm1,.b2 - movups .b1,xmm1 - movups .b2,xmm0 - movq mm4,.tex_x1 ; x, z - movq mm5,.tex_x2 - movq .tex_x1,mm5 - movq .tex_x2,mm4 -end if @@: - or bx,bx - jle .quit_l - cmp ax,word[size_x_var] ;SIZE_X - jge .quit_l + or bx,bx + jle .quit_l + cmp ax,word[size_x_var] ;SIZE_X + jge .quit_l - push ax - push bx + push ax + push bx +if 1 + mov bx,.x2 + sub bx,.x1 - mov eax,.z2 ; delta zone************ - sub eax,.z1 - cdq - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx - idiv ebx - push eax ; .dz + movsx ebx,bx + mov eax,1 shl 15 + cdq + idiv ebx + mov ebx,eax - mov eax,.b2 - sub eax,.b1 - cdq - idiv ebx - push eax ; .db - mov eax,.g2 - sub eax,.g1 - cdq - idiv ebx - push eax ; .dg + mov eax,.z2 ; delta zone************ + sub eax,.z1 + imul ebx + sar eax,15 + push eax ; .dz - mov eax,.r2 - sub eax,.r1 - cdq - idiv ebx - push eax ; .dr + mov eax,.b2 + sub eax,.b1 + imul ebx + sar eax,15 + push eax - mov eax,.tex_x2 - sub eax,.tex_x1 - cdq - idiv ebx - push eax ; .dtex_x + mov eax,.g2 + sub eax,.g1 + imul ebx + sar eax,15 + push eax ; .dz - mov eax,.tex_y2 - sub eax,.tex_y1 - cdq - idiv ebx - push eax ; .dtey_x + mov eax,.r2 + sub eax,.r1 + imul ebx + sar eax,15 + push eax - cmp .x1,0 - jg @f + mov eax,.tex_x2 + sub eax,.tex_x1 + imul ebx + sar eax,15 + push eax - mov eax,.dz ; clipping - movsx ebx,.x1 - neg ebx - imul ebx - add .z1,eax - mov .x1,0 + mov eax,.tex_y2 + sub eax,.tex_y1 + imul ebx + sar eax,15 + push eax - mov eax,.dr - imul ebx - add .r1,eax + +end if +if 0 + sub esp,6*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movlhps xmm0,xmm0 + movdqu xmm1,.tex_y1 + movdqu xmm2,.tex_y2 + movq xmm3,.b1 + movq xmm4,.b2 + psubd xmm4,xmm3 + psubd xmm2,xmm1 + packssdw xmm2,xmm4 + ; packlssdw xmm2,xmm2 + ; movlhps xmm2,xmm4 + + + ; psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + movhlps xmm4,xmm2 + movhlps xmm5,xmm3 + punpcklwd xmm2,xmm3 + punpcklwd xmm4,xmm5 + psrad xmm2,15 - ROUND + psrad 
xmm4,15 - ROUND + ; pshufd xmm2,xmm2,11000110b + movdqu .dtex_y,xmm2 + movq .db,xmm4 + + +end if + + cmp .x1,0 + jg @f + + mov eax,.dz ; clipping + movsx ebx,.x1 + neg ebx + imul ebx + add .z1,eax + mov .x1,0 + + mov eax,.dr + imul ebx + add .r1,eax ;if Ext=NON - mov eax,.dg - imul ebx - add .g1,eax + mov eax,.dg + imul ebx + add .g1,eax - mov eax,.db - imul ebx - add .b1,eax + mov eax,.db + imul ebx + add .b1,eax - mov eax,.dtex_x - imul ebx - add .tex_x1,eax + mov eax,.dtex_x + imul ebx + add .tex_x1,eax - mov eax,.dtex_y - imul ebx - add .tex_y1,eax + mov eax,.dtex_y + imul ebx + add .tex_y1,eax @@: - movsx edx,word[size_x_var] ;SIZE_X - cmp .x2,dx - jl @f - mov .x2,dx + movsx edx,word[size_x_var] ;SIZE_X + cmp .x2,dx + jl @f + mov .x2,dx @@: ; calc line addres begin in screen and Z buffer - movsx eax,word .y - mul edx - movsx edx,.x1 - add eax,edx + movsx eax,word .y + mul edx + movsx edx,.x1 + add eax,edx - mov esi,eax - shl esi,2 - add esi,.z_buffer + mov esi,eax + shl esi,2 + add esi,.z_buffer - lea eax,[eax*3] - mov edi,.screen - add edi,eax + lea eax,[eax*3] + mov edi,.screen + add edi,eax - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx ; init current variables - push dword .tex_y1 -;if Ext=NON - push dword .tex_x1 + movdqu xmm0,.r1 + movdqu xmm1,.dr + pxor xmm2,xmm2 + movq xmm4,.dtex_y + movq xmm5,.tex_y1 - push dword .b1 - push dword .g1 - push dword .r1 - -if Ext>=MMX - movq mm4,.cr ; lo -> r,g - movq mm6,.cb ; hi -> b, tex_x - pxor mm0,mm0 -end if - mov ebx,.z1 + mov ebx,.z1 .ddraw: - cmp ebx,dword[esi] - jge @f - mov eax,.c_ty -; if ROUNDTEX_SHIFT -; shr eax,ROUND-TEX_SHIFT -; end if - shr eax,ROUND - shl Eax,TEX_SHIFT - mov edx,.c_tx ; calc texture pixel mem addres - shr edx,ROUND - add eax,edx - and eax,TEXTURE_SIZE ; cutting - lea eax,[3*eax] - add eax,.tex_ptr - mov dword[esi],ebx -if Ext = NON - mov eax,dword[eax] - ; mov .tex_col,eax - push ax - shl eax,8 - pop ax - mov edx,.cr - sar edx,ROUND - mul dl ; al*dl - 
shr ax,8 - stosb - ror eax,16 - push ax - mov edx,.cg - sar edx,ROUND - mul dl - shr ax,8 - stosb - pop ax - shr ax,8 - mov edx,.cb - sar edx,ROUND - mul dl - shr ax,8 - stosb - jmp .no_skip -else - movd mm1,[eax] - punpcklbw mm1,mm0 - movq mm3,mm4 ;.cr ; lo -> r,g - movq mm5,mm6 ;.cb ; lo -> b,tex_x - psrld mm3,ROUND ; - psrld mm5,ROUND ; - packssdw mm3,mm5 - pmullw mm1,mm3 - psrlw mm1,8 - packuswb mm1,mm0 - movd [edi],mm1 -end if - mov dword[esi],ebx -if Ext = NON - jmp .no_skip -end if + cmp ebx,dword[esi] + jge @f + movdqa xmm6,xmm5 + psrld xmm6,ROUND + movd eax,xmm6 + psrldq xmm6,4 + movd edx,xmm6 + shl eax,TEX_SHIFT + + ; calc texture pixel mem addres + + add eax,edx + and eax,TEXTURE_SIZE ; cutting + lea eax,[3*eax] + add eax,.tex_ptr + mov dword[esi],ebx + + movd xmm7,[eax] + punpcklbw xmm7,xmm2 + movdqa xmm3,xmm0 ; calc col + psrld xmm3,ROUND ; + packssdw xmm3,xmm3 + pmullw xmm7,xmm3 + psrlw xmm7,8 + packuswb xmm7,xmm7 + movd [edi],xmm7 + + mov dword[esi],ebx @@: - add edi,3 - .no_skip: - add esi,4 - add ebx,.dz + add edi,3 + add esi,4 + add ebx,.dz + paddd xmm5,xmm4 + paddd xmm0,xmm1 - mov eax,.dtex_x - add .c_tx, eax - mov edx,.dtex_y - add .c_ty, edx -if Ext=NON - mov eax,.dr - add .cr,eax - mov edx,.dg - add .cg,edx - mov eax,.db - add .cb,eax - -else - paddd mm4,.drM - paddd mm6,.dbM -;; paddd mm7,.dtex_y ; mm4 - b, g -;; movq .c_tx,mm7 - ; mm6 - r, x -end if ; mm7 - y, x - - dec ecx - jnz .ddraw + loop .ddraw .quit_l: - mov esp,ebp + mov esp,ebp ret 42+20 ; horizontal line +;the_zero: +;size_y_var: +;size_x_var: diff --git a/programs/demos/view3ds/history.txt b/programs/demos/view3ds/history.txt index 8fe503636c..a6c8faef2b 100644 --- a/programs/demos/view3ds/history.txt +++ b/programs/demos/view3ds/history.txt @@ -1,3 +1,18 @@ +View3ds 0.075 - XII 2021 +1. Cusom rotate using keys and mouse scroll support by Leency. +---------------------------------------------------------------------------------- + +View3ds 0.074 - IX 2021 +1. 
Fixed emboss bug in grd lines displaying model. +2. Grd line exceeds screen problem fix. +3. New rendering model - ray casted shadows and appropriate button to + set 'on' this option. Note that it is a non real time model, especially when + complex object is computed. I took effort to introduce accelerating + structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled + + for now - seems to work incorrect(slow). +---------------------------------------------------------------------------------- + View3ds 0.073 - may 2021 1. I introduced procedure for searching nonredundand edges. 2. Writing some info about object: vertices, triangles unique edges diff --git a/programs/demos/view3ds/readme.txt b/programs/demos/view3ds/readme.txt index 07d9339f2f..e680574552 100644 --- a/programs/demos/view3ds/readme.txt +++ b/programs/demos/view3ds/readme.txt @@ -1,18 +1,33 @@ -View3ds 0.074 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.076 - tiny viewer to .3ds and .asc files with several graphics effects implementation. What's new? -1. Fixed emboss bug in grd lines displaying model. -2. Grd line exceedes screen problem fix. -3. New rendering model - ray casted shadows and appropiate button to - set 'on' this option. Note that is non real time model, especially when - complex object is computed. I took effort to introduce accelerating - structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled - for now - seems to work incorrect(slow). +1. Detecting manifold chunks procedure based on kind of sorted pivot + table. Chunks are counted and this number displayed. +2. New calculating normal vectors proc that uses some data produced + by new chunks routine. Now big object loading is fast. I load object that + contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges + in few seconds on i5 2nd gen. Earlier such objects calculating was + rather above time limits. +3. 
On http://board.flatassembler.net occasionally there are some discussions + about optimizing. Some clever people, whose skills and competence I trust, + claim that for CPUs manufactured in the last ~15 years size of code is crucial + for speed. (Better utilize CPU cache). + So I wrote some 'movsd' mnemonics instead of 'mov [edi],sth'; 'loop' instead of + 'dec ecx,jnz sth'. Moreover I went back to initializing some local variables + by 'push' (flat_cat.inc). I took effort to change divisions to + multiplications in two_tex.inc (works ok in fpu only Ext = NON mode and + of course in Ext = SSE3 mode), grd_tex.inc (single line not parallel + muls, whole drawing routine 4 divs instead of 27 divisions), + bump_tex.inc - 3 divs in SSE2 mode. See sources for details. +4. Editor button allows now editing by vertex all above 65535 vert objects. + + Buttons description: -1. rotary: choosing rotary axle: x, y, x+y. +1. rotary: choosing rotary axle: x, y, x+y, keys - for object translate + using keyboard. 2. shd. model: choosing shading model: flat, grd (smooth), env (spherical environment mapping, bump (bump mapping), tex (texture mapping), pos (position shading depend), dots (app draws only points - nodes of object), @@ -46,4 +61,4 @@ Buttons description: decrease whole handlers count by enable culling (using appropriate button) - some back handlers become hidden. 
- Maciej Guba IX 2021 + Maciej Guba XII 2021 diff --git a/programs/demos/view3ds/tex_cat.inc b/programs/demos/view3ds/tex_cat.inc index ecc6615f8a..500e98405a 100644 --- a/programs/demos/view3ds/tex_cat.inc +++ b/programs/demos/view3ds/tex_cat.inc @@ -24,86 +24,86 @@ tex_triangle_z: .tex_y2 equ ebp+10 .tex_x3 equ ebp+12 .tex_y3 equ ebp+14 -.z1 equ word[ebp+16] -.z2 equ word[ebp+18] -.z3 equ word[ebp+20] +.z1 equ word[ebp+16] +.z2 equ word[ebp+18] +.z3 equ word[ebp+20] -.tex_ptr equ dword[ebp-4] ; pointer to texture -.z_ptr equ dword[ebp-8] ; pointer to z-buffer -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] +.tex_ptr equ dword[ebp-4] ; pointer to texture +.z_ptr equ dword[ebp-8] ; pointer to z-buffer +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] -.dx12 equ dword[ebp-24] +.dx12 equ dword[ebp-24] .tex_dx12 equ dword[ebp-28] .tex_dy12 equ dword[ebp-32] -.dz12 equ dword[ebp-36] +.dz12 equ dword[ebp-36] -.dx13 equ dword[ebp-40] +.dx13 equ dword[ebp-40] .tex_dx13 equ dword[ebp-44] .tex_dy13 equ dword[ebp-48] -.dz13 equ dword[ebp-52] +.dz13 equ dword[ebp-52] -.dx23 equ dword[ebp-56] +.dx23 equ dword[ebp-56] .tex_dx23 equ dword[ebp-60] .tex_dy23 equ dword[ebp-64] -.dz23 equ dword[ebp-68] +.dz23 equ dword[ebp-68] .scan_x1 equ dword[ebp-72] .scan_x2 equ dword[ebp-76] .scan_y1 equ dword[ebp-80] .scan_y2 equ dword[ebp-84] -.cz1 equ dword[ebp-88] -.cz2 equ dword[ebp-92] +.cz1 equ dword[ebp-88] +.cz2 equ dword[ebp-92] - mov ebp,esp - push esi ; store memory pointers - push edx + mov ebp,esp + push esi ; store memory pointers + push edx .tt_sort3: - cmp ax,bx ;sort all parameters - jle .tt_sort1 - xchg eax,ebx - mov edx,dword [.tex_x1] - xchg edx,dword [.tex_x2] - mov dword[.tex_x1],edx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx + cmp ax,bx ;sort all parameters + jle .tt_sort1 + xchg eax,ebx + mov edx,dword 
[.tex_x1] + xchg edx,dword [.tex_x2] + mov dword[.tex_x1],edx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx .tt_sort1: - cmp bx,cx - jle .tt_sort2 - xchg ebx,ecx - mov edx,dword [.tex_x2] - xchg edx,dword [.tex_x3] - mov dword [.tex_x2],edx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .tt_sort3 + cmp bx,cx + jle .tt_sort2 + xchg ebx,ecx + mov edx,dword [.tex_x2] + xchg edx,dword [.tex_x3] + mov dword [.tex_x2],edx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .tt_sort3 .tt_sort2: - push eax ; and store to user friendly variables - push ebx - push ecx + push eax ; and store to user friendly variables + push ebx + push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .tt_loop2_end + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .tt_loop2_end ; cmp ax,SIZE_Y ; jl @f ; cmp bx,SIZE_Y ; jl @f ; cmp cx,SIZE_Y ; jl @f - ror eax,16 - ror ebx,16 - ror ecx,16 + ror eax,16 + ror ebx,16 + ror ecx,16 ; cmp ax,SIZE_X ; jl @f ; cmp bx,SIZE_X @@ -112,301 +112,301 @@ tex_triangle_z: ; jl @f ; jmp .tt_loop2_end @@: - mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] - mov ebx,dword[.tex_x2] - mov ecx,dword[.tex_x3] - mov edx,eax - or edx,ebx - or edx,ecx - test edx,80008000h - jne .tt_loop2_end - cmp ax,TEX_X - jge .tt_loop2_end - cmp bx,TEX_X - jge .tt_loop2_end - cmp cx,TEX_X - jge .tt_loop2_end - ror eax,16 - ror ebx,16 - ror ecx,16 - cmp ax,TEX_Y - jge .tt_loop2_end - cmp bx,TEX_Y - jge .tt_loop2_end - cmp cx,TEX_Y - jge .tt_loop2_end + mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] + mov ebx,dword[.tex_x2] + mov ecx,dword[.tex_x3] + mov edx,eax + or edx,ebx + or edx,ecx + test edx,80008000h + jne 
.tt_loop2_end + cmp ax,TEX_X + jge .tt_loop2_end + cmp bx,TEX_X + jge .tt_loop2_end + cmp cx,TEX_X + jge .tt_loop2_end + ror eax,16 + ror ebx,16 + ror ecx,16 + cmp ax,TEX_Y + jge .tt_loop2_end + cmp bx,TEX_Y + jge .tt_loop2_end + cmp cx,TEX_Y + jge .tt_loop2_end - movsx ebx,.y2 ; calc delta - sub bx,.y1 - jnz .tt_dx12_make - xor edx,edx - mov ecx,4 + movsx ebx,.y2 ; calc delta + sub bx,.y1 + jnz .tt_dx12_make + xor edx,edx + mov ecx,4 @@: - push edx - loop @b - jmp .tt_dx12_done + push edx + loop @b + jmp .tt_dx12_done .tt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,.x2 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx ; mov .dx12,eax ; dx12 = (x2-x1)/(y2-y1) - push eax + push eax - mov ax,word[.tex_x2] - sub ax,word[.tex_x1] - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,word[.tex_x2] + sub ax,word[.tex_x1] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1) - push eax + push eax - mov ax,word[.tex_y2] - sub ax,word[.tex_y1] - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,word[.tex_y2] + sub ax,word[.tex_y1] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1) - push eax + push eax - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax .tt_dx12_done: - movsx ebx,.y3 ; calc delta - sub bx,.y1 - jnz .tt_dx13_make - xor edx,edx - mov ecx,4 + movsx ebx,.y3 ; calc delta + sub bx,.y1 + jnz .tt_dx13_make + xor edx,edx + mov ecx,4 @@: - push edx - loop @b - jmp .tt_dx13_done + push edx + loop @b + jmp .tt_dx13_done .tt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,.x3 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx ; mov .dx12,eax ; dx13 = (x3-x1)/(y3-y1) - push eax + push eax - mov ax,word[.tex_x3] - sub ax,word[.tex_x1] - cwde - shl eax,ROUND - cdq - idiv ebx + mov 
ax,word[.tex_x3] + sub ax,word[.tex_x1] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dx12],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1) - push eax + push eax - mov ax,word[.tex_y3] - sub ax,word[.tex_y1] - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,word[.tex_y3] + sub ax,word[.tex_y1] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dy12],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1) - push eax + push eax - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax .tt_dx13_done: - mov bx,.y3 ; calc delta - sub bx,.y2 - jnz .tt_dx23_make - xor edx,edx - mov ecx,4 + mov bx,.y3 ; calc delta + sub bx,.y2 + jnz .tt_dx23_make + xor edx,edx + mov ecx,4 @@: - push edx - loop @b - jmp .tt_dx23_done + push edx + loop @b + jmp .tt_dx23_done .tt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - shl eax,ROUND - cdq - movzx ebx,bx - idiv ebx + mov ax,.x3 + sub ax,.x2 + cwde + shl eax,ROUND + cdq + movzx ebx,bx + idiv ebx ; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2) - push eax + push eax - mov ax,word[.tex_x3] - sub ax,word[.tex_x2] - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,word[.tex_x3] + sub ax,word[.tex_x2] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2) - push eax + push eax - mov ax,word[.tex_y3] - sub ax,word[.tex_y2] - cwde - shl eax,ROUND - cdq - idiv ebx + mov ax,word[.tex_y3] + sub ax,word[.tex_y2] + cwde + shl eax,ROUND + cdq + idiv ebx ; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2) - push eax + push eax - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax .tt_dx23_done: - movsx eax,.x1 ;eax - cur x1 - shl eax,ROUND ;ebx - cur x2 - mov ebx,eax + movsx eax,.x1 ;eax - cur x1 + shl eax,ROUND ;ebx - cur x2 + mov ebx,eax - movsx edx, word[.tex_x1] - shl edx,ROUND + movsx edx, 
word[.tex_x1] + shl edx,ROUND ; mov [.scan_x1],edx ; mov [.scan_x2],edx - push edx - push edx - movsx edx, word[.tex_y1] - shl edx,ROUND + push edx + push edx + movsx edx, word[.tex_y1] + shl edx,ROUND ; mov [.scan_y1],edx ; mov [.scan_y2],edx - push edx - push edx - movsx edx,.z1 - shl edx,CATMULL_SHIFT - push edx - push edx - mov cx,.y1 - cmp cx,.y2 - jge .tt_loop1_end + push edx + push edx + movsx edx,.z1 + shl edx,CATMULL_SHIFT + push edx + push edx + mov cx,.y1 + cmp cx,.y2 + jge .tt_loop1_end .tt_loop1: - pushad + pushad - push .z_ptr - push .cz1 ; z coords shifted shl catmull_shift - push .cz2 - push .scan_y2 - push .scan_x2 - push .scan_y1 - push .scan_x1 - push esi ;[.tex_ptr] + push .z_ptr + push .cz1 ; z coords shifted shl catmull_shift + push .cz2 + push .scan_y2 + push .scan_x2 + push .scan_y1 + push .scan_x1 + push esi ;[.tex_ptr] - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax - call textured_line_z + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax + call textured_line_z - popad - mov edx,.dz13 - add .cz1,edx - mov edx,.dz12 - add .cz2,edx + popad + mov edx,.dz13 + add .cz1,edx + mov edx,.dz12 + add .cz2,edx - mov edx, .tex_dx13 - add .scan_x1, edx - mov edx, .tex_dx12 - add .scan_x2, edx - mov edx, .tex_dy13 - add .scan_y1, edx - mov edx, .tex_dy12 - add .scan_y2, edx + mov edx, .tex_dx13 + add .scan_x1, edx + mov edx, .tex_dx12 + add .scan_x2, edx + mov edx, .tex_dy13 + add .scan_y1, edx + mov edx, .tex_dy12 + add .scan_y2, edx - add eax, .dx13 - add ebx, .dx12 - inc cx - cmp cx,.y2 - jl .tt_loop1 + add eax, .dx13 + add ebx, .dx12 + inc cx + cmp cx,.y2 + jl .tt_loop1 .tt_loop1_end: - mov cx,.y2 - cmp cx,.y3 - jge .tt_loop2_end + mov cx,.y2 + cmp cx,.y3 + jge .tt_loop2_end - movsx ebx,.x2 - shl ebx,ROUND - movsx edx,.z2 - shl edx,CATMULL_SHIFT - mov .cz2,edx - movzx edx, word [.tex_x2] - shl edx,ROUND - mov .scan_x2,edx - movzx edx, word[.tex_y2] - shl edx,ROUND - mov .scan_y2,edx + movsx ebx,.x2 + shl ebx,ROUND + movsx 
edx,.z2 + shl edx,CATMULL_SHIFT + mov .cz2,edx + movzx edx, word [.tex_x2] + shl edx,ROUND + mov .scan_x2,edx + movzx edx, word[.tex_y2] + shl edx,ROUND + mov .scan_y2,edx .tt_loop2: - pushad + pushad - push .z_ptr - push .cz1 ; z coords shifted shl catmull_shift - push .cz2 + push .z_ptr + push .cz1 ; z coords shifted shl catmull_shift + push .cz2 - push .scan_y2 - push .scan_x2 - push .scan_y1 - push .scan_x1 - push esi ;[.tex_ptr] + push .scan_y2 + push .scan_x2 + push .scan_y1 + push .scan_x1 + push esi ;[.tex_ptr] - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax - call textured_line_z + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax + call textured_line_z - popad + popad - mov edx,.dz13 - add .cz1,edx - mov edx,.dz23 - add .cz2,edx + mov edx,.dz13 + add .cz1,edx + mov edx,.dz23 + add .cz2,edx - mov edx, .tex_dx13 - add .scan_x1, edx - mov edx, .tex_dx23 - add .scan_x2, edx - mov edx, .tex_dy13 - add .scan_y1, edx - mov edx, .tex_dy23 - add .scan_y2, edx + mov edx, .tex_dx13 + add .scan_x1, edx + mov edx, .tex_dx23 + add .scan_x2, edx + mov edx, .tex_dy13 + add .scan_y1, edx + mov edx, .tex_dy23 + add .scan_y2, edx - add eax, .dx13 - add ebx, .dx23 - inc cx - cmp cx,.y3 - jl .tt_loop2 + add eax, .dx13 + add ebx, .dx23 + inc cx + cmp cx,.y3 + jl .tt_loop2 .tt_loop2_end: .tt_end: - mov esp,ebp + mov esp,ebp ret 18 textured_line_z: @@ -421,126 +421,130 @@ textured_line_z: .tex_y1 equ ebp+18 .tex_x2 equ ebp+22 .tex_y2 equ ebp+26 - .z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT - .z1 equ dword [ebp+34] + .z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT + .z1 equ dword [ebp+34] .z_ptr equ dword [ebp+38] .tex_dy equ dword [ebp-4] .tex_dx equ dword [ebp-8] - .dz equ dword [ebp-12] - .cz equ dword [ebp-16] + .dz equ dword [ebp-12] + .cz equ dword [ebp-16] .c_tex_x equ dword [ebp-20] ; current tex x .m_sft1 equ ebp-28 .m_sft2 equ ebp-32 ; .c_tex_xM equ ebp+14 .tex_dxM equ ebp-8 - mov ebp,esp + mov ebp,esp - mov 
ax,.y - or ax,ax - jl .tl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;SIZE_Y - jge .tl_quit + mov ax,.y + or ax,ax + jl .tl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;SIZE_Y + jge .tl_quit - mov ax,.x1 - cmp ax,.x2 - je .tl_quit - jl .tl_ok + mov ax,.x1 + cmp ax,.x2 + je .tl_quit + jl .tl_ok - xchg ax,.x2 ; sort params - mov .x1,ax -if Ext >= MMX - movq mm0,[.tex_x1] - movq mm1,[.tex_x2] - movq [.tex_x2],mm0 - movq [.tex_x1],mm1 + xchg ax,.x2 ; sort params + mov .x1,ax +if Ext >= SSE2 + movdqu xmm0,[.tex_x1] + pshufd xmm0,xmm0,01001110b + movdqu [.tex_x1],xmm0 +else if Ext >= MMX + movq mm0,[.tex_x1] + movq mm1,[.tex_x2] + movq [.tex_x2],mm0 + movq [.tex_x1],mm1 else - mov eax,dword[.tex_x1] - xchg eax,dword[.tex_x2] - mov dword[.tex_x1],eax + mov eax,dword[.tex_x1] + xchg eax,dword[.tex_x2] + mov dword[.tex_x1],eax - mov eax,dword[.tex_y1] - xchg eax,dword[.tex_y2] - mov dword[.tex_y1],eax + mov eax,dword[.tex_y1] + xchg eax,dword[.tex_y2] + mov dword[.tex_y1],eax end if - mov eax,.z1 - xchg eax,.z2 - mov .z1,eax + mov eax,.z1 + xchg eax,.z2 + mov .z1,eax .tl_ok: - mov cx,[size_x_var] - dec cx - cmp .x1,cx ;SIZE_X - jge .tl_quit - cmp .x2,0 - jle .tl_quit + mov cx,[size_x_var] + dec cx + cmp .x1,cx ;SIZE_X + jge .tl_quit + cmp .x2,0 + jle .tl_quit - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx + mov bx,.x2 + sub bx,.x1 + movsx ebx,bx - mov eax,dword[.tex_y2] ; calc .dty - sub eax,dword[.tex_y1] - cdq - idiv ebx - push eax + mov eax,dword[.tex_y2] ; calc .dty + sub eax,dword[.tex_y1] + cdq + idiv ebx + push eax - mov eax,dword[.tex_x2] ; calc .dtx - sub eax,dword[.tex_x1] - cdq - idiv ebx - push eax + mov eax,dword[.tex_x2] ; calc .dtx + sub eax,dword[.tex_x1] + cdq + idiv ebx + push eax - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax - cmp .x1,0 ; clipping - jg @f + cmp .x1,0 ; clipping + jg @f - movsx ebx,.x1 - neg ebx - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov 
.x1,0 + movsx ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 - mov eax,.tex_dy - imul ebx - add dword[.tex_y1],eax + mov eax,.tex_dy + imul ebx + add dword[.tex_y1],eax - mov eax,.tex_dx - imul ebx - add dword[.tex_x1],eax + mov eax,.tex_dx + imul ebx + add dword[.tex_x1],eax @@: - cmp .x2,cx ;SIZE_X - jl @f - mov .x2,cx ;SIZE_X + cmp .x2,cx ;SIZE_X + jl @f + mov .x2,cx ;SIZE_X @@: - movsx ebx,.y ; calc mem begin in buffers - movzx eax,word[size_x_var] ;SIZE_X - mul ebx - movsx ebx,.x1 - add eax,ebx - mov ebx,eax + movsx ebx,.y ; calc mem begin in buffers + movzx eax,word[size_x_var] ;SIZE_X + mul ebx + movsx ebx,.x1 + add eax,ebx + mov ebx,eax - lea eax,[eax*3] - add edi,eax ; edi - scr buff - shl ebx,2 - add .z_ptr,ebx ; z buffer pointer + lea eax,[eax*3] + add edi,eax ; edi - scr buff + shl ebx,2 + add .z_ptr,ebx ; z buffer pointer - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx ;if Ext >= MMX ; movq mm0,[.tex_x1] @@ -549,12 +553,12 @@ end if ; mov ebx,.z1 ; mov eax,.dz ;else - mov eax,dword[.tex_x1] - mov ebx,dword[.tex_y1] - push .z1 ; .cz - push eax ;.c_tex_x + mov eax,dword[.tex_x1] + mov ebx,dword[.tex_y1] + push .z1 ; .cz + push eax ;.c_tex_x ;end if - mov edx,.z_ptr + mov edx,.z_ptr .tl_loop: @@ -569,43 +573,43 @@ end if ; movd esi,mm3 ; mov dword[edx],ebx ; renew z buffer ;else - ; eax - temp - mov eax,.cz ; ebx - cur tex y shl ROUND - cmp eax,[edx] ; ecx - l.lenght - jge @f ; ebx - cur tex_y ; edx - temp - mov esi,ebx ; edi - scr buff - sar esi,ROUND ; esi - tex_ptr temp - shl esi,TEX_SHIFT ; .z_ptr - cur pointer to z buff - mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT - sar eax,ROUND - add esi,eax - mov eax,.cz - mov dword[edx],eax ; renew z buffer + ; eax - temp + mov eax,.cz ; ebx - cur tex y shl ROUND + cmp eax,[edx] ; ecx - l.lenght + jge @f ; ebx - cur tex_y ; edx - temp + mov esi,ebx ; edi - scr buff + sar esi,ROUND ; esi - tex_ptr temp + shl esi,TEX_SHIFT ; .z_ptr - cur 
pointer to z buff + mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT + sar eax,ROUND + add esi,eax + mov eax,.cz + mov dword[edx],eax ; renew z buffer ;end if - and esi,TEXTURE_SIZE - lea esi,[esi*3] - add esi,.tex_ptr - movsd - dec edi - jmp .no_skip + and esi,TEXTURE_SIZE + lea esi,[esi*3] + add esi,.tex_ptr + movsd + dec edi + jmp .no_skip @@: - add edi,3 + add edi,3 .no_skip: - add edx,4 + add edx,4 ;if Ext >= MMX ; add ebx,eax ; paddd mm0,mm1 ;else - mov eax,.dz - add .cz,eax - mov eax,.tex_dx - add .c_tex_x,eax - add ebx,.tex_dy + mov eax,.dz + add .cz,eax + mov eax,.tex_dx + add .c_tex_x,eax + add ebx,.tex_dy ;end if - loop .tl_loop + loop .tl_loop .tl_quit: - mov esp,ebp + mov esp,ebp ret 30+8 diff --git a/programs/demos/view3ds/two_tex.inc b/programs/demos/view3ds/two_tex.inc index f4d9c03357..459686e15b 100644 --- a/programs/demos/view3ds/two_tex.inc +++ b/programs/demos/view3ds/two_tex.inc @@ -5,14 +5,18 @@ ;TEX_X equ 512 ;TEX_Y equ 512 ;TEXTURE_SIZE EQU (512*512)-1 + ;TEX_SHIFT EQU 9 -;CATMULL_SHIFT equ 8 -;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 -;Ext = SSE -;SSE = 3 +CATMULL_SHIFT equ 8 +TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 + +;SSE3 = 4 +;SSE2 = 3 +;SSE = 2 ;MMX = 1 ;NON = 0 +;Ext = NON ;use32 ;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- ;------- DOS 13h mode demos -------------------------------------------- @@ -32,59 +36,59 @@ two_tex_triangle_z: ;---------------------- pointer io Z buffer----- ;-- Z-buffer - filled with coordinates as dword -------- ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! 
-.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 ; b - first texture -.b_x3 equ ebp+12 -.b_y3 equ ebp+14 ; e - second texture -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer +.e_x1 equ ebp+4 ; procedure don't save registers !!! +.e_y1 equ ebp+6 ; each coordinate as word +.b_x1 equ ebp+8 +.b_y1 equ ebp+10 +.e_x2 equ ebp+12 +.e_y2 equ ebp+14 +.b_x2 equ ebp+16 +.b_y2 equ ebp+18 ; b - first texture +.e_x3 equ ebp+20 +.e_y3 equ ebp+22 ; e - second texture +.b_x3 equ ebp+24 +.b_y3 equ ebp+26 +.z1 equ word[ebp+28] +.z2 equ word[ebp+30] +.z3 equ word[ebp+32] +.z_buff equ dword[ebp+34] ; pointer to Z-buffer -.t_bmap equ dword[ebp-4] ; pointer to b. texture -.t_emap equ dword[ebp-8] ; pointer to e. texture -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] +.t_bmap equ dword[ebp-4] ; pointer to b. texture +.t_emap equ dword[ebp-8] ; pointer to e. 
texture +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] .dx12 equ dword[ebp-24] -.dbx12 equ dword[ebp-28] +.dbx12 equ [ebp-28] .dby12 equ dword[ebp-32] -.dby12q equ [ebp-32] +.dby12q equ [ebp-32] .dex12 equ dword[ebp-36] .dey12 equ dword[ebp-40] -.dey12q equ [ebp-40] -.dz12 equ dword[ebp-44] +.dey12q equ [ebp-40] +.dz12 equ [ebp-44] .dx13 equ dword[ebp-48] -.dbx13 equ dword[ebp-52] +.dbx13 equ [ebp-52] .dby13 equ dword[ebp-56] -.dby13q equ [ebp-56] +.dby13q equ [ebp-56] .dex13 equ dword[ebp-60] .dey13 equ dword[ebp-64] -.dey13q equ [ebp-64] -.dz13 equ dword[ebp-68] +.dey13q equ [ebp-64] +.dz13 equ [ebp-68] .dx23 equ dword[ebp-72] -.dbx23 equ dword[ebp-76] +.dbx23 equ [ebp-76] .dby23 equ dword[ebp-80] -.dby23q equ [ebp-80] +.dby23q equ [ebp-80] .dex23 equ dword[ebp-84] .dey23 equ dword[ebp-88] -.dey23q equ [ebp-88] -.dz23 equ dword[ebp-92] +.dey23q equ [ebp-88] +.dz23 equ [ebp-92] .cx1 equ dword[ebp-96] ; current variables .cx2 equ dword[ebp-100] @@ -106,10 +110,10 @@ two_tex_triangle_z: cld end if mov ebp,esp - push edx esi ; store bump map + push edx esi ; store bump map ; push esi ; store e. map ; sub esp,120 - .sort3: ; sort triangle coordinates... + .sort3: ; sort triangle coordinates... 
cmp ax,bx jle .sort1 xchg eax,ebx @@ -123,30 +127,28 @@ two_tex_triangle_z: xchg dx,.z2 mov .z1,dx .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.b_x2] + xchg edx,dword[.b_x3] + mov dword[.b_x2],edx + mov edx,dword[.e_x2] + xchg edx,dword[.e_x3] + mov dword[.e_x2],edx mov dx,.z2 xchg dx,.z3 mov .z2,dx - jmp .sort3 + jmp .sort3 .sort2: - push eax ebx ecx ; store triangle coords in variables -; push ebx -; push ecx + push eax ebx ecx ; store triangle coords in variables - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that ; or edx,ebx ; if any *one* of them is negative a sign flag is raised ; or edx,ecx @@ -159,593 +161,509 @@ two_tex_triangle_z: ; jg .loop23_done ; cmp .x3,SIZE_X ; jg .loop23_done ; { - - - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make - mov ecx,6 - xor edx,edx +; sub esp,18*4 +; pxor xmm7,xmm7 + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx12_done + push edx ;dword 0 + loop @b + jmp .bt_dx12_done .bt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx -; mov .dx12,eax - push eax + idiv ebx + mov ebx,eax -if Ext=SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - ; mov eax,255 - cvtsi2ss xmm4,[i255d] 
;eax - divss xmm3,xmm4 - rcpss xmm3,xmm3 - ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] - ; psubsw mm3,mm2 - ; psubsw mm1,mm0 - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - ; pslld mm0,ROUND - ; pslld mm1,ROUND - ; pslld mm2,ROUND - ; pslld mm3,ROUND - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - ; pxor mm4,mm4 - ; movq mm5,mm1 - ; movq mm6,mm1 - ; pcmpeqb mm5,mm4 -; psubd mm1,mm0 -; psubd mm3,mm2 - - ; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 - ; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 - ; pxor - ; punpcklhd mm0,mm1 ; lwd ; - ; psubw mm1,mm0 ; mm1, mm0 - ; pxor mm2,mm2 - ; pmovmaskb eax,mm1 - ; and eax,10101010b - ; pcmpgtw mm2,mm1 - ; punpcklwd mm1,mm2 - ; psllw mm0,ROUND - ; psllw mm1,ROUND - ; movq mm2,mm0 - ; psrlq mm0,32 - -; cvtpi2ps xmm0,mm1 -; movlhps xmm0,xmm0 -; cvtpi2ps xmm0,mm3 - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey12q,mm0 - movq .dby12q,mm1 - -; movd .dex12,mm0 -; psrlq mm0,32 -; movd .dey12,mm0 -; movhlps xmm1,xmm1 -; cvtps2pi mm0,xmm1 -; movd .dbx12,mm0 -; psrlq mm0,32 -; movd .dby12,mm0 + mov ax,.x2 + sub ax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx12,eax +if Ext >= SSE2 + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.e_x1] + movq xmm2,[.e_x2] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,10110001b + movdqu .dey12q,xmm2 else - mov ax,word[.b_x2] - sub ax,word[.b_x1] + + mov ax,word[.b_x2] + sub ax,word[.b_x1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx12,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.b_y2] - sub ax,word[.b_y1] + mov 
ax,word[.b_y2] + sub ax,word[.b_y1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby12,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - ; mov eax,.dbx12 - ; mov ebx,.dby12 - ; int3 - - mov ax,word[.e_x2] - sub ax,word[.e_x1] + mov ax,word[.e_x2] + sub ax,word[.e_x1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex12,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.e_y2] - sub ax,word[.e_y1] + mov ax,word[.e_y2] + sub ax,word[.e_y1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey12,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax + end if + + + mov ax,.z2 + sub ax,.z1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dz12,eax + -end if - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax .bt_dx12_done: - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,6 - xor edx,edx + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done + push edx ;dword 0 + loop @b + ; movq .dbx13,xmm7 + ; movdqu .dz13,xmm7 + jmp .bt_dx13_done .bt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND + ; sub esp,6*4 + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx - ; mov .dx13,eax - push eax + idiv ebx + ; push eax + mov ebx,eax -if Ext=SSE - - cvtsi2ss xmm3,ebx - ; mov eax,255 - cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 -; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 - - movd mm0,[.b_x1] - movd mm1,[.b_x3] - movd mm2,[.e_x1] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps 
xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey13q,mm0 - movq .dby13q,mm1 + mov ax,.x3 + sub ax,.x1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dx13,eax +if 1 + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.e_x1] + movq xmm2,[.e_x3] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,10110001b + movdqu .dey13q,xmm2 + ; punpcklwd xmm4,xmm5 + ; psrad xmm4,15 - ROUND + ; movq .tex_dx12,xmm4 else - - mov ax,word[.b_x3] - sub ax,word[.b_x1] + mov ax,word[.b_x3] + sub ax,word[.b_x1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx13,eax - push eax +; shl eax,ROUND +; cdq + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y1] + mov ax,word[.b_y3] + sub ax,word[.b_y1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby13,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.e_x3] - sub ax,word[.e_x1] + mov ax,word[.e_x3] + sub ax,word[.e_x1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex13,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y1] + mov ax,word[.e_y3] + sub ax,word[.e_y1] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey13,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax + end if + mov ax,.z3 + sub ax,.z1 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dz13,eax -end if - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz13,eax - push eax .bt_dx13_done: - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,6 - xor edx,edx + + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,6 + xor edx,edx @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done + push edx ;dword 0 + loop @b + ; movq .dbx23,xmm7 + ; movdqu .dz23,xmm7 + + jmp .bt_dx23_done .bt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - 
movsx ebx,bx - shl eax,ROUND + + + movsx ebx,bx + mov eax,1 shl 15 cdq - idiv ebx + idiv ebx + ; push eax + mov ebx,eax + + + mov ax,.x3 + sub ax,.x2 + cwde + imul ebx + sar eax,15 - ROUND + push eax ; mov .dx23,eax - push eax - -if Ext=SSE - - cvtsi2ss xmm3,ebx - ; mov eax,255 - cvtsi2ss xmm4,[i255d] ;eax - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 - - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - divps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey23q,mm0 - movq .dby23q,mm1 +if Ext >= SSE2 + sub esp,4*4 + movd xmm0,ebx + pshuflw xmm0,xmm0,0 + movq xmm1,[.e_x2] + movq xmm2,[.e_x3] + psubw xmm2,xmm1 + movdqa xmm3,xmm2 + pmullw xmm2,xmm0 + pmulhw xmm3,xmm0 + punpcklwd xmm2,xmm3 + psrad xmm2,15 - ROUND + pshufd xmm2,xmm2,10110001b + movdqu .dey23q,xmm2 else - mov ax,word[.b_x3] - sub ax,word[.b_x2] + mov ax,word[.b_x3] + sub ax,word[.b_x2] cwde - shl eax,ROUND - cdq - idiv ebx +; shl eax,ROUND +; cdq + imul ebx + sar eax,15 - ROUND ; mov .dbx23,eax - push eax + push eax - mov ax,word[.b_y3] - sub ax,word[.b_y2] + mov ax,word[.b_y3] + sub ax,word[.b_y2] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby23,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.e_x3] - sub ax,word[.e_x2] + mov ax,word[.e_x3] + sub ax,word[.e_x2] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex23,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax - mov ax,word[.e_y3] - sub ax,word[.e_y2] + mov ax,word[.e_y3] + sub ax,word[.e_y2] cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey23,eax - push eax + imul ebx + sar eax,15 - ROUND + ; mov .dbx23,eax + push eax end if - 
mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz23,eax - push eax - ; sub esp,40 + + + mov ax,.z3 + sub ax,.z2 + cwde + imul ebx + sar eax,15 - ROUND + push eax + ; mov .dz23,eax + + + + + + + .bt_dx23_done: - movsx eax,.x1 - shl eax,ROUND + + movsx eax,.x1 + shl eax,ROUND ; mov .cx1,eax ; mov .cx2,eax - push eax eax + push eax eax ; push eax - - movsx eax,word[.b_x1] - shl eax,ROUND - mov .cbx1,eax - mov .cbx2,eax - ; push eax eax - ; push eax - - movsx eax,word[.b_y1] - shl eax,ROUND - mov .cby1,eax - mov .cby2,eax - ; push eax eax - ; push eax - - movsx eax,word[.e_x1] - shl eax,ROUND - mov .cex1,eax - mov .cex2,eax + sub esp,32 ; push eax eax ;push eax - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax - sub esp,32 - ; push eax eax - ;push eax - - movsx eax,.z1 - shl eax,CATMULL_SHIFT + movsx eax,.z1 + shl eax,CATMULL_SHIFT ; mov .cz1,eax ; mov .cz2,eax push eax eax ;push eax - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done + movsx eax,word[.b_x1] + shl eax,ROUND + mov .cbx1,eax + mov .cbx2,eax + ; push eax eax + ; push eax + + movsx eax,word[.b_y1] + shl eax,ROUND + mov .cby1,eax + mov .cby2,eax + ; push eax eax + ; push eax + + movsx eax,word[.e_x1] + shl eax,ROUND + mov .cex1,eax + mov .cex2,eax + ; push eax eax + ;push eax + + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax + + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done .loop12: - call .call_line + call .call_line - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx if Ext>= SSE2 - movups xmm0,.cey1 - movups xmm1,.cey2 - movups xmm2,.dey12q - movups xmm3,.dey13q - paddd xmm0,xmm3 - paddd xmm1,xmm2 - movups .cey1,xmm0 - movups .cey2,xmm1 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey12q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 else if (Ext = MMX) | (Ext=SSE) - movq mm0,.cby2 ; with this 
optimization object - movq mm1,.cby1 ; looks bit annoying - movq mm2,.cey2 - movq mm3,.cey1 - paddd mm0,.dby12q - paddd mm1,.dby13q - paddd mm2,.dey12q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 + movq mm0,.cby2 ; with this optimization object + movq mm1,.cby1 ; looks bit annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby12q + paddd mm1,.dby13q + paddd mm2,.dey12q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax end if - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx - inc ecx - cmp cx,.y2 - jl .loop12 + inc ecx + cmp cx,.y2 + jl .loop12 .loop12_done: - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax - movsx eax,.x2 - shl eax,ROUND - mov .cx2,eax + movsx eax,.x2 + shl eax,ROUND + mov .cx2,eax - movzx eax,word[.b_x2] - shl eax,ROUND - mov .cbx2,eax + movzx eax,word[.b_x2] + shl eax,ROUND + mov .cbx2,eax - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax + movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax - movzx eax,word[.e_x2] - shl eax,ROUND - mov .cex2,eax + movzx eax,word[.e_x2] + shl eax,ROUND + mov .cex2,eax - movzx eax,word[.e_y2] - shl eax,ROUND - mov .cey2,eax + movzx eax,word[.e_y2] + shl eax,ROUND + mov 
.cey2,eax .loop23: - call .call_line + call .call_line ;if Ext = NON - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx if Ext>= SSE2 - movups xmm0,.cey1 - movups xmm1,.cey2 - movups xmm2,.dey23q - movups xmm3,.dey13q - paddd xmm0,xmm3 - paddd xmm1,xmm2 - movups .cey1,xmm0 - movups .cey2,xmm1 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey23q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 else if (Ext = MMX) | ( Ext = SSE) - movq mm0,.cby2 ; with this mmx optimization object looks bit - movq mm1,.cby1 ; annoying - movq mm2,.cey2 - movq mm3,.cey1 - paddd mm0,.dby23q - paddd mm1,.dby13q - paddd mm2,.dey23q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey2,mm2 - movq .cey1,mm3 + movq mm0,.cby2 ; with this mmx optimization object looks bit + movq mm1,.cby1 ; annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby23q + paddd mm1,.dby13q + paddd mm2,.dey23q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey2,mm2 + movq .cey1,mm3 else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby23 - add .cby2,edx + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax end if - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx ;else ; movq mm0,.db13q ; movq mm1,.cbx1q - inc ecx - cmp cx,.y3 - jl .loop23 + inc ecx + cmp cx,.y3 + jl .loop23 .loop23_done: - mov esp,ebp + mov esp,ebp ret 34 .call_line: pushad - push .cz1 - push .cz2 - push .z_buff - push 
.t_bmap - push .t_emap - push dword .cey2 - push .cex2 - push dword .cey1 - push .cex1 - push dword .cby2 - push .cbx2 - push dword .cby1 - push .cbx1 - push ecx + push .cz1 + push .cz2 + push .z_buff + push .t_bmap + push .t_emap + push dword .cey2 + push .cex2 + push dword .cey1 + push .cex1 + push dword .cby2 + push .cbx2 + push dword .cby1 + push .cbx1 + push ecx - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND - call two_tex_line_z + call two_tex_line_z popad ret @@ -754,96 +672,96 @@ two_tex_line_z: ;-------------- ebx - x2 ;-------------- edi - pointer to screen buffer ;stack - another parameters : -.y equ dword [ebp+4] -.bx1 equ [ebp+8] ; --- -.by1 equ [ebp+12] ; | -.bx2 equ [ebp+16] ; | -.by2 equ [ebp+20] ; |> b. texture and e. texture coords -.ex1 equ [ebp+24] ; |> shifted shl ROUND -.ey1 equ [ebp+28] ; | -.ex2 equ [ebp+32] ; | -.ey2 equ [ebp+36] ; --- -.emap equ [ebp+40] ; b texture offset -.bmap equ [ebp+44] ; e texture offset +.y equ dword [ebp+4] +.bx1 equ [ebp+8] ; --- +.by1 equ [ebp+12] ; | +.bx2 equ [ebp+16] ; | +.by2 equ [ebp+20] ; |> b. texture and e. 
texture coords +.ex1 equ [ebp+24] ; |> shifted shl ROUND +.ey1 equ [ebp+28] ; | +.ex2 equ [ebp+32] ; | +.ey2 equ [ebp+36] ; --- +.emap equ [ebp+40] ; b texture offset +.bmap equ [ebp+44] ; e texture offset .z_buff equ dword [ebp+48] -.z2 equ dword [ebp+52] ; -- |> z coords shifted -.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT +.z2 equ dword [ebp+52] ; -- |> z coords shifted +.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT -.x1 equ dword [ebp-4] -.x2 equ dword [ebp-8] -.dbx equ [ebp-12] -.dex equ [ebp-16] -.dby equ [ebp-20] -.dey equ [ebp-24] -.dz equ dword [ebp-28] -.cbx equ [ebp-32] -.cex equ [ebp-36] -.cby equ [ebp-40] -.cey equ [ebp-44] -.cz equ dword [ebp-48] +.x1 equ dword [ebp-4] +.x2 equ dword [ebp-8] +.dbx equ [ebp-12] +.dex equ [ebp-16] +.dby equ [ebp-20] +.dey equ [ebp-24] +.dz equ dword [ebp-28] +.cbx equ [ebp-32] +.cex equ [ebp-36] +.cby equ [ebp-40] +.cey equ [ebp-44] +.cz equ dword [ebp-48] .czbuff equ dword [ebp-52] - mov ebp,esp + mov ebp,esp - mov ecx,.y - or ecx,ecx - jl .bl_end - mov dx,word[size_y_var] - dec dx - cmp cx,dx ;word[size_y_var] ;SIZE_Y - jge .bl_end + mov ecx,.y + or ecx,ecx + jl .bl_end + mov dx,word[size_y_var] + dec dx + cmp cx,dx ;word[size_y_var] ;SIZE_Y + jge .bl_end - cmp eax,ebx - jl @f - je .bl_end + cmp eax,ebx + jl @f + je .bl_end - xchg eax,ebx + xchg eax,ebx if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx - mov edx,.ex1 - xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx + mov edx,.ex1 + xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx else - movq mm0,.bx1 - movq mm1,.ex1 - movq mm2,.bx2 - movq mm3,.ex2 - movq .bx2,mm0 - movq .ex2,mm1 - movq .bx1,mm2 - movq .ex1,mm3 + movq mm0,.bx1 + movq mm1,.ex1 + movq mm2,.bx2 + movq mm3,.ex2 + movq .bx2,mm0 + movq .ex2,mm1 + movq .bx1,mm2 + movq .ex1,mm3 end if - mov edx,.z1 - xchg edx,.z2 
- mov .z1,edx + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx @@: - push eax ebx ;store x1, x2 - mov ebx,.x1 - movzx edx,word[size_x_var] - dec edx - cmp ebx,edx + push eax ebx ;store x1, x2 + mov ebx,.x1 + movzx edx,word[size_x_var] + dec edx + cmp ebx,edx ; cmp bx,word[size_x_var] ;SIZE_X - jg .bl_end - cmp .x2,0 - jle .bl_end + jg .bl_end + cmp .x2,0 + jle .bl_end - mov ebx,.x2 - sub ebx,.x1 + mov ebx,.x2 + sub ebx,.x1 if Ext >= SSE - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + shufps xmm3,xmm3,0 ; movq mm0,.bx1q ; movq mm1,.bx2q @@ -856,86 +774,91 @@ if Ext >= SSE ; cvtpi2ps xmm1,mm3 cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 - movlhps xmm0,xmm0 + movlhps xmm0,xmm0 cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 - movlhps xmm1,xmm1 + movlhps xmm1,xmm1 cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 - subps xmm1,xmm0 - ; hi lo - divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex + subps xmm1,xmm0 + ; hi lo + divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex - shufps xmm1,xmm1,11011000b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 + shufps xmm1,xmm1,11011000b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 cvtps2pi mm1,xmm1 - movq .dex,mm0 ; hi - lo -> dbx, dex - movq .dey,mm1 ; hi - lo -> dby, dey + movq .dex,mm0 ; hi - lo -> dbx, dex + movq .dey,mm1 ; hi - lo -> dby, dey else - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax - mov eax,.ex2 ; calc .dby - sub eax,.ex1 - cdq - idiv ebx - push eax - mov eax,.by2 ; calc .dex - sub eax,.by1 - cdq - idiv ebx - push eax - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax + + + mov eax,.bx2 ; calc .dbx + sub eax,.bx1 + cdq + idiv ebx + push eax + + mov eax,.ex2 ; calc .dby + sub eax,.ex1 + cdq + idiv ebx + push eax + + mov eax,.by2 ; calc .dex + sub eax,.by1 + cdq + idiv ebx + push eax + + mov eax,.ey2 ; calc .dey + sub eax,.ey1 + cdq + idiv ebx + push eax end if - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - 
idiv ebx - push eax + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax - cmp .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; cutting triangle exceedes screen - mov ebx,.x1 - neg ebx - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov .x1,0 + cmp .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; cutting triangle exceedes screen + mov ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 - mov eax,.dbx - imul ebx - add .bx1,eax + mov eax,.dbx + imul ebx + add .bx1,eax - mov eax,.dby - imul ebx - add .by1,eax + mov eax,.dby + imul ebx + add .by1,eax - mov eax,.dex - imul ebx - add .ex1,eax + mov eax,.dex + imul ebx + add .ex1,eax - mov eax,.dey - imul ebx - add .ey1,eax + mov eax,.dey + imul ebx + add .ey1,eax @@: - movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] - mov ebx,.x2 - cmp eax,ebx - jg @f - mov .x2,eax + movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] + mov ebx,.x2 + cmp eax,ebx + jg @f + mov .x2,eax @@: ; movd mm0,eax ; movd mm1,.x2 @@ -946,27 +869,27 @@ end if ; mov .x2,SIZE_X ;eax | @@: ; movzx eax,word[size_x_var] ;calc memory begin in buffers - mov ebx,.y - mul ebx - mov ebx,.x1 - add eax,ebx - mov ebx,eax - lea eax,[eax*3] - add edi,eax ; edi - screen - mov esi,.z_buff ; z-buffer filled with dd variables - shl ebx,2 - add esi,ebx ; esi - Z buffer + mov ebx,.y + mul ebx + mov ebx,.x1 + add eax,ebx + mov ebx,eax + lea eax,[eax*3] + add edi,eax ; edi - screen + mov esi,.z_buff ; z-buffer filled with dd variables + shl ebx,2 + add esi,ebx ; esi - Z buffer - mov ecx,.x2 - sub ecx,.x1 - ; init current variables - push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi - push dword .ex1 - push dword .by1 - push dword .ey1 + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi + push dword .ex1 + push dword .by1 + push dword .ey1 - push .z1 ; current z shl CATMULL_SHIFT - push esi + push .z1 ; current z shl CATMULL_SHIFT + push esi if 
Ext >= MMX pxor mm0,mm0 @@ -985,104 +908,104 @@ end if .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode if Ext=NON - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] else - mov ebx,.cz - cmp ebx,dword[edx] + mov ebx,.cz + cmp ebx,dword[edx] end if - jge .skip + jge .skip if Ext=NON - mov eax,.cby - mov esi,.cbx - sar eax,ROUND - sar esi,ROUND - shl eax,TEX_SHIFT ;- - add esi,eax - lea esi,[esi*3] ;- ; esi - current b. texture addres - add esi,.bmap + mov eax,.cby + mov esi,.cbx + sar eax,ROUND + sar esi,ROUND + shl eax,TEX_SHIFT ;- + add esi,eax + lea esi,[esi*3] ;- ; esi - current b. texture addres + add esi,.bmap - mov ebx,.cex ;.cex - current env map X - mov eax,.cey ;.cey - current env map y - sar ebx,ROUND - sar eax,ROUND + mov ebx,.cex ;.cex - current env map X + mov eax,.cey ;.cey - current env map y + sar ebx,ROUND + sar eax,ROUND - shl eax,TEX_SHIFT - add ebx,eax - lea ebx,[ebx*3] - add ebx,.emap + shl eax,TEX_SHIFT + add ebx,eax + lea ebx,[ebx*3] + add ebx,.emap else - movq mm5,mm4 ;.cey - psrad mm5,ROUND - pslld mm5,TEX_SHIFT - movq mm6,mm3 ;.cex - psrad mm6,ROUND - paddd mm5,mm6 - movq mm6,mm5 - paddd mm5,mm5 - paddd mm5,mm6 - paddd mm5,.emap - movd esi,mm5 - psrlq mm5,32 - movd ebx,mm5 + movq mm5,mm4 ;.cey + psrad mm5,ROUND + pslld mm5,TEX_SHIFT + movq mm6,mm3 ;.cex + psrad mm6,ROUND + paddd mm5,mm6 + movq mm6,mm5 + paddd mm5,mm5 + paddd mm5,mm6 + paddd mm5,.emap + movd esi,mm5 + psrlq mm5,32 + movd ebx,mm5 end if if Ext>=MMX - movd mm1,[esi] - movd mm2,[ebx] - punpcklbw mm1,mm0 - punpcklbw mm2,mm0 - pmullw mm1,mm2 - psrlw mm1,8 - packuswb mm1,mm0 - movd [edi],mm1 - mov ebx,.cz - mov dword[edx],ebx + movd mm1,[esi] + movd mm2,[ebx] + punpcklbw mm1,mm0 + punpcklbw mm2,mm0 + pmullw mm1,mm2 + psrlw mm1,8 + packuswb mm1,mm0 + movd [edi],mm1 + mov ebx,.cz + mov 
dword[edx],ebx else - cld ; esi - tex e. - lodsb ; ebx - tex b. - mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - mov ebx,.cz - mov esi,.czbuff - mov dword[esi],ebx - jmp .no_skip + cld ; esi - tex e. + lodsb ; ebx - tex b. + mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + mov ebx,.cz + mov esi,.czbuff + mov dword[esi],ebx + jmp .no_skip end if .skip: - add edi,3 + add edi,3 if Ext = NON .no_skip: - add .czbuff,4 - mov eax,.dbx - add .cbx,eax - mov eax,.dby - add .cby,eax - mov eax,.dex - add .cex,eax - mov eax,.dey - add .cey,eax + add .czbuff,4 + mov eax,.dbx + add .cbx,eax + mov eax,.dby + add .cby,eax + mov eax,.dex + add .cex,eax + mov eax,.dey + add .cey,eax else - add edx,4 - paddd mm3,.dex - paddd mm4,.dey + add edx,4 + paddd mm3,.dex + paddd mm4,.dey ; movq mm5,mm3 ; movq mm6,mm4 ; psrad mm5,ROUND @@ -1090,16 +1013,20 @@ end if ; movq .cex,mm3 ; movq .cey,mm4 end if - mov eax,.dz - add .cz,eax + mov eax,.dz + add .cz,eax if Ext = NON - dec ecx - jnz .draw + dec ecx + jnz .draw else - loop .draw + loop .draw end if .bl_end: - mov esp,ebp + mov esp,ebp ret 56 +;Ext = SSE2 +;the_zero: +;size_y_var: +;size_x_var: diff --git a/programs/demos/view3ds/view3ds.asm b/programs/demos/view3ds/view3ds.asm index ec2ab00d32..453a694b1e 100644 --- a/programs/demos/view3ds/view3ds.asm +++ b/programs/demos/view3ds/view3ds.asm @@ -1,11 +1,11 @@ -; application : View3ds ver. 0.075 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.076 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. 
; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba ; email : macgub3@wp.pl -; web : http://macgub.co.pl, http://macgub.j.pl +; web : http://macgub.co.pl ; Fell free to use this intro in your own distribution of KolibriOS. ; Special greetings to KolibriOS team . ; I hope because my demos Christian Belive will be near to each of You. @@ -18,7 +18,6 @@ ; 1) Read from a file (*.3DS standard) ; 2) Written in manually (at the end of the code) ; now not exist -format binary as "" SIZE_X equ 512 SIZE_Y equ 512 ; ///// I want definitely @@ -38,14 +37,12 @@ MMX = 1 SSE = 2 SSE2 = 3 SSE3 = 4 -Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } +Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } + ; For now correct only SSE2 and SSE3 versions. if you have older CPU ; use older versions of app. Probably ver 005 will be OK but it need ; re-edit to support new Kolibri features. -; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) -USE_LFN = 1 ; App is Kolibri only now. - use32 org 0x0 db 'MENUET01' ; 8 byte id @@ -98,7 +95,19 @@ START: ; start of execution call normalize_all_light_vectors call copy_lights ; to aligned float call init_triangles_normals2 + + if Ext >= SSE2 + call detect_chunks + mov [chunks_number],ecx + mov [chunks_ptr],ebx + + ; esi - tri_ch + ; edi - t_ptr - every vertice index - pointer to to all triangles + ; that have this index + end if + call init_point_normals + call init_envmap2 call init_envmap_cub call generate_texture2 @@ -107,7 +116,7 @@ START: ; start of execution if Ext >= SSE3 call init_point_lights mov [fire_flag],0 ; proteza - end if + end if mov edi,bumpmap call calc_bumpmap call calc_bumpmap_coords ; bump and texture mapping @@ -118,12 +127,13 @@ START: ; start of execution cpuid bt ecx,0 ; is sse3 on board? 
jc @f - mov [max_dr_flg],12 + mov byte[max_dr_flg],12 mov [isSSE3],0 @@: end if -still: + + still: cmp [edit_flag],1 jne @f mov eax,40 ; set events mask @@ -164,7 +174,7 @@ still: je key cmp eax,3 ; button event ? je button - + mov esi,eax mov eax,37 mov ebx,7 ; get mouse scroll @@ -175,7 +185,7 @@ still: cmp eax, 1 je button.zoom_out mov eax,esi - + cmp eax,6 ; mouse event ? jne @f cmp [edit_flag],1 ; handle mouse only when edit is active @@ -244,26 +254,26 @@ still: jmp noclose - .rot_inc_x: - inc [angle_x] - and [angle_x],0xff - jmp noclose.end_rot - .rot_dec_x: - dec [angle_x] - and [angle_x],0xff - jmp noclose.end_rot - .rot_inc_y: - inc [angle_y] - and [angle_y],0xff - jmp noclose.end_rot - .rot_dec_y: - dec [angle_y] - and [angle_y],0xff - jmp noclose.end_rot - .rot_z: - inc [angle_z] - and [angle_z],0xff - jmp noclose.end_rot + .rot_inc_x: + inc [angle_x] + and [angle_x],0xff + jmp noclose.end_rot + .rot_dec_x: + dec [angle_x] + and [angle_x],0xff + jmp noclose.end_rot + .rot_inc_y: + inc [angle_y] + and [angle_y],0xff + jmp noclose.end_rot + .rot_dec_y: + dec [angle_y] + and [angle_y],0xff + jmp noclose.end_rot + .rot_z: + inc [angle_z] + and [angle_z],0xff + jmp noclose.end_rot button: ; button mov eax,17 ; get id @@ -336,7 +346,7 @@ still: .next_m4: cmp ah,14 jne @f - .xchg: + .xchg: call exchange @@: cmp ah,15 @@ -828,6 +838,7 @@ end if jmp still + ;-------------------------------------------------------------------------------- ;-------------------------PROCEDURES--------------------------------------------- ;-------------------------------------------------------------------------------- @@ -838,6 +849,7 @@ include "3dmath.inc" include "grd_line.inc" include "b_procs.inc" include "a_procs.inc" +include "chunks.inc" include "grd_cat.inc" include "bump_tex.inc" include "grd_tex.inc" @@ -879,7 +891,9 @@ edit: ; mmx required, edit mesh by vertex imul edx,ecx add ebx,edx push ebx - lea ecx,[ebx*2] + mov ecx,ebx + shl ecx,2 + ; lea ecx,[ebx*2] lea 
ebx,[ebx*3] cmp [dr_flag],12 @@ -931,11 +945,12 @@ edit: ; mmx required, edit mesh by vertex check_bar jne .no_edit add ecx,[vertices_index_ptr] - mov cx,word[ecx] - inc cx + mov ecx,[ecx] + ; cmp ecx,-1 + ; je .no_edit - mov [vertex_edit_no],cx ;if vert_edit_no = 0, no vertex selected + mov [vertex_edit_no],ecx ;if vert_edit_no = -1, no vertex selected mov eax,dword[.x_coord] mov dword[edit_end_x],eax @@ -949,7 +964,7 @@ edit: ; mmx required, edit mesh by vertex ; add ecx,[vertices_index_ptr] ; mov cx,[ecx] ; inc cx - cmp [vertex_edit_no],0 ; cx ; vertex number + cmp [vertex_edit_no],-1 ; cx ; vertex number je .end push dword[.x_coord] pop dword[edit_end_x] @@ -961,8 +976,8 @@ edit: ; mmx required, edit mesh by vertex check_bar jne .end - movzx esi,[vertex_edit_no] - dec esi + mov esi,[vertex_edit_no] + ; dec esi lea esi,[esi*3] add esi,esi add esi,[points_translated_ptr] @@ -996,8 +1011,8 @@ edit: ; mmx required, edit mesh by vertex call rotary ; inject into vertex list - movzx edi,[vertex_edit_no] - dec edi + mov edi,[vertex_edit_no] + ; dec edi lea edi,[edi*3] shl edi,2 add edi,[points_ptr] @@ -1012,7 +1027,7 @@ edit: ; mmx required, edit mesh by vertex mov dword[edit_end_x],0 - mov [vertex_edit_no],0 + mov [vertex_edit_no],-1 .no_edit: .end: @@ -1055,7 +1070,7 @@ alloc_buffer_mem: mov ecx,[.temp] - add ecx,ecx + shl ecx,2 add ecx,256 mov eax,68 mov ebx,20 @@ -1477,18 +1492,84 @@ ret if Ext >= SSE2 init_point_normals: +;in: +; esi - tri_ch +; edi - t_ptr .z equ dword [ebp-8] .y equ dword [ebp-12] .x equ [ebp-16] .point_number equ dword [ebp-28] .hit_faces equ dword [ebp-32] +.t_ptr equ dword [ebp-36] +.tri_ch equ dword [ebp-40] +.max_val equ dword [ebp-44] push ebp mov ebp,esp sub esp,64 and ebp,-16 + mov .t_ptr,edi + mov .tri_ch,esi + + + + + + + mov ecx,[triangles_count_var] + shl ecx,3 + lea ecx,[ecx*3] + add ecx,.tri_ch + mov .max_val,ecx + xor edx,edx + + .lp1: + mov ebx,edx + shl ebx,2 + add ebx,.t_ptr + mov esi,[ebx] + or esi,esi + jz .old + + xorps 
xmm1,xmm1 + xor ecx,ecx + @@: + mov eax,[esi+4] ; eax - tri index + mov ebx,[esi] + imul eax,[i12] + add eax,[triangles_normals_ptr] + movups xmm0,[eax] + inc ecx + addps xmm1,xmm0 + add esi,8 + cmp esi,.max_val ; some objects need this check + ja .old ;old method + cmp ebx,[esi] + je @b + + cvtsi2ss xmm2,ecx + rcpss xmm2,xmm2 + shufps xmm2,xmm2,0 + mulps xmm1,xmm2 + mov edi,edx + imul edi,[i12] + add edi,[points_normals_ptr] + movlps [edi],xmm1 + movhlps xmm1,xmm1 + movss [edi+8],xmm1 + call normalize_vector + + inc edx + cmp edx,[points_count_var] + jnz .lp1 + + jmp .end + + + .old: + mov edi,[points_normals_ptr] - mov .point_number,0 + mov .point_number,edx .ipn_loop: movd xmm0,.point_number pshufd xmm0,xmm0,0 @@ -1547,6 +1628,20 @@ init_point_normals: mov edx,.point_number cmp edx,[points_count_var] jne .ipn_loop + .end: + + mov eax,68 + mov ebx,13 + mov ecx,.t_ptr + int 0x40 + + mov eax,68 + mov ebx,13 + mov ecx,.tri_ch + int 0x40 + + + add esp,64 pop ebp @@ -1776,10 +1871,10 @@ draw_triangles: emms ; update translated list MMX required - cmp [vertex_edit_no],0 + cmp [vertex_edit_no],-1 je @f - movzx eax,[vertex_edit_no] - dec eax + mov eax,[vertex_edit_no] + ; dec eax movd mm0,[edit_end_x] psubw mm0,[edit_start_x] lea eax,[eax*3] @@ -2383,42 +2478,36 @@ draw_triangles: push word .zz2 push word .zz1 - mov esi, .point_index3 ; tex map coords - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index2 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index1 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - - lea esi, .point_index1 ; env coords - sub esp,12 + fninit + lea esi, .point_index3 ; env coords mov edi,esp + sub esp,24 mov ecx,3 @@: mov eax,dword[esi] - lea eax,[eax*3] shl eax,2 + mov ebx,eax + ; mov ebx,eax + add ebx,[tex_points_ptr] + mov ebx,[ebx] + mov [edi-8],ebx + lea eax,[eax*3] add eax,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] fimul 
[correct_tex] fiadd [correct_tex] - fistp word[edi] - + fistp word[edi-4] + and word[edi-4],0x7fff ; some objects need it ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] fimul [correct_tex] fiadd [correct_tex] - fistp word[edi+2] - and word[edi+2],0x7fff ; some objects need it - add edi,4 - add esi,4 + fistp word[edi-2] + and word[edi-2],0x7fff ; some objects need it + + sub edi,8 + sub esi,4 loop @b mov eax, .xx1 @@ -2434,18 +2523,7 @@ draw_triangles: .bump_tex: push ebp - mov esi, .point_index3 ; tex map coords - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index2 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index1 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] + fninit push dword texmap @@ -2455,41 +2533,37 @@ draw_triangles: push word .zz2 push word .zz1 - lea esi, .index1x12 ; env coords - sub esp,12 + + lea ebx, .point_index1 + sub esp,36 mov edi,esp mov ecx,3 @@: - mov eax,dword[esi] - add eax,[points_normals_rot_ptr] + mov eax,[ebx] + shl eax,2 + mov esi,eax + lea esi,[esi*3] + add eax,[tex_points_ptr] + mov eax,[eax] + ror eax,16 + mov [edi],eax + mov [edi+8],eax + + add esi,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 - fld dword[eax] + fld dword[esi] fimul [correct_tex] fiadd [correct_tex] - fistp word[edi] + fistp word[edi+6] ; env coords ; texture y=(rotated point normal -> y * 255)+255 - fld dword[eax+4] + fld dword[esi+4] fimul [correct_tex] fiadd [correct_tex] - fistp word[edi+2] - - add edi,4 - add esi,4 + fistp word[edi+4] + add ebx,4 + add edi,12 loop @b - mov esi, .point_index3 ; bump map coords - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index2 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov esi, .point_index1 - shl esi,2 - add esi,[tex_points_ptr] - push dword[esi] - mov eax,dword .xx1 mov ebx,dword .xx2 mov ecx,dword .xx3 @@ -2861,138 +2935,110 @@ end if ret - - - - - draw_handlers: 
- ; in eax - render model - push ebp - mov ebp,esp - .counter equ ebp-16 - .xres3m18 equ ebp-8 - .xres2m12 equ ebp-12 + ; in eax - render model + push ebp + mov ebp,esp +; emms + .fac equ dword[ebp-16] + .xplus_scr equ ebp-8 + .xplus_index equ ebp-12 .dr_model equ dword[ebp-4] + sub esp,16 + mov .dr_model,eax - ; init counter - sub esp,12 - push dword 0 - mov .dr_model,eax - movzx eax,word[size_x_var] + movzx eax,word[size_x_var] cmp .dr_model,12 - jge @f - lea ebx,[eax*3] - sub ebx,18 - add eax,eax - sub eax,12 - mov [.xres3m18],ebx - mov [.xres2m12],eax - jmp .f - @@: - lea ebx,[eax*4] - sub ebx,4*6 - add eax,eax - sub eax,3*4 - mov [.xres3m18],ebx - mov [.xres2m12],eax - .f: + jge @f + lea ebx,[eax*3] + sub ebx,3*6 + mov [.xplus_scr],ebx ; for scr 1st cause + mov .fac,3 + jmp .in_r + @@: + lea ebx,[eax*4] ; for scr 2cond cause + sub ebx,4*6 + mov [.xplus_scr],ebx + mov .fac,4 + .in_r: + lea ebx,[eax*4] + sub ebx,4*6 + mov [.xplus_index],ebx ; index + xor ecx,ecx + mov eax,4 shl 16 + 4 + movd xmm0,[size_y_var] + movd xmm1,eax + psubw xmm0,xmm1 + pshuflw xmm0,xmm0,00000001b - mov esi,[points_translated_ptr] - .loop: - push esi - ; DO culling AT FIRST - cmp [culling_flag],1 ; (if culling_flag = 1) - jne .no_culling - mov edi,[.counter] ; ********************************* - lea edi,[edi*3] - shl edi,2 - add edi,[points_normals_rot_ptr] - mov eax,[edi+8] ; check sign of z coof - shr eax,31 - cmp eax,1 - jnz .skip - .no_culling: - mov eax,[esi] - movzx ebx,ax ; ebx - x - shr eax,16 ; eax - y - cmp eax,4 ; check if markers not exceedes screen - jle .skip - cmp ebx,4 - jle .skip - movzx edx,word[size_x_var] - sub edx,4 - movzx ecx,word[size_y_var] - sub ecx,4 - cmp ebx,edx - jge .skip - cmp eax,ecx - jge .skip + .l: + push ecx + cmp [culling_flag],1 ; (if culling_flag = 1) + jne .no_culling + mov edi,ecx ; ********************************* + lea edi,[edi*3] + shl edi,2 + add edi,[points_normals_rot_ptr] + bt dword[edi+8],31 + jnc .skip + .no_culling: + mov esi,ecx + lea 
esi,[esi*3] + add esi,esi + add esi,[points_translated_ptr] + movd xmm2,[esi] + movd xmm3,[esi] + pcmpgtw xmm2,xmm0 + pcmpgtw xmm3,xmm1 + pxor xmm3,xmm2 + movd eax,xmm3 + cmp eax,-1 + jne .skip - movzx edx,word[size_x_var] - ; sub ebx,3 - ; sub eax,3 - imul eax,edx - add eax,ebx - push eax - lea edi,[eax*3] - cmp .dr_model,12 - jl @f - add edi,[esp] - @@: - add esp,4 - lea eax,[eax*2] - ; draw bar 6x6 - add edi,[screen_ptr] - add eax,dword[vertices_index_ptr] - - - - - mov edx,[.counter] - mov ecx,6 - - .oop: - push ecx - mov ecx,6 - - .do: - mov word[edi],0x0000 ;ax - mov byte[edi+2],0xff ;al - mov word[eax],dx - add eax,2 - cmp .dr_model,12 - jl @f - add edi,4 - loop .do - jmp .ad - @@: - add edi,3 - loop .do - .ad: - add edi,[.xres3m18] - add eax,[.xres2m12] - pop ecx - loop .oop - - .skip: - pop esi - add esi,6 - inc dword[.counter] - mov ecx,[.counter] - cmp ecx,[points_count_var] - jng .loop - - mov esp,ebp - pop ebp + movzx eax,word[esi] + movzx ebx,word[esi+2] + sub eax,2 + sub ebx,2 + movzx edx, word[size_x_var] + imul ebx,edx + add ebx,eax + mov edi,ebx + imul ebx,.fac + shl edi,2 + add ebx,[screen_ptr] + add edi,[vertices_index_ptr] + mov eax,ecx + cld + mov ecx,6 + .l2: + push ecx + mov ecx,6 ; draw bar + .l1: + mov word[ebx],0 + mov byte[ebx+2],0xff + stosd + add ebx,.fac + loop .l1 + add ebx,[.xplus_scr] + add edi,[.xplus_index] + pop ecx + loop .l2 + .skip: + pop ecx + inc ecx + cmp ecx,[points_count_var] + jna .l + mov esp,ebp + pop ebp ret + fill_Z_buffer: mov eax,0x70000000 cmp [dr_flag],11 @@ -3033,11 +3079,7 @@ read_tp_variables: ; read [triangles_count_var] and [points_count_va xor ebp,ebp mov [points_count_var],ebx mov [triangles_count_var],ebx - if USE_LFN = 0 - mov esi,SourceFile - else mov esi,[fptr] - end if cmp [esi],word 4D4Dh je @f ;Must be legal .3DS file @@ -3096,11 +3138,13 @@ read_tp_variables: ; read [triangles_count_var] and [points_count_va mov edx,ecx add esi,8 @@: - - add ebx,6 - add esi,12 + lea ecx,[ecx*3] + add ecx,ecx + add 
ebx,ecx + add ecx,ecx + add esi,ecx ; dec ecx - loop @b + ; loop @b @@: @@: @@ -3114,9 +3158,11 @@ read_tp_variables: ; read [triangles_count_var] and [points_count_va add esi,8 @@: - add esi,8 - dec ecx - jnz @b + shl ecx,3 + add esi,ecx + ; dec ecx + ; jnz @b + ; loop @b ; xor ecx,ecx add ebp,edx jmp .find4k @@ -3198,8 +3244,9 @@ read_from_file: add ebx,6 add esi,12 - dec ecx - jnz @b + ; dec ecx + ; jnz @b + loop @b @@: ; mov dword[points+ebx],-1 push edi @@ -3227,14 +3274,17 @@ read_from_file: add dword[edi-8],ebp add dword[edi-4],ebp add esi,8 - dec ecx - jnz @b + ; dec ecx + ; jnz @b + loop @b add ebp,edx jmp .find4k mov eax,-1 ;<---mark if OK .exit: mov dword[edi],-1 ret + + alloc_mem_for_tp: mov eax, 68 cmp [re_alloc_flag],1 @@ -3265,7 +3315,7 @@ alloc_mem_for_tp: mov eax, 68 mov ecx, [triangles_count_var] - lea ecx, [3+ecx*3] + lea ecx, [6+ecx*3] shl ecx, 2 mov edx,[triangles_normals_ptr] int 0x40 ; -> allocate memory for triangles normals @@ -3274,7 +3324,7 @@ alloc_mem_for_tp: mov eax, 68 mov ecx, [points_count_var] - lea ecx,[3+ecx*3] + lea ecx,[6+ecx*3] shl ecx, 2 mov edx,[points_normals_ptr] int 0x40 @@ -3284,13 +3334,14 @@ alloc_mem_for_tp: mov eax, 68 ; mov ebx, 12 mov ecx, [points_count_var] - lea ecx,[3+ecx*3] + lea ecx,[10+ecx*3] shl ecx, 2 mov edx,[points_normals_rot_ptr] int 0x40 mov [points_normals_rot_ptr], eax mov eax, 68 + mov edx,[points_ptr] int 0x40 mov [points_ptr], eax @@ -3304,13 +3355,14 @@ alloc_mem_for_tp: mov ebx, 12 mov ecx, [points_count_var] shl ecx,2 + add ecx,32 mov edx,[tex_points_ptr] int 0x40 mov [tex_points_ptr], eax mov eax, 68 mov ecx, [points_count_var] - inc ecx + add ecx,10 shl ecx, 3 mov edx,[points_translated_ptr] int 0x40 @@ -3417,7 +3469,7 @@ write_info: mov bx,[size_x_var] shl ebx,16 add ebx,120*65536+70 ; [x start] *65536 + [y start] - mov ecx,30 shl 16 + 100 + mov ecx,30 shl 16 + 150 xor edx,edx int 0x40 @@ -3467,7 +3519,7 @@ write_info: int 40h pop esi add esi,4 - cmp esi,12 + cmp esi,16 jnz .nxxx ret ; 
*********************************************