diff --git a/programs/demos/view3ds/3dmath.inc b/programs/demos/view3ds/3dmath.inc index 5a52eb5b0a..af467e4ebc 100644 --- a/programs/demos/view3ds/3dmath.inc +++ b/programs/demos/view3ds/3dmath.inc @@ -4,13 +4,108 @@ z3d equ 4 vec_x equ 0 vec_y equ 4 vec_z equ 8 -; 3d point - triple integer word coordinate -; vector - triple float dword coordinate -;----------------------in: -------------------------------- -;------------------------ esi - pointer to 1st 3d point --- -;------------------------ edi - pointer to 2nd 3d point --- -;------------------------ ebx - pointer to result vector -- -;---------------------- out : none ------------------------ + +if 0 ; Ext >= SSE3 +calc_bounding_box: +; in: +; xmm0 - normal vector of ray +; xmm1 - light origin +; out: +; eax - axis aligned bounding boxes bit mask + + .rmx equ [ebp-36] + .nray equ [ebp-64] + .origin equ [ebp-80] + .dirfrac equ [ebp-96] + .nrayr equ [ebp-112] + .originr equ [ebp-128] + .tmin equ [ebp-132] + .tmax equ [ebp-136] + + + push ebp + mov ebp,esp + and ebp,-16 + sub esp,160 + + movss xmm5,[rsscale] + shufps xmm5,xmm1,0 + movd xmm2,[vect_x] + punpcklwd xmm2,[the_zero] + cvtdq2ps xmm2,xmm2 + subps xmm1,xmm2 + movaps .origin,xmm1 + mulps xmm0,xmm5 + movaps .nray,xmm0 + + mov esi,matrix + lea edi,.rmx + call reverse_mx_3x3 + +; in: esi - ptr to points(normals], each point(normal) coeficient as dword +; edi - ptr to rotated points(normals) +; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix +; ecx - number of points(normals) + + ; reverse transform + lea esi,.nray + lea edi,.nrayr + lea ebx,.rmx + mov ecx,1 + call rotary + + lea esi,.origin + lea edi,.originr + lea ebx,.rmx + mov ecx,1 + call rotary + + xor ecx,ecx + mov ebx,aabb1 + xor eax,eax + rcpps xmm7,.nrayr + movaps .dirfrac,xmm7 + + .nx_aabb: + movaps xmm5,[ebx] + movaps xmm6,[ebx] + minps xmm5,[the_zero] + maxps xmm6,[the_zero] + ; xmm5 - lb corner of AABB with minimal coordinates + ; xmm6 - rt cor. 
of AABB with maximum coords + subps xmm5,.originr + subps xmm6,.originr + mulps xmm5,.dirfrac ; xmm5 - tx1, ty1 + mulps xmm6,.dirfrac ; xmm6 - tx2, ty2 + movaps xmm1,xmm6 + movaps xmm2,xmm6 + + + minps xmm1,xmm5 + maxps xmm2,xmm5 + + movaps xmm5,xmm1 + movaps xmm6,xmm2 + shufps xmm5,xmm5,11100001b + shufps xmm6,xmm6,11100001b + maxss xmm1,xmm5 ;t min + minss xmm2,xmm6 ;t max + comiss xmm2,xmm1 + jb .no_inter + .yes: + bts eax,ecx + .no_inter: + add ebx,16 + inc ecx + cmp ecx,8 + jne .nx_aabb + +; out: eax - bit mask + add esp,160 + pop ebp +ret +end if + reverse_mx_3x3: ; esi - source matrix ; edi - desired reversed matrix @@ -141,6 +236,13 @@ reverse_mx_3x3: mov esp,ebp pop ebp ret +; 3d point - triple integer word coordinate +; vector - triple float dword coordinate +;----------------------in: -------------------------------- +;------------------------ esi - pointer to 1st 3d point --- +;------------------------ edi - pointer to 2nd 3d point --- +;------------------------ ebx - pointer to result vector -- +;---------------------- out : none ------------------------ make_vector_r: if Ext < SSE2 @@ -194,17 +296,37 @@ cross_product: fsubp ;st1 ,st fstp dword [ebx+vec_z] ret +cross_aligned: + movaps xmm0,[esi] + movaps xmm1,[esi] + movaps xmm2,[edi] + movaps xmm3,[edi] + shufps xmm0,xmm0,00001001b + shufps xmm1,xmm1,00010010b + shufps xmm2,xmm2,00010010b + shufps xmm3,xmm3,00001001b + mulps xmm0,xmm2 + mulps xmm1,xmm3 + subps xmm0,xmm1 + movaps [ebx],xmm0 +ret ;----------------------- in: ------------------------------ ;---------------------------- edi - pointer to vector ----- ;----------------------- out : none normalize_vector: -if Ext >= SSE3 +if Ext >= SSE2 movups xmm0,[edi] andps xmm0,[zero_hgst_dd] movups xmm1,xmm0 mulps xmm0,xmm0 - haddps xmm0,xmm0 - haddps xmm0,xmm0 + movhlps xmm2,xmm0 + addps xmm0,xmm2 + movaps xmm2,xmm0 + shufps xmm2,xmm2,11100101b + addps xmm0,xmm2 + shufps xmm0,xmm0,0 +; haddps xmm0,xmm0 +; haddps xmm0,xmm0 rsqrtps xmm0,xmm0 mulps 
xmm0,xmm1 movlps [edi],xmm0 @@ -559,7 +681,7 @@ translate_points: ; just convert into integer; z coord still needed ; packsdw xmm0,xmm0 ; movq [edi] fld dword[esi] - fiadd [vect_x] + fiadd word[vect_x] fistp word[edi] fld dword[esi+4] fiadd [vect_y] diff --git a/programs/demos/view3ds/3glass.inc b/programs/demos/view3ds/3glass.inc index e4e0d495cf..293cbf3881 100644 --- a/programs/demos/view3ds/3glass.inc +++ b/programs/demos/view3ds/3glass.inc @@ -1,5 +1,5 @@ ; Glass like rendering triangle by Maciej Guba. -; http://macgub.hekko.pl, macgub3@wp.pl +; http://macgub.co.pl, macgub3@wp.pl ROUND2 equ 10 glass_tri: diff --git a/programs/demos/view3ds/3glass_tex.inc b/programs/demos/view3ds/3glass_tex.inc index 549d80cf75..d78fd986d6 100644 --- a/programs/demos/view3ds/3glass_tex.inc +++ b/programs/demos/view3ds/3glass_tex.inc @@ -1,762 +1,762 @@ -; Bilinear filtering, real Phongs shading and glass like parallel. -; Thanks to authors of 3dica tutorial. -; Implemented in FASM by Maciej Guba. -; http://macgub.j.pl - -ROUND2 equ 10 - -glass_tex_tri: -;----Procedure render Phongs shaded triangle with z coord -;----interpolation ( Catmull alghoritm ), each pixel is - -;----covered by texture using bilinear filtering.-------- -;----I normalize normal vector in every pixel ----------- -;------------------in - eax - x1 shl 16 + y1 ------------ -;---------------------- ebx - x2 shl 16 + y2 ------------ -;---------------------- ecx - x3 shl 16 + y3 ------------ -;---------------------- esi - pointer to stencil buffer-- -;---------------------- filled with dd float variables- -;---------------------- edi - pointer to screen buffer--- -;---------------------- edx - pointer to texture--------- -;---------------------- xmm0 - 1st normal vector -------- -;---------------------- xmm1 - 2cond normal vector ------ -;---------------------- xmm2 - 3rd normal vector -------- -;---------------------- xmm3 - normalized light vector -- -;---------------------- xmm4 - lo -> hi z1, z2, z3 coords 
-;---------------------- as dwords floats --------------- -;---------------------- xmm5 - lo -> hi y_min, y_max, --- -;---------------------- x_min, x_max as dword integers - -;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- -;---------------------- ty2, tx3, ty3 as word, xres as-- -;---------------------- dword integers------------------ -;---------------------- stack - no parameters ----------- -;-------------------------------------------------------- -;----------------- procedure don't save registers !! ---- - - - - - push ebp - mov ebp,esp - sub esp,512 - sub ebp,16 - and ebp,0xfffffff0 - - .1_nv equ [ebp-16] - .2_nv equ [ebp-32] - .3_nv equ [ebp-48] - .l_v equ [ebp-64] - .z3 equ [ebp-72] - .z2 equ [ebp-76] - .z1 equ [ebp-80] - .x1 equ [ebp-82] - .y1 equ [ebp-84] - .x2 equ [ebp-86] - .y2 equ [ebp-88] - .x3 equ [ebp-90] - .y3 equ [ebp-92] - .Zbuf equ [ebp-96] - .x_max equ [ebp-100] - .x_min equ [ebp-104] - .y_max equ [ebp-108] - .y_min equ [ebp-112] - .screen equ [ebp-116] - .dx12 equ [ebp-120] - .dx13 equ [ebp-124] - .dx23 equ [ebp-128] - .dn12 equ [ebp-144] - .dn13 equ [ebp-160] - .dn23 equ [ebp-176] - - .cnv1 equ [ebp-192] ; cur normal vectors - .cnv2 equ [ebp-208] - .x_res equ [ebp-212] - .ty3 equ [ebp-214] - .tx3 equ [ebp-216] - .ty2 equ [ebp-218] - .tx2 equ [ebp-220] - .ty1 equ [ebp-222] - .tx1 equ [ebp-224] - .dz12 equ [ebp-232] - .dty12 equ [ebp-236] - .dtx12 equ [ebp-240] - .dz13 equ [ebp-248] - .dty13 equ [ebp-252] - .dtx13 equ [ebp-256] - .dz23 equ [ebp-264] - .dty23 equ [ebp-268] - .dtx23 equ [ebp-272] - .cz1 equ [ebp-280] - .cty1 equ [ebp-284] - .ctx1 equ [ebp-288] - .cz2 equ [ebp-296] - .cty2 equ [ebp-300] - .ctx2 equ [ebp-304] - .tx_ptr equ [ebp-308] - - - emms - ; movd .x_res,xmm7 - .sort3: ; sort triangle coordinates... 
- cmp ax,bx - jle .sort1 - xchg eax,ebx - shufps xmm4,xmm4,11100001b - shufps xmm6,xmm6,11100001b - movaps xmm7,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm7 - - - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - shufps xmm4,xmm4,11011000b - shufps xmm6,xmm6,11011000b - movaps xmm7,xmm1 - movaps xmm1,xmm2 - movaps xmm2,xmm7 - - jmp .sort3 - - .sort2: - ; movq .tx1,xmm6 - ; pshufd xmm6,xmm6,01001110b - ; movd .tx3,xmm6 - movaps .tx1,xmm6 - movaps .z1,xmm4 - mov .y1,eax - mov .y2,ebx - mov .y3,ecx - - movdqa .y_min,xmm5 -if 1 ; check if at last only fragment - packssdw xmm5,xmm5 ; of triangle is in visable area - pshuflw xmm5,xmm5,11011000b - movdqu xmm7,.y3 - movdqa xmm6,xmm5 - pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min - pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max - movdqa xmm4,xmm7 - pcmpgtw xmm7,xmm5 - pcmpgtw xmm4,xmm6 - pxor xmm7,xmm4 - pmovmskb eax,xmm7 - and eax,0x00aaaaaa - or eax,eax - jz .rpt_loop2_end -end if - movaps .1_nv,xmm0 - movaps .2_nv,xmm1 - movaps .3_nv,xmm2 - movaps .l_v,xmm3 - mov .Zbuf,esi - mov .screen,edi - mov .tx_ptr,edx - - - - mov bx,.y2 ; calc deltas - sub bx,.y1 - jnz .rpt_dx12_make - - xorps xmm7,xmm7 - mov dword .dx12,0 - movaps .dtx12,xmm7 - movaps .dn12,xmm7 - jmp .rpt_dx12_done - - .rpt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx12,eax - - cvtsi2ss xmm6,ebx - shufps xmm6,xmm6,0 - movss xmm5,.z2 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz12,xmm5 - - movd xmm0,.tx1 - movd xmm2,.tx2 - pxor xmm1,xmm1 - punpcklwd xmm0,xmm1 - punpcklwd xmm2,xmm1 - psubd xmm2,xmm0 - ; cvtdq2ps xmm0,xmm0 - cvtdq2ps xmm2,xmm2 -; movlps .ctx1,xmm0 -; movlps .ctx2,xmm2 - ; subps xmm2,xmm0 - divps xmm2,xmm6 - movlps .dtx12,xmm2 - - movaps xmm0,.2_nv - subps xmm0,.1_nv - divps xmm0,xmm6 - movaps .dn12,xmm0 - - - .rpt_dx12_done: - - mov bx,.y3 ; calc deltas - sub bx,.y1 - jnz .rpt_dx13_make - - xorps xmm7,xmm7 - mov dword .dx13,0 - movaps .dtx13,xmm7 - movaps .dn13,xmm7 - jmp 
.rpt_dx13_done - - .rpt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx13,eax - - - cvtsi2ss xmm6,ebx - shufps xmm6,xmm6,0 - - movss xmm5,.z3 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz13,xmm5 - - movd xmm0,.tx1 - movd xmm2,.tx3 - pxor xmm1,xmm1 - punpcklwd xmm0,xmm1 - punpcklwd xmm2,xmm1 - psubd xmm2,xmm0 - ; cvtdq2ps xmm0,xmm0 - cvtdq2ps xmm2,xmm2 - ; subps xmm2,xmm0 - divps xmm2,xmm6 - movlps .dtx13,xmm2 - - - - movaps xmm0,.3_nv - subps xmm0,.1_nv - divps xmm0,xmm6 - movaps .dn13,xmm0 - - .rpt_dx13_done: - - mov bx,.y3 ; calc deltas - sub bx,.y2 - jnz .rpt_dx23_make - - xorps xmm7,xmm7 - mov dword .dx23,0 - movaps .dtx23,xmm7 - movaps .dn23,xmm7 - jmp .rpt_dx23_done - - .rpt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx23,eax - - cvtsi2ss xmm6,ebx - shufps xmm6,xmm6,0 - movss xmm5,.z3 - subss xmm5,.z2 - divss xmm5,xmm6 - movss .dz23,xmm5 - - movd xmm0,.tx2 - movd xmm2,.tx3 - pxor xmm1,xmm1 - punpcklwd xmm0,xmm1 - punpcklwd xmm2,xmm1 - psubd xmm2,xmm0 - ; cvtdq2ps xmm0,xmm0 - cvtdq2ps xmm2,xmm2 -; movlps .ctx1,xmm0 -; movlps .ctx2,xmm2 - ; subps xmm2,xmm0 - divps xmm2,xmm6 - movlps .dtx23,xmm2 - - - - - movaps xmm0,.3_nv - subps xmm0,.2_nv - divps xmm0,xmm6 - movaps .dn23,xmm0 - - .rpt_dx23_done: - - movsx eax,word .x1 - shl eax,ROUND2 - mov ebx,eax - mov edx,.z1 - movd xmm1,.tx1 - pxor xmm2,xmm2 - punpcklwd xmm1,xmm2 - cvtdq2ps xmm1,xmm1 - - mov .cz1,edx - mov .cz2,edx - movaps xmm0,.1_nv - movlps .ctx1,xmm1 - movlps .ctx2,xmm1 - movaps .cnv1,xmm0 - movaps .cnv2,xmm0 - - ; mov edx,.dx13 - ; cmp edx,.dx12 - ; jg .second_cause - - movsx ecx,word .y1 - cmp cx,.y2 - - jge .rpt_loop1_end - - .rpt_loop1: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - ; movlps xmm3,.cz1 ; cz1, cz2 both - movaps xmm3,.ctx1 - movaps xmm5,.ctx2 - movaps xmm4,.l_v - movd xmm6,.x_res - sar ebx,ROUND2 - sar eax,ROUND2 - mov edx,.tx_ptr - mov 
edi,.screen - - mov esi,.Zbuf - - call glass_tex_line - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - ; movss xmm2,.cz1 - ; movss xmm3,.cz2 - movaps xmm2,.ctx1 - movaps xmm3,.ctx2 - addps xmm0,.dn13 - addps xmm1,.dn12 - addps xmm2,.dtx13 - addps xmm3,.dtx12 - add eax,.dx13 - add ebx,.dx12 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - ; movss .cz1,xmm2 - ; movss .cz2,xmm3 - movaps .ctx1,xmm2 - movaps .ctx2,xmm3 - add ecx,1 - cmp cx,.y2 - jl .rpt_loop1 - - - ; jmp .rpt_loop2_end - - - .rpt_loop1_end: - movsx ecx,word .y2 - cmp cx,.y3 - jge .rpt_loop2_end - - movsx ebx,word .x2 ; eax - cur x1 - shl ebx,ROUND2 ; ebx - cur x2 - push dword .z2 - pop dword .cz2 - movd xmm1,.tx2 - pxor xmm2,xmm2 - punpcklwd xmm1,xmm2 - cvtdq2ps xmm1,xmm1 - movlps .ctx2,xmm1 - movaps xmm0,.2_nv - movaps .cnv2,xmm0 - - - .rpt_loop2: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movaps xmm3,.ctx1 - movaps xmm5,.ctx2 - movaps xmm4,.l_v - sar ebx,ROUND2 - sar eax,ROUND2 - mov edx,.tx_ptr - mov edi,.screen - mov esi,.Zbuf - movd xmm6,.x_res - call glass_tex_line - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - ; movss xmm2,.cz1 - ; movss xmm3,.cz2 - movaps xmm2,.ctx1 - movaps xmm3,.ctx2 - addps xmm0,.dn13 - addps xmm1,.dn23 - ; addss xmm2,.dz13 - ; addss xmm3,.dz23 - addps xmm2,.dtx13 - addps xmm3,.dtx23 - - add eax,.dx13 - add ebx,.dx23 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - movaps .ctx1,xmm2 - movaps .ctx2,xmm3 - - ; movss .cz1,xmm2 - ; movss .cz2,xmm3 - - add ecx,1 - cmp cx,.y3 - jl .rpt_loop2 - - .second_cause: ;dx13 > dx12 - - .rpt_loop2_end: - - add esp,512 - pop ebp - -ret -align 16 -glass_tex_line: -; in: -; xmm0 - normal vector 1 -; xmm1 - normal vect 2 -; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float -; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float -; xmm2 - lo -> hi y_min, y_max, x_min, x_max -; as dword integers -; xmm4 - normalized light vector -; eax - x1 -; ebx - x2 -; ecx - y -; edi - screen buffer -; esi - stencil buffer filled 
with dd floats -; edx - texture pointer (handle) -; xmm6 - lowest dword x_res as integer - - push ebp - mov ebp,esp - sub esp,350 - sub ebp,16 - and ebp,0xfffffff0 - - .n1 equ [ebp-16] - .n2 equ [ebp-32] - .lv equ [ebp-48] - .lx1 equ [ebp-52] - .lx2 equ [ebp-56] -; .z2 equ [ebp-60] -; .z1 equ [ebp-64] - .screen equ [ebp-68] - .zbuff equ [ebp-72] - .x_max equ [ebp-74] - .x_min equ [ebp-76] - .y_max equ [ebp-78] - .y_min equ [ebp-80] - .dn equ [ebp-96] - .x_res equ [ebp-100] - .y equ [ebp-104] - .cnv equ [ebp-128] - .z1 equ [ebp-136] - .ty1 equ [ebp-140] - .tx1 equ [ebp-144] - .z2 equ [ebp-152] - .ty2 equ [ebp-156] - .tx2 equ [ebp-160] - .cz equ [ebp-168] - .cty equ [ebp-172] - .ctx equ [ebp-176] - .dz equ [ebp-184] - .dty equ [ebp-188] - .dtx equ [ebp-192] - .yd equ [ebp-196] - .xd equ [ebp-200] - .yf equ [ebp-204] - .xf equ [ebp-208] - .w4 equ [ebp-212] - .w3 equ [ebp-216] - .w2 equ [ebp-220] - .w1 equ [ebp-224] - .p4 equ [ebp-228] - .p3 equ [ebp-232] - .p2 equ [ebp-236] - .p1 equ [ebp-240] - - - .tx_ptr equ [ebp-244] - - ; movaps xmm7,xmm3 - ; movaps xmm3,xmm5 - ; movaps xmm5,xmm7 - - - mov .y,ecx - packssdw xmm2,xmm2 - ; movaps xmm7,xmm2 - ; movhps xmm2,[the_zero] - ; pshuflw xmm2,xmm2,11111000b - ; pshufd xmm2,xmm2,11111100b - ; movlps xmm7,[the_zero] - ; pshufhw xmm7,xmm7,11111111b - ; movlps xmm7,[the_zero] - ; psrldq xmm7,4 - ; por xmm2,xmm7 - movq .y_min,xmm2 - cmp cx,.y_min - jl .end_line - cmp cx,.y_max - jge .end_line ; - - cmp eax,ebx - je .end_line - jl @f - xchg eax,ebx - movaps xmm7,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm7 - movaps xmm7,xmm3 - movaps xmm3,xmm5 - movaps xmm5,xmm7 - @@: - - cmp ax,.x_max - jge .end_line - cmp bx,.x_min - jle .end_line - movaps .lv,xmm4 - movaps .n1,xmm0 - movaps .n2,xmm1 - mov .lx1,eax - mov .lx2,ebx - movaps .tx1,xmm3 - movaps .tx2,xmm5 - movd .x_res,xmm6 - mov .tx_ptr,edx - sub ebx,eax - cvtsi2ss xmm7,ebx - shufps xmm7,xmm7,0 - subps xmm1,xmm0 - divps xmm1,xmm7 - movaps .dn,xmm1 - subps xmm5,xmm3 - divps xmm5,xmm7 - 
movaps .dtx,xmm5 - - - - mov ebx,.lx1 - cmp bx,.x_min ; clipping on function4 - jge @f - movzx eax,word .x_min - sub eax,ebx - cvtsi2ss xmm7,eax - shufps xmm7,xmm7,0 - mulps xmm5,xmm7 - mulps xmm1,xmm7 - addps xmm5,.tx1 - addps xmm1,.n1 - movsx eax,word .x_min - movaps .tx1,xmm5 - movaps .n1,xmm1 - mov dword .lx1,eax - - @@: - movzx eax,word .x_max - cmp .lx2,eax - jl @f - mov .lx2,eax - @@: - mov eax,.x_res - mul dword .y - add eax,.lx1 - shl eax,2 - add edi,eax - add esi,eax - - mov ecx,.lx2 - sub ecx,.lx1 - ; movaps xmm0,.n1 - movaps xmm2,.tx1 - ; xorps xmm1,xmm1 -align 16 - .ddraw: - ; movhlps xmm7,xmm2 - ; cmpnltss xmm7,dword[esi] - ; movd eax,xmm7 - ; or eax,eax - ; jnz .skip - xorps xmm5,xmm5 - ; movhlps xmm7,xmm2 - ; movss [esi],xmm7 - movaps xmm7,.n1 ;xmm0 - mulps xmm7,xmm7 ; normalize - haddps xmm7,xmm7 - haddps xmm7,xmm7 - rsqrtps xmm7,xmm7 - mulps xmm7,.n1 ;xmm0 - ; andps xmm7,[abs_z_coof] - movaps .cnv,xmm7 - - movaps xmm6,xmm2 - minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 - cvttps2dq xmm7,xmm6 - cvtdq2ps xmm4,xmm7 - subps xmm6,xmm4 - movlps .xf,xmm6 - ; movaps xmm5,.lv - mov eax,lights_aligned ; global - align 16 - .again_col: - movaps xmm0,[eax] ; calc multple lights - mulps xmm0,.cnv ;.lv ; last dword should be zeroed - haddps xmm0,xmm0 - haddps xmm0,xmm0 - ; andps xmm0,[abs_val] ;calc absolute value -if 1 - ; stencil - movhlps xmm6,xmm2 - movhlps xmm4,xmm2 - addss xmm6,[aprox] - subss xmm4,[aprox] - cmpnltss xmm6,dword[esi] - cmpnltss xmm4,dword[esi] - xorps xmm6,xmm4 - xorps xmm4,xmm4 - movd ebx,xmm6 - cmp ebx,-1 - jne .no_reflective -end if - movaps xmm4,xmm0 - mulps xmm4,xmm4 - mulps xmm4,xmm4 - mulps xmm4,xmm4 - mulps xmm4,xmm4 - mulps xmm4,[eax+48] - - .no_reflective: - maxps xmm0,[the_zero] - ; movaps xmm1,xmm0 - mulps xmm0,[eax+16] - addps xmm4,xmm0 - addps xmm4,[eax+32] - maxps xmm5,xmm4 - add eax,64 - cmp eax,lights_aligned_end - jnz .again_col - minps xmm5,[mask_255f] - - ; texture coords work - movd eax,xmm7 - psrldq xmm7,4 - movd 
ebx,xmm7 - shl ebx,TEX_SHIFT - add eax,ebx - lea eax,[eax*3] - add eax,.tx_ptr - mov ebx,eax - add ebx,TEX_X*3 - movd xmm7,[eax] - movd xmm6,[eax+3] - movd xmm4,[ebx] - movd xmm3,[ebx+3] - punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 - punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 - punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... - movdqa xmm6,xmm7 - movdqa xmm4,xmm7 - psrldq xmm6,4 - psrldq xmm4,8 - - punpcklbw xmm7,[the_zero] ; broadcasted 0 - punpcklbw xmm6,[the_zero] - punpcklbw xmm4,[the_zero] - punpcklwd xmm7,[the_zero] - punpcklwd xmm6,[the_zero] - punpcklwd xmm4,[the_zero] - - - ; calc w ......... - movlps xmm3,[the_one] ; broadcasted dword 1.0 - cvtdq2ps xmm7,xmm7 - subps xmm3,.xf - cvtdq2ps xmm6,xmm6 - movhps xmm3,.xf - cvtdq2ps xmm4,xmm4 - movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf - shufps xmm3,xmm3,10001000b - shufps xmm1,xmm1,11110101b - mulps xmm3,xmm1 - - mulps xmm7,xmm3 - mulps xmm6,xmm3 - mulps xmm4,xmm3 - haddps xmm7,xmm7 ; r - haddps xmm6,xmm6 ; g - haddps xmm4,xmm4 ; b - haddps xmm7,xmm7 ; r - haddps xmm6,xmm6 ; g - haddps xmm4,xmm4 ; b - movlhps xmm7,xmm6 - shufps xmm7,xmm7,11101000b - movlhps xmm7,xmm4 - - mulps xmm5,xmm7 - cvtps2dq xmm5,xmm5 - psrld xmm5,8 - movd xmm6,[edi] - packssdw xmm5,xmm5 - packuswb xmm5,xmm5 - paddusb xmm5,xmm6 - movd [edi],xmm5 - .skip: - add edi,4 - add esi,4 - ; addps xmm0,.dn - movaps xmm0,.n1 ; cur normal - addps xmm0,.dn - addps xmm2,.dtx - movaps .n1,xmm0 - sub ecx,1 - jnz .ddraw - - .end_line: - add esp,350 - pop ebp - -ret +; Bilinear filtering, real Phongs shading and glass like parallel. +; Thanks to authors of 3dica tutorial. +; Implemented in FASM by Maciej Guba. 
+; http://macgub.co.pl + +ROUND2 equ 10 + +glass_tex_tri: +;----Procedure renders Phong-shaded triangle with z coord +;----interpolation ( Catmull algorithm ), each pixel is - +;----covered by texture using bilinear filtering.-------- +;----I normalize normal vector in every pixel ----------- +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- esi - pointer to stencil buffer-- +;---------------------- filled with dd float variables- +;---------------------- edi - pointer to screen buffer--- +;---------------------- edx - pointer to texture--------- +;---------------------- xmm0 - 1st normal vector -------- +;---------------------- xmm1 - 2nd normal vector -------- +;---------------------- xmm2 - 3rd normal vector -------- +;---------------------- xmm3 - normalized light vector -- +;---------------------- xmm4 - lo -> hi z1, z2, z3 coords +;---------------------- as dword floats ---------------- +;---------------------- xmm5 - lo -> hi y_min, y_max, --- +;---------------------- x_min, x_max as dword integers - +;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- +;---------------------- ty2, tx3, ty3 as word, xres as-- +;---------------------- dword integers------------------ +;---------------------- stack - no parameters ----------- +;-------------------------------------------------------- +;---------------- procedure doesn't save registers!! 
---- + + + + + push ebp + mov ebp,esp + sub esp,512 + sub ebp,16 + and ebp,0xfffffff0 + + .1_nv equ [ebp-16] + .2_nv equ [ebp-32] + .3_nv equ [ebp-48] + .l_v equ [ebp-64] + .z3 equ [ebp-72] + .z2 equ [ebp-76] + .z1 equ [ebp-80] + .x1 equ [ebp-82] + .y1 equ [ebp-84] + .x2 equ [ebp-86] + .y2 equ [ebp-88] + .x3 equ [ebp-90] + .y3 equ [ebp-92] + .Zbuf equ [ebp-96] + .x_max equ [ebp-100] + .x_min equ [ebp-104] + .y_max equ [ebp-108] + .y_min equ [ebp-112] + .screen equ [ebp-116] + .dx12 equ [ebp-120] + .dx13 equ [ebp-124] + .dx23 equ [ebp-128] + .dn12 equ [ebp-144] + .dn13 equ [ebp-160] + .dn23 equ [ebp-176] + + .cnv1 equ [ebp-192] ; cur normal vectors + .cnv2 equ [ebp-208] + .x_res equ [ebp-212] + .ty3 equ [ebp-214] + .tx3 equ [ebp-216] + .ty2 equ [ebp-218] + .tx2 equ [ebp-220] + .ty1 equ [ebp-222] + .tx1 equ [ebp-224] + .dz12 equ [ebp-232] + .dty12 equ [ebp-236] + .dtx12 equ [ebp-240] + .dz13 equ [ebp-248] + .dty13 equ [ebp-252] + .dtx13 equ [ebp-256] + .dz23 equ [ebp-264] + .dty23 equ [ebp-268] + .dtx23 equ [ebp-272] + .cz1 equ [ebp-280] + .cty1 equ [ebp-284] + .ctx1 equ [ebp-288] + .cz2 equ [ebp-296] + .cty2 equ [ebp-300] + .ctx2 equ [ebp-304] + .tx_ptr equ [ebp-308] + + + emms + ; movd .x_res,xmm7 + .sort3: ; sort triangle coordinates... 
+ cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm4,xmm4,11100001b + shufps xmm6,xmm6,11100001b + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + shufps xmm4,xmm4,11011000b + shufps xmm6,xmm6,11011000b + movaps xmm7,xmm1 + movaps xmm1,xmm2 + movaps xmm2,xmm7 + + jmp .sort3 + + .sort2: + ; movq .tx1,xmm6 + ; pshufd xmm6,xmm6,01001110b + ; movd .tx3,xmm6 + movaps .tx1,xmm6 + movaps .z1,xmm4 + mov .y1,eax + mov .y2,ebx + mov .y3,ecx + + movdqa .y_min,xmm5 +if 1 ; check if at last only fragment + packssdw xmm5,xmm5 ; of triangle is in visable area + pshuflw xmm5,xmm5,11011000b + movdqu xmm7,.y3 + movdqa xmm6,xmm5 + pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min + pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max + movdqa xmm4,xmm7 + pcmpgtw xmm7,xmm5 + pcmpgtw xmm4,xmm6 + pxor xmm7,xmm4 + pmovmskb eax,xmm7 + and eax,0x00aaaaaa + or eax,eax + jz .rpt_loop2_end +end if + movaps .1_nv,xmm0 + movaps .2_nv,xmm1 + movaps .3_nv,xmm2 + movaps .l_v,xmm3 + mov .Zbuf,esi + mov .screen,edi + mov .tx_ptr,edx + + + + mov bx,.y2 ; calc deltas + sub bx,.y1 + jnz .rpt_dx12_make + + xorps xmm7,xmm7 + mov dword .dx12,0 + movaps .dtx12,xmm7 + movaps .dn12,xmm7 + jmp .rpt_dx12_done + + .rpt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + cvtsi2ss xmm6,ebx + shufps xmm6,xmm6,0 + movss xmm5,.z2 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz12,xmm5 + + movd xmm0,.tx1 + movd xmm2,.tx2 + pxor xmm1,xmm1 + punpcklwd xmm0,xmm1 + punpcklwd xmm2,xmm1 + psubd xmm2,xmm0 + ; cvtdq2ps xmm0,xmm0 + cvtdq2ps xmm2,xmm2 +; movlps .ctx1,xmm0 +; movlps .ctx2,xmm2 + ; subps xmm2,xmm0 + divps xmm2,xmm6 + movlps .dtx12,xmm2 + + movaps xmm0,.2_nv + subps xmm0,.1_nv + divps xmm0,xmm6 + movaps .dn12,xmm0 + + + .rpt_dx12_done: + + mov bx,.y3 ; calc deltas + sub bx,.y1 + jnz .rpt_dx13_make + + xorps xmm7,xmm7 + mov dword .dx13,0 + movaps .dtx13,xmm7 + movaps .dn13,xmm7 + jmp 
.rpt_dx13_done + + .rpt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + + cvtsi2ss xmm6,ebx + shufps xmm6,xmm6,0 + + movss xmm5,.z3 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz13,xmm5 + + movd xmm0,.tx1 + movd xmm2,.tx3 + pxor xmm1,xmm1 + punpcklwd xmm0,xmm1 + punpcklwd xmm2,xmm1 + psubd xmm2,xmm0 + ; cvtdq2ps xmm0,xmm0 + cvtdq2ps xmm2,xmm2 + ; subps xmm2,xmm0 + divps xmm2,xmm6 + movlps .dtx13,xmm2 + + + + movaps xmm0,.3_nv + subps xmm0,.1_nv + divps xmm0,xmm6 + movaps .dn13,xmm0 + + .rpt_dx13_done: + + mov bx,.y3 ; calc deltas + sub bx,.y2 + jnz .rpt_dx23_make + + xorps xmm7,xmm7 + mov dword .dx23,0 + movaps .dtx23,xmm7 + movaps .dn23,xmm7 + jmp .rpt_dx23_done + + .rpt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + cvtsi2ss xmm6,ebx + shufps xmm6,xmm6,0 + movss xmm5,.z3 + subss xmm5,.z2 + divss xmm5,xmm6 + movss .dz23,xmm5 + + movd xmm0,.tx2 + movd xmm2,.tx3 + pxor xmm1,xmm1 + punpcklwd xmm0,xmm1 + punpcklwd xmm2,xmm1 + psubd xmm2,xmm0 + ; cvtdq2ps xmm0,xmm0 + cvtdq2ps xmm2,xmm2 +; movlps .ctx1,xmm0 +; movlps .ctx2,xmm2 + ; subps xmm2,xmm0 + divps xmm2,xmm6 + movlps .dtx23,xmm2 + + + + + movaps xmm0,.3_nv + subps xmm0,.2_nv + divps xmm0,xmm6 + movaps .dn23,xmm0 + + .rpt_dx23_done: + + movsx eax,word .x1 + shl eax,ROUND2 + mov ebx,eax + mov edx,.z1 + movd xmm1,.tx1 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm2 + cvtdq2ps xmm1,xmm1 + + mov .cz1,edx + mov .cz2,edx + movaps xmm0,.1_nv + movlps .ctx1,xmm1 + movlps .ctx2,xmm1 + movaps .cnv1,xmm0 + movaps .cnv2,xmm0 + + ; mov edx,.dx13 + ; cmp edx,.dx12 + ; jg .second_cause + + movsx ecx,word .y1 + cmp cx,.y2 + + jge .rpt_loop1_end + + .rpt_loop1: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + ; movlps xmm3,.cz1 ; cz1, cz2 both + movaps xmm3,.ctx1 + movaps xmm5,.ctx2 + movaps xmm4,.l_v + movd xmm6,.x_res + sar ebx,ROUND2 + sar eax,ROUND2 + mov edx,.tx_ptr + mov 
edi,.screen + + mov esi,.Zbuf + + call glass_tex_line + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + ; movss xmm2,.cz1 + ; movss xmm3,.cz2 + movaps xmm2,.ctx1 + movaps xmm3,.ctx2 + addps xmm0,.dn13 + addps xmm1,.dn12 + addps xmm2,.dtx13 + addps xmm3,.dtx12 + add eax,.dx13 + add ebx,.dx12 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + ; movss .cz1,xmm2 + ; movss .cz2,xmm3 + movaps .ctx1,xmm2 + movaps .ctx2,xmm3 + add ecx,1 + cmp cx,.y2 + jl .rpt_loop1 + + + ; jmp .rpt_loop2_end + + + .rpt_loop1_end: + movsx ecx,word .y2 + cmp cx,.y3 + jge .rpt_loop2_end + + movsx ebx,word .x2 ; eax - cur x1 + shl ebx,ROUND2 ; ebx - cur x2 + push dword .z2 + pop dword .cz2 + movd xmm1,.tx2 + pxor xmm2,xmm2 + punpcklwd xmm1,xmm2 + cvtdq2ps xmm1,xmm1 + movlps .ctx2,xmm1 + movaps xmm0,.2_nv + movaps .cnv2,xmm0 + + + .rpt_loop2: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movaps xmm3,.ctx1 + movaps xmm5,.ctx2 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edx,.tx_ptr + mov edi,.screen + mov esi,.Zbuf + movd xmm6,.x_res + call glass_tex_line + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + ; movss xmm2,.cz1 + ; movss xmm3,.cz2 + movaps xmm2,.ctx1 + movaps xmm3,.ctx2 + addps xmm0,.dn13 + addps xmm1,.dn23 + ; addss xmm2,.dz13 + ; addss xmm3,.dz23 + addps xmm2,.dtx13 + addps xmm3,.dtx23 + + add eax,.dx13 + add ebx,.dx23 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movaps .ctx1,xmm2 + movaps .ctx2,xmm3 + + ; movss .cz1,xmm2 + ; movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y3 + jl .rpt_loop2 + + .second_cause: ;dx13 > dx12 + + .rpt_loop2_end: + + add esp,512 + pop ebp + +ret +align 16 +glass_tex_line: +; in: +; xmm0 - normal vector 1 +; xmm1 - normal vect 2 +; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float +; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float +; xmm2 - lo -> hi y_min, y_max, x_min, x_max +; as dword integers +; xmm4 - normalized light vector +; eax - x1 +; ebx - x2 +; ecx - y +; edi - screen buffer +; esi - stencil buffer filled 
with dd floats +; edx - texture pointer (handle) +; xmm6 - lowest dword x_res as integer + + push ebp + mov ebp,esp + sub esp,350 + sub ebp,16 + and ebp,0xfffffff0 + + .n1 equ [ebp-16] + .n2 equ [ebp-32] + .lv equ [ebp-48] + .lx1 equ [ebp-52] + .lx2 equ [ebp-56] +; .z2 equ [ebp-60] +; .z1 equ [ebp-64] + .screen equ [ebp-68] + .zbuff equ [ebp-72] + .x_max equ [ebp-74] + .x_min equ [ebp-76] + .y_max equ [ebp-78] + .y_min equ [ebp-80] + .dn equ [ebp-96] + .x_res equ [ebp-100] + .y equ [ebp-104] + .cnv equ [ebp-128] + .z1 equ [ebp-136] + .ty1 equ [ebp-140] + .tx1 equ [ebp-144] + .z2 equ [ebp-152] + .ty2 equ [ebp-156] + .tx2 equ [ebp-160] + .cz equ [ebp-168] + .cty equ [ebp-172] + .ctx equ [ebp-176] + .dz equ [ebp-184] + .dty equ [ebp-188] + .dtx equ [ebp-192] + .yd equ [ebp-196] + .xd equ [ebp-200] + .yf equ [ebp-204] + .xf equ [ebp-208] + .w4 equ [ebp-212] + .w3 equ [ebp-216] + .w2 equ [ebp-220] + .w1 equ [ebp-224] + .p4 equ [ebp-228] + .p3 equ [ebp-232] + .p2 equ [ebp-236] + .p1 equ [ebp-240] + + + .tx_ptr equ [ebp-244] + + ; movaps xmm7,xmm3 + ; movaps xmm3,xmm5 + ; movaps xmm5,xmm7 + + + mov .y,ecx + packssdw xmm2,xmm2 + ; movaps xmm7,xmm2 + ; movhps xmm2,[the_zero] + ; pshuflw xmm2,xmm2,11111000b + ; pshufd xmm2,xmm2,11111100b + ; movlps xmm7,[the_zero] + ; pshufhw xmm7,xmm7,11111111b + ; movlps xmm7,[the_zero] + ; psrldq xmm7,4 + ; por xmm2,xmm7 + movq .y_min,xmm2 + cmp cx,.y_min + jl .end_line + cmp cx,.y_max + jge .end_line ; + + cmp eax,ebx + je .end_line + jl @f + xchg eax,ebx + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + movaps xmm7,xmm3 + movaps xmm3,xmm5 + movaps xmm5,xmm7 + @@: + + cmp ax,.x_max + jge .end_line + cmp bx,.x_min + jle .end_line + movaps .lv,xmm4 + movaps .n1,xmm0 + movaps .n2,xmm1 + mov .lx1,eax + mov .lx2,ebx + movaps .tx1,xmm3 + movaps .tx2,xmm5 + movd .x_res,xmm6 + mov .tx_ptr,edx + sub ebx,eax + cvtsi2ss xmm7,ebx + shufps xmm7,xmm7,0 + subps xmm1,xmm0 + divps xmm1,xmm7 + movaps .dn,xmm1 + subps xmm5,xmm3 + divps xmm5,xmm7 + 
movaps .dtx,xmm5 + + + + mov ebx,.lx1 + cmp bx,.x_min ; clipping on function4 + jge @f + movzx eax,word .x_min + sub eax,ebx + cvtsi2ss xmm7,eax + shufps xmm7,xmm7,0 + mulps xmm5,xmm7 + mulps xmm1,xmm7 + addps xmm5,.tx1 + addps xmm1,.n1 + movsx eax,word .x_min + movaps .tx1,xmm5 + movaps .n1,xmm1 + mov dword .lx1,eax + + @@: + movzx eax,word .x_max + cmp .lx2,eax + jl @f + mov .lx2,eax + @@: + mov eax,.x_res + mul dword .y + add eax,.lx1 + shl eax,2 + add edi,eax + add esi,eax + + mov ecx,.lx2 + sub ecx,.lx1 + ; movaps xmm0,.n1 + movaps xmm2,.tx1 + ; xorps xmm1,xmm1 +align 16 + .ddraw: + ; movhlps xmm7,xmm2 + ; cmpnltss xmm7,dword[esi] + ; movd eax,xmm7 + ; or eax,eax + ; jnz .skip + xorps xmm5,xmm5 + ; movhlps xmm7,xmm2 + ; movss [esi],xmm7 + movaps xmm7,.n1 ;xmm0 + mulps xmm7,xmm7 ; normalize + haddps xmm7,xmm7 + haddps xmm7,xmm7 + rsqrtps xmm7,xmm7 + mulps xmm7,.n1 ;xmm0 + ; andps xmm7,[abs_z_coof] + movaps .cnv,xmm7 + + movaps xmm6,xmm2 + minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 + cvttps2dq xmm7,xmm6 + cvtdq2ps xmm4,xmm7 + subps xmm6,xmm4 + movlps .xf,xmm6 + ; movaps xmm5,.lv + mov eax,lights_aligned ; global + align 16 + .again_col: + movaps xmm0,[eax] ; calc multple lights + mulps xmm0,.cnv ;.lv ; last dword should be zeroed + haddps xmm0,xmm0 + haddps xmm0,xmm0 + ; andps xmm0,[abs_val] ;calc absolute value +if 1 + ; stencil + movhlps xmm6,xmm2 + movhlps xmm4,xmm2 + addss xmm6,[aprox] + subss xmm4,[aprox] + cmpnltss xmm6,dword[esi] + cmpnltss xmm4,dword[esi] + xorps xmm6,xmm4 + xorps xmm4,xmm4 + movd ebx,xmm6 + cmp ebx,-1 + jne .no_reflective +end if + movaps xmm4,xmm0 + mulps xmm4,xmm4 + mulps xmm4,xmm4 + mulps xmm4,xmm4 + mulps xmm4,xmm4 + mulps xmm4,[eax+48] + + .no_reflective: + maxps xmm0,[the_zero] + ; movaps xmm1,xmm0 + mulps xmm0,[eax+16] + addps xmm4,xmm0 + addps xmm4,[eax+32] + maxps xmm5,xmm4 + add eax,64 + cmp eax,lights_aligned_end + jnz .again_col + minps xmm5,[mask_255f] + + ; texture coords work + movd eax,xmm7 + psrldq xmm7,4 + movd 
ebx,xmm7 + shl ebx,TEX_SHIFT + add eax,ebx + lea eax,[eax*3] + add eax,.tx_ptr + mov ebx,eax + add ebx,TEX_X*3 + movd xmm7,[eax] + movd xmm6,[eax+3] + movd xmm4,[ebx] + movd xmm3,[ebx+3] + punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 + punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 + punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... + movdqa xmm6,xmm7 + movdqa xmm4,xmm7 + psrldq xmm6,4 + psrldq xmm4,8 + + punpcklbw xmm7,[the_zero] ; broadcasted 0 + punpcklbw xmm6,[the_zero] + punpcklbw xmm4,[the_zero] + punpcklwd xmm7,[the_zero] + punpcklwd xmm6,[the_zero] + punpcklwd xmm4,[the_zero] + + + ; calc w ......... + movlps xmm3,[the_one] ; broadcasted dword 1.0 + cvtdq2ps xmm7,xmm7 + subps xmm3,.xf + cvtdq2ps xmm6,xmm6 + movhps xmm3,.xf + cvtdq2ps xmm4,xmm4 + movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf + shufps xmm3,xmm3,10001000b + shufps xmm1,xmm1,11110101b + mulps xmm3,xmm1 + + mulps xmm7,xmm3 + mulps xmm6,xmm3 + mulps xmm4,xmm3 + haddps xmm7,xmm7 ; r + haddps xmm6,xmm6 ; g + haddps xmm4,xmm4 ; b + haddps xmm7,xmm7 ; r + haddps xmm6,xmm6 ; g + haddps xmm4,xmm4 ; b + movlhps xmm7,xmm6 + shufps xmm7,xmm7,11101000b + movlhps xmm7,xmm4 + + mulps xmm5,xmm7 + cvtps2dq xmm5,xmm5 + psrld xmm5,8 + movd xmm6,[edi] + packssdw xmm5,xmm5 + packuswb xmm5,xmm5 + paddusb xmm5,xmm6 + movd [edi],xmm5 + .skip: + add edi,4 + add esi,4 + ; addps xmm0,.dn + movaps xmm0,.n1 ; cur normal + addps xmm0,.dn + addps xmm2,.dtx + movaps .n1,xmm0 + sub ecx,1 + jnz .ddraw + + .end_line: + add esp,350 + pop ebp + +ret diff --git a/programs/demos/view3ds/3r_phg.inc b/programs/demos/view3ds/3r_phg.inc index 708df3794b..b0a5f20908 100644 --- a/programs/demos/view3ds/3r_phg.inc +++ b/programs/demos/view3ds/3r_phg.inc @@ -1,528 +1,528 @@ -; Real Phong's shading implemented if flat assembler -; by Maciej Guba. 
-; http://macgub.vxm.pl - -ROUND2 equ 10 -real_phong_tri_z: -;----procedure render Phongs shaded triangle with z coord -;----interpolation ( Catmull alghoritm )----------------- -;----I normalize normal vector in every pixel ----------- -;------------------in - eax - x1 shl 16 + y1 ------------ -;---------------------- ebx - x2 shl 16 + y2 ------------ -;---------------------- ecx - x3 shl 16 + y3 ------------ -;---------------------- esi - pointer to Z-buffer filled- -;---------------------- with dd float variables-------- -;---------------------- edi - pointer to screen buffer--- -;---------------------- xmm0 - 1st normal vector -------- -;---------------------- xmm1 - 2cond normal vector ------ -;---------------------- xmm2 - 3rd normal vector -------- -;---------------------- xmm3 - normalized light vector -- -;---------------------- xmm4 - lo -> hi z1, z2, z3 coords -;---------------------- as dwords floats --------------- -;---------------------- xmm5 - lo -> hi y_min, y_max, --- -;---------------------- x_min, x_max as dword integers - -;---------------------- stack - no parameters ----------- -;-------------------------------------------------------- -;----------------- procedure don't save registers !! 
---- - - - - - push ebp - mov ebp,esp - sub esp,512 - sub ebp,16 - and ebp,0xfffffff0 - - .1_nv equ [ebp-16] - .2_nv equ [ebp-32] - .3_nv equ [ebp-48] - .l_v equ [ebp-64] - .z3 equ [ebp-72] - .z2 equ [ebp-76] - .z1 equ [ebp-80] - .x1 equ [ebp-82] - .y1 equ [ebp-84] - .x2 equ [ebp-86] - .y2 equ [ebp-88] - .x3 equ [ebp-90] - .y3 equ [ebp-92] - .Zbuf equ [ebp-96] - .x_max equ [ebp-100] - .x_min equ [ebp-104] - .y_max equ [ebp-108] - .y_min equ [ebp-112] - .screen equ [ebp-116] - .dx12 equ [ebp-120] - .dx13 equ [ebp-124] - .dx23 equ [ebp-128] - .dn12 equ [ebp-144] - .dn13 equ [ebp-160] - .dn23 equ [ebp-176] - .dz12 equ [ebp-180] - .dz13 equ [ebp-184] - .dz23 equ [ebp-188] - - .cnv1 equ [ebp-208] ; cur normal vectors - .cnv2 equ [ebp-224] - .cz2 equ [ebp-228] - .cz1 equ [ebp-232] - - - - - - .sort3: ; sort triangle coordinates... - cmp ax,bx - jle .sort1 - xchg eax,ebx - shufps xmm4,xmm4,11100001b - movaps xmm6,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm6 - - - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - shufps xmm4,xmm4,11011000b - movaps xmm6,xmm1 - movaps xmm1,xmm2 - movaps xmm2,xmm6 - - jmp .sort3 - - .sort2: - - movaps .z1,xmm4 - mov .y1,eax - mov .y2,ebx - mov .y3,ecx - - movdqa .y_min,xmm5 -if 1 ; check if at last only fragment - packssdw xmm5,xmm5 ; of triangle is in visable area - pshuflw xmm5,xmm5,11011000b - movdqu xmm7,.y3 - movdqa xmm6,xmm5 - pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min - pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max - movdqa xmm4,xmm7 - pcmpgtw xmm7,xmm5 - pcmpgtw xmm4,xmm6 - pxor xmm7,xmm4 - pmovmskb eax,xmm7 - and eax,0x00aaaaaa - or eax,eax - jz .rpt_loop2_end -end if - movaps .1_nv,xmm0 - movaps .2_nv,xmm1 - movaps .3_nv,xmm2 - movaps .l_v,xmm3 - mov .Zbuf,esi - mov .screen,edi - - - - mov bx,.y2 ; calc deltas - sub bx,.y1 - jnz .rpt_dx12_make - - xorps xmm7,xmm7 - mov dword .dx12,0 - mov dword .dz12,0 - movaps .dn12,xmm7 - jmp .rpt_dx12_done - - .rpt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - 
shl eax,ROUND2 - cdq - idiv ebx - mov .dx12,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z2 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz12,xmm5 - - movaps xmm0,.2_nv - subps xmm0,.1_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn12,xmm0 - - - .rpt_dx12_done: - - mov bx,.y3 ; calc deltas - sub bx,.y1 - jnz .rpt_dx13_make - - xorps xmm7,xmm7 - mov dword .dx13,0 - mov dword .dz13,0 - movaps .dn13,xmm7 - jmp .rpt_dx13_done - - .rpt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx13,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z3 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz13,xmm5 - - movaps xmm0,.3_nv - subps xmm0,.1_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn13,xmm0 - - .rpt_dx13_done: - - mov bx,.y3 ; calc deltas - sub bx,.y2 - jnz .rpt_dx23_make - - xorps xmm7,xmm7 - mov dword .dx23,0 - mov dword .dz23,0 - movaps .dn23,xmm7 - jmp .rpt_dx23_done - - .rpt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx23,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z3 - subss xmm5,.z2 - divss xmm5,xmm6 - movss .dz23,xmm5 - - movaps xmm0,.3_nv - subps xmm0,.2_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn23,xmm0 - - .rpt_dx23_done: - - - movsx eax,word .x1 - shl eax,ROUND2 - mov ebx,eax - mov edx,.z1 - mov .cz1,edx - mov .cz2,edx - movaps xmm0,.1_nv - movaps .cnv1,xmm0 - movaps .cnv2,xmm0 - - - movsx ecx,word .y1 - cmp cx,.y2 - - jge .rpt_loop1_end - - .rpt_loop1: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movlps xmm3,.cz1 - movaps xmm4,.l_v - sar ebx,ROUND2 - sar eax,ROUND2 - mov edi,.screen - mov esi,.Zbuf - - call real_phong_line_z - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movss xmm2,.cz1 - movss xmm3,.cz2 - addps xmm0,.dn13 - addps xmm1,.dn12 - addss xmm2,.dz13 - addss xmm3,.dz12 - add eax,.dx13 - add ebx,.dx12 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - movss .cz1,xmm2 - movss .cz2,xmm3 - - add ecx,1 - cmp cx,.y2 - jl 
.rpt_loop1 - - - - - - .rpt_loop1_end: - movsx ecx,word .y2 - cmp cx,.y3 - jge .rpt_loop2_end - - movsx ebx,word .x2 ; eax - cur x1 - shl ebx,ROUND2 ; ebx - cur x2 - push dword .z2 - pop dword .cz2 - movaps xmm0,.2_nv - movaps .cnv2,xmm0 - - - .rpt_loop2: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movlps xmm3,.cz1 - movaps xmm4,.l_v - sar ebx,ROUND2 - sar eax,ROUND2 - mov edi,.screen - mov esi,.Zbuf - - call real_phong_line_z - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movss xmm2,.cz1 - movss xmm3,.cz2 - addps xmm0,.dn13 - addps xmm1,.dn23 - addss xmm2,.dz13 - addss xmm3,.dz23 - add eax,.dx13 - add ebx,.dx23 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - movss .cz1,xmm2 - movss .cz2,xmm3 - - add ecx,1 - cmp cx,.y3 - jl .rpt_loop2 - - .rpt_loop2_end: - - add esp,512 - pop ebp - -ret -align 16 -real_phong_line_z: -; in: -; xmm0 - normal vector 1 -; xmm1 - normal vect 2 -; xmm3 - lo -> hi z1, z2 coords as dwords floats -; xmm2 - lo -> hi y_min, y_max, x_min, x_max -; as dword integers -; xmm4 - normalized light vector -; eax - x1 -; ebx - x2 -; ecx - y -; edi - screen buffer -; esi - z buffer filled with dd floats - - push ebp - mov ebp,esp - sub esp,160 - sub ebp,16 - and ebp,0xfffffff0 - - .n1 equ [ebp-16] - .n2 equ [ebp-32] - .lv equ [ebp-48] - .lx1 equ [ebp-52] - .lx2 equ [ebp-56] - .z2 equ [ebp-60] - .z1 equ [ebp-64] - .screen equ [ebp-68] - .zbuff equ [ebp-72] - .x_max equ [ebp-74] - .x_min equ [ebp-76] - .y_max equ [ebp-78] - .y_min equ [ebp-80] - .dn equ [ebp-96] - .dz equ [ebp-100] - .y equ [ebp-104] - .cnv equ [ebp-128] - - mov .y,ecx - packssdw xmm2,xmm2 - movq .y_min,xmm2 - cmp cx,.y_min - jl .end_rp_line - cmp cx,.y_max - jge .end_rp_line ; - - cmp eax,ebx - je .end_rp_line - jl @f - xchg eax,ebx - movaps xmm7,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm7 - shufps xmm3,xmm3,11100001b - @@: - - cmp ax,.x_max - jge .end_rp_line - cmp bx,.x_min - jle .end_rp_line - movaps .lv,xmm4 - movaps .n1,xmm0 - movaps .n2,xmm1 - mov 
.lx1,eax - mov .lx2,ebx - movlps .z1,xmm3 - - sub ebx,eax - cvtsi2ss xmm7,ebx - shufps xmm7,xmm7,0 - subps xmm1,xmm0 - divps xmm1,xmm7 - movaps .dn,xmm1 - psrldq xmm3,4 - subss xmm3,.z1 - divss xmm3,xmm7 - movss .dz,xmm3 - - - - mov ebx,.lx1 - cmp bx,.x_min ; clipping on function4 - jge @f - movzx eax,word .x_min - sub eax,ebx - cvtsi2ss xmm7,eax - shufps xmm7,xmm7,0 - mulss xmm3,xmm7 - mulps xmm1,xmm7 - addss xmm3,.z1 - addps xmm1,.n1 - movsx eax,word .x_min - movss .z1,xmm3 - movaps .n1,xmm1 - mov dword .lx1,eax - - @@: - movzx eax,word .x_max - cmp .lx2,eax - jl @f - mov .lx2,eax - @@: - movzx eax,word[size_x_var] - mul dword .y - ; mov edx,.x1 - add eax,.lx1 - shl eax,2 - add edi,eax - add esi,eax - - mov ecx,.lx2 - sub ecx,.lx1 - movaps xmm0,.n1 - movss xmm2,.z1 -align 16 - .ddraw: - movss xmm7,xmm2 - cmpnltss xmm7,dword[esi] - movd eax,xmm7 - or eax,eax - jnz .skip - movss [esi],xmm2 - movaps xmm7,xmm0 - mulps xmm7,xmm7 ; normalize - haddps xmm7,xmm7 - haddps xmm7,xmm7 - rsqrtps xmm7,xmm7 - mulps xmm7,xmm0 - movaps .cnv,xmm7 - - mov edx,lights_aligned ; lights - global variable - xorps xmm1,xmm1 ; instead global can be used .lv - light vect. 
- @@: - movaps xmm6,[edx+16] - movaps xmm5,[edx] - movaps xmm3,[edx+48] - andps xmm5,[zero_hgst_dd] ; global - - mulps xmm5,.cnv ;.lv ; last dword should be zeroed - haddps xmm5,xmm5 - haddps xmm5,xmm5 - ; mulps xmm5,[env_const2] - ; maxps xmm5,[dot_min] - ; minps xmm5,[dot_max] - movaps xmm7,xmm5 - ; mulps xmm7,[env_const2] - ; mulps xmm7,[env_const2] - ; maxps xmm7,[dot_min] - ; minps xmm7,[dot_max] - - mulps xmm7,xmm7 - mulps xmm7,xmm7 - mulps xmm5,xmm6 - mulps xmm7,xmm7 - mulps xmm7,xmm3 - - addps xmm5,xmm7 - minps xmm5,[mask_255f] ; global - maxps xmm1,xmm5 - ; movq xmm3,[edx+20] ; minimal color - ; punpcklwd xmm3,[minimum0] - ; cvtdq2ps xmm3,xmm3 - ; maxps xmm1,xmm3 - add edx,64 - cmp edx,lights_aligned_end ; global - jnz @b - - cvtps2dq xmm1,xmm1 - packssdw xmm1,xmm1 - packuswb xmm1,xmm1 - movd [edi],xmm1 - .skip: - add edi,4 - add esi,4 - addps xmm0,.dn - addss xmm2,.dz - sub ecx,1 - jnz .ddraw - - .end_rp_line: - add esp,160 - pop ebp - -ret +; Real Phong's shading implemented if flat assembler +; by Maciej Guba. 
+; http://macgub.co.pl + +ROUND2 equ 10 +real_phong_tri_z: +;----procedure render Phongs shaded triangle with z coord +;----interpolation ( Catmull alghoritm )----------------- +;----I normalize normal vector in every pixel ----------- +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- esi - pointer to Z-buffer filled- +;---------------------- with dd float variables-------- +;---------------------- edi - pointer to screen buffer--- +;---------------------- xmm0 - 1st normal vector -------- +;---------------------- xmm1 - 2cond normal vector ------ +;---------------------- xmm2 - 3rd normal vector -------- +;---------------------- xmm3 - normalized light vector -- +;---------------------- xmm4 - lo -> hi z1, z2, z3 coords +;---------------------- as dwords floats --------------- +;---------------------- xmm5 - lo -> hi y_min, y_max, --- +;---------------------- x_min, x_max as dword integers - +;---------------------- stack - no parameters ----------- +;-------------------------------------------------------- +;----------------- procedure don't save registers !! 
---- + + + + + push ebp + mov ebp,esp + sub esp,512 + sub ebp,16 + and ebp,0xfffffff0 + + .1_nv equ [ebp-16] + .2_nv equ [ebp-32] + .3_nv equ [ebp-48] + .l_v equ [ebp-64] + .z3 equ [ebp-72] + .z2 equ [ebp-76] + .z1 equ [ebp-80] + .x1 equ [ebp-82] + .y1 equ [ebp-84] + .x2 equ [ebp-86] + .y2 equ [ebp-88] + .x3 equ [ebp-90] + .y3 equ [ebp-92] + .Zbuf equ [ebp-96] + .x_max equ [ebp-100] + .x_min equ [ebp-104] + .y_max equ [ebp-108] + .y_min equ [ebp-112] + .screen equ [ebp-116] + .dx12 equ [ebp-120] + .dx13 equ [ebp-124] + .dx23 equ [ebp-128] + .dn12 equ [ebp-144] + .dn13 equ [ebp-160] + .dn23 equ [ebp-176] + .dz12 equ [ebp-180] + .dz13 equ [ebp-184] + .dz23 equ [ebp-188] + + .cnv1 equ [ebp-208] ; cur normal vectors + .cnv2 equ [ebp-224] + .cz2 equ [ebp-228] + .cz1 equ [ebp-232] + + + + + + .sort3: ; sort triangle coordinates... + cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm4,xmm4,11100001b + movaps xmm6,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm6 + + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + shufps xmm4,xmm4,11011000b + movaps xmm6,xmm1 + movaps xmm1,xmm2 + movaps xmm2,xmm6 + + jmp .sort3 + + .sort2: + + movaps .z1,xmm4 + mov .y1,eax + mov .y2,ebx + mov .y3,ecx + + movdqa .y_min,xmm5 +if 1 ; check if at last only fragment + packssdw xmm5,xmm5 ; of triangle is in visable area + pshuflw xmm5,xmm5,11011000b + movdqu xmm7,.y3 + movdqa xmm6,xmm5 + pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min + pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max + movdqa xmm4,xmm7 + pcmpgtw xmm7,xmm5 + pcmpgtw xmm4,xmm6 + pxor xmm7,xmm4 + pmovmskb eax,xmm7 + and eax,0x00aaaaaa + or eax,eax + jz .rpt_loop2_end +end if + movaps .1_nv,xmm0 + movaps .2_nv,xmm1 + movaps .3_nv,xmm2 + movaps .l_v,xmm3 + mov .Zbuf,esi + mov .screen,edi + + + + mov bx,.y2 ; calc deltas + sub bx,.y1 + jnz .rpt_dx12_make + + xorps xmm7,xmm7 + mov dword .dx12,0 + mov dword .dz12,0 + movaps .dn12,xmm7 + jmp .rpt_dx12_done + + .rpt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + 
shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z2 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz12,xmm5 + + movaps xmm0,.2_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn12,xmm0 + + + .rpt_dx12_done: + + mov bx,.y3 ; calc deltas + sub bx,.y1 + jnz .rpt_dx13_make + + xorps xmm7,xmm7 + mov dword .dx13,0 + mov dword .dz13,0 + movaps .dn13,xmm7 + jmp .rpt_dx13_done + + .rpt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz13,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn13,xmm0 + + .rpt_dx13_done: + + mov bx,.y3 ; calc deltas + sub bx,.y2 + jnz .rpt_dx23_make + + xorps xmm7,xmm7 + mov dword .dx23,0 + mov dword .dz23,0 + movaps .dn23,xmm7 + jmp .rpt_dx23_done + + .rpt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z2 + divss xmm5,xmm6 + movss .dz23,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.2_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn23,xmm0 + + .rpt_dx23_done: + + + movsx eax,word .x1 + shl eax,ROUND2 + mov ebx,eax + mov edx,.z1 + mov .cz1,edx + mov .cz2,edx + movaps xmm0,.1_nv + movaps .cnv1,xmm0 + movaps .cnv2,xmm0 + + + movsx ecx,word .y1 + cmp cx,.y2 + + jge .rpt_loop1_end + + .rpt_loop1: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edi,.screen + mov esi,.Zbuf + + call real_phong_line_z + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn12 + addss xmm2,.dz13 + addss xmm3,.dz12 + add eax,.dx13 + add ebx,.dx12 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y2 + jl 
.rpt_loop1 + + + + + + .rpt_loop1_end: + movsx ecx,word .y2 + cmp cx,.y3 + jge .rpt_loop2_end + + movsx ebx,word .x2 ; eax - cur x1 + shl ebx,ROUND2 ; ebx - cur x2 + push dword .z2 + pop dword .cz2 + movaps xmm0,.2_nv + movaps .cnv2,xmm0 + + + .rpt_loop2: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edi,.screen + mov esi,.Zbuf + + call real_phong_line_z + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn23 + addss xmm2,.dz13 + addss xmm3,.dz23 + add eax,.dx13 + add ebx,.dx23 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y3 + jl .rpt_loop2 + + .rpt_loop2_end: + + add esp,512 + pop ebp + +ret +align 16 +real_phong_line_z: +; in: +; xmm0 - normal vector 1 +; xmm1 - normal vect 2 +; xmm3 - lo -> hi z1, z2 coords as dwords floats +; xmm2 - lo -> hi y_min, y_max, x_min, x_max +; as dword integers +; xmm4 - normalized light vector +; eax - x1 +; ebx - x2 +; ecx - y +; edi - screen buffer +; esi - z buffer filled with dd floats + + push ebp + mov ebp,esp + sub esp,160 + sub ebp,16 + and ebp,0xfffffff0 + + .n1 equ [ebp-16] + .n2 equ [ebp-32] + .lv equ [ebp-48] + .lx1 equ [ebp-52] + .lx2 equ [ebp-56] + .z2 equ [ebp-60] + .z1 equ [ebp-64] + .screen equ [ebp-68] + .zbuff equ [ebp-72] + .x_max equ [ebp-74] + .x_min equ [ebp-76] + .y_max equ [ebp-78] + .y_min equ [ebp-80] + .dn equ [ebp-96] + .dz equ [ebp-100] + .y equ [ebp-104] + .cnv equ [ebp-128] + + mov .y,ecx + packssdw xmm2,xmm2 + movq .y_min,xmm2 + cmp cx,.y_min + jl .end_rp_line + cmp cx,.y_max + jge .end_rp_line ; + + cmp eax,ebx + je .end_rp_line + jl @f + xchg eax,ebx + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + shufps xmm3,xmm3,11100001b + @@: + + cmp ax,.x_max + jge .end_rp_line + cmp bx,.x_min + jle .end_rp_line + movaps .lv,xmm4 + movaps .n1,xmm0 + movaps .n2,xmm1 + mov 
.lx1,eax + mov .lx2,ebx + movlps .z1,xmm3 + + sub ebx,eax + cvtsi2ss xmm7,ebx + shufps xmm7,xmm7,0 + subps xmm1,xmm0 + divps xmm1,xmm7 + movaps .dn,xmm1 + psrldq xmm3,4 + subss xmm3,.z1 + divss xmm3,xmm7 + movss .dz,xmm3 + + + + mov ebx,.lx1 + cmp bx,.x_min ; clipping on function4 + jge @f + movzx eax,word .x_min + sub eax,ebx + cvtsi2ss xmm7,eax + shufps xmm7,xmm7,0 + mulss xmm3,xmm7 + mulps xmm1,xmm7 + addss xmm3,.z1 + addps xmm1,.n1 + movsx eax,word .x_min + movss .z1,xmm3 + movaps .n1,xmm1 + mov dword .lx1,eax + + @@: + movzx eax,word .x_max + cmp .lx2,eax + jl @f + mov .lx2,eax + @@: + movzx eax,word[size_x_var] + mul dword .y + ; mov edx,.x1 + add eax,.lx1 + shl eax,2 + add edi,eax + add esi,eax + + mov ecx,.lx2 + sub ecx,.lx1 + movaps xmm0,.n1 + movss xmm2,.z1 +align 16 + .ddraw: + movss xmm7,xmm2 + cmpnltss xmm7,dword[esi] + movd eax,xmm7 + or eax,eax + jnz .skip + movss [esi],xmm2 + movaps xmm7,xmm0 + mulps xmm7,xmm7 ; normalize + haddps xmm7,xmm7 + haddps xmm7,xmm7 + rsqrtps xmm7,xmm7 + mulps xmm7,xmm0 + movaps .cnv,xmm7 + + mov edx,lights_aligned ; lights - global variable + xorps xmm1,xmm1 ; instead global can be used .lv - light vect. 
+ @@: + movaps xmm6,[edx+16] + movaps xmm5,[edx] + movaps xmm3,[edx+48] + andps xmm5,[zero_hgst_dd] ; global + + mulps xmm5,.cnv ;.lv ; last dword should be zeroed + haddps xmm5,xmm5 + haddps xmm5,xmm5 + ; mulps xmm5,[env_const2] + ; maxps xmm5,[dot_min] + ; minps xmm5,[dot_max] + movaps xmm7,xmm5 + ; mulps xmm7,[env_const2] + ; mulps xmm7,[env_const2] + ; maxps xmm7,[dot_min] + ; minps xmm7,[dot_max] + + mulps xmm7,xmm7 + mulps xmm7,xmm7 + mulps xmm5,xmm6 + mulps xmm7,xmm7 + mulps xmm7,xmm3 + + addps xmm5,xmm7 + minps xmm5,[mask_255f] ; global + maxps xmm1,xmm5 + ; movq xmm3,[edx+20] ; minimal color + ; punpcklwd xmm3,[minimum0] + ; cvtdq2ps xmm3,xmm3 + ; maxps xmm1,xmm3 + add edx,64 + cmp edx,lights_aligned_end ; global + jnz @b + + cvtps2dq xmm1,xmm1 + packssdw xmm1,xmm1 + packuswb xmm1,xmm1 + movd [edi],xmm1 + .skip: + add edi,4 + add esi,4 + addps xmm0,.dn + addss xmm2,.dz + sub ecx,1 + jnz .ddraw + + .end_rp_line: + add esp,160 + pop ebp + +ret diff --git a/programs/demos/view3ds/3ray_shd.inc b/programs/demos/view3ds/3ray_shd.inc new file mode 100644 index 0000000000..f769141f2a --- /dev/null +++ b/programs/demos/view3ds/3ray_shd.inc @@ -0,0 +1,688 @@ +; Ray casted shadows +; by Maciej Guba. +; http://macgub.co.pl + + +ROUND2 equ 10 +ray_shad: +;--- Procedure render triangle with ray casted shadow --- +;--- effect. Calc intersection with all triangles in ---- +;--- everypixel. Its not real time process, especially -- +;--- when many triangles are computed. ------------------ +;------in - eax - x1 shl 16 + y1 ------------------------ +;---------- ebx - x2 shl 16 + y2 ------------------------ +;---------- ecx - x3 shl 16 + y3 ------------------------ +;---------- edx - ptr to fur coords struct -------------- +;---------- esi - pointer to stencil / Z-buffer, filled - +;-------------- with dword float variables, it masks -- +;-------------- 'Z' position (coord) of every front --- +;-------------- pixel. 
-------------------------------- +;---------- edi - pointer to screen buffer -------------- +;---------- xmm0 - 1st normal vector -------------------- +;---------- xmm1 - 2cond normal vector ------------------ +;---------- xmm2 - 3rd normal vector -------------------- +;---------- xmm3 - -------------------------------------- +;---------- xmm4 - lo -> hi z1, z2, z3 coords ----------- +;--------------- as dwords floats --------------------- +;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max -- +;--------------- as dword integers -------------------- +;-----------mm7 - current triangle index --------------- +;---------------------- stack - no parameters ----------- +;-------------------------------------------------------- +;----------------- procedure don't save registers !! ---- + + push ebp + mov ebp,esp + sub esp,1024 + sub ebp,16 + and ebp,0xfffffff0 + + .1_nv equ [ebp-16] + .2_nv equ [ebp-32] + .3_nv equ [ebp-48] + .l_v equ [ebp-64] + .z3 equ [ebp-72] + .z2 equ [ebp-76] + .z1 equ [ebp-80] + .x1 equ [ebp-82] + .y1 equ [ebp-84] + .x2 equ [ebp-86] + .y2 equ [ebp-88] + .x3 equ [ebp-90] + .y3 equ [ebp-92] + .Zbuf equ [ebp-96] + .x_max equ [ebp-100] + .x_min equ [ebp-104] + .y_max equ [ebp-108] + .y_min equ [ebp-112] + .screen equ [ebp-116] + .dx12 equ [ebp-120] + .dx13 equ [ebp-124] + .dx23 equ [ebp-128] + .dn12 equ [ebp-144] + .dn13 equ [ebp-160] + .dn23 equ [ebp-176] + .dz12 equ [ebp-180] + .dz13 equ [ebp-184] + .dz23 equ [ebp-188] + .cnv1 equ [ebp-208] ; current normal vectors + .cnv2 equ [ebp-240] + .cz2 equ [ebp-244] + .cz1 equ [ebp-248] + .tri_no equ [ebp-252] + + + .sort3: ; sort triangle coordinates... 
+ cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm4,xmm4,11100001b + movaps xmm6,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm6 + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + shufps xmm4,xmm4,11011000b + movaps xmm6,xmm1 + movaps xmm1,xmm2 + movaps xmm2,xmm6 + + jmp .sort3 + + .sort2: + + movaps .z1,xmm4 + mov .y1,eax + mov .y2,ebx + mov .y3,ecx + + movdqa .y_min,xmm5 +if 1 ; check if at last only fragment + packssdw xmm5,xmm5 ; of triangle is in visable area + pshuflw xmm5,xmm5,11011000b + movdqu xmm7,.y3 + movdqa xmm6,xmm5 + pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min + pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max + movdqa xmm4,xmm7 + pcmpgtw xmm7,xmm5 + pcmpgtw xmm4,xmm6 + pxor xmm7,xmm4 + pmovmskb eax,xmm7 + and eax,0x00aaaaaa + or eax,eax + jz .rpt_loop2_end +end if + movd .tri_no,mm7 + movaps .1_nv,xmm0 + movaps .2_nv,xmm1 + movaps .3_nv,xmm2 + ; movaps .l_v,xmm3 + mov .Zbuf,esi + mov .screen,edi + + + + mov bx,.y2 ; calc deltas + sub bx,.y1 + jnz .rpt_dx12_make + + xorps xmm7,xmm7 + mov dword .dx12,0 + mov dword .dz12,0 + movaps .dn12,xmm7 + jmp .rpt_dx12_done + + .rpt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z2 + rcpss xmm6,xmm6 + subss xmm5,.z1 + mulss xmm5,xmm6 + movss .dz12,xmm5 + + shufps xmm6,xmm6,0 + movaps xmm0,.2_nv + subps xmm0,.1_nv + mulps xmm0,xmm6 + movaps .dn12,xmm0 +; subps xmm3,xmm0 +; mulps xmm3,xmm6 + + .rpt_dx12_done: + mov bx,.y3 ; calc deltas + sub bx,.y1 + jnz .rpt_dx13_make + + xorps xmm7,xmm7 + mov dword .dx13,0 + mov dword .dz13,0 + movaps .dn13,xmm7 + jmp .rpt_dx13_done + + .rpt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + rcpss xmm6,xmm6 + subss xmm5,.z1 + mulss xmm5,xmm6 + movss .dz13,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + mulps xmm0,xmm6 + movaps .dn13,xmm0 + + ; 
mulps xmm0,xmm6 + + .rpt_dx13_done: + + mov bx,.y3 ; calc deltas + sub bx,.y2 + jnz .rpt_dx23_make + + xorps xmm7,xmm7 + mov dword .dx23,0 + mov dword .dz23,0 + movaps .dn23,xmm7 + + jmp .rpt_dx23_done + + .rpt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + rcpss xmm6,xmm6 + subss xmm5,.z2 + mulss xmm5,xmm6 + movss .dz23,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.2_nv + shufps xmm6,xmm6,0 + mulps xmm0,xmm6 + movaps .dn23,xmm0 + ; mulps xmm0,xmm6 + + .rpt_dx23_done: + + movsx eax,word .x1 + shl eax,ROUND2 + mov ebx,eax + mov ecx,.z1 + mov .cz1,ecx + mov .cz2,ecx + movaps xmm0,.1_nv + movaps .cnv1,xmm0 + movaps .cnv2,xmm0 + mov edi,.screen + mov esi,.Zbuf + movsx ecx,word .y1 + cmp cx,.y2 + + jge .rpt_loop1_end + + .rpt_loop1: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + ; movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + movd mm7,.tri_no + + call ray_shd_l + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + ; fur x,y + movss xmm2,.cz1 + movss xmm3,.cz2 + shufps xmm4,xmm4,01001110b + addps xmm0,.dn13 + addps xmm1,.dn12 + addss xmm2,.dz13 + addss xmm3,.dz12 + + + add eax,.dx13 + add ebx,.dx12 + + shufps xmm4,xmm4,01001110b + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y2 + jl .rpt_loop1 + + + .rpt_loop1_end: + movsx ecx,word .y2 + cmp cx,.y3 + jge .rpt_loop2_end + + movsx ebx,word .x2 ; eax - cur x1 + shl ebx,ROUND2 ; ebx - cur x2 + push dword .z2 + pop dword .cz2 + movaps xmm0,.2_nv + movaps .cnv2,xmm0 + + mov edi,.screen + mov esi,.Zbuf + + + .rpt_loop2: + pushad + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + ; movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + movd mm7,.tri_no + + call ray_shd_l + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + + addps xmm0,.dn13 + addps xmm1,.dn23 + addss 
xmm2,.dz13 + addss xmm3,.dz23 + add eax,.dx13 + add ebx,.dx23 + addps xmm4,xmm6 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y3 + jl .rpt_loop2 + + .rpt_loop2_end: + + add esp,1024 + pop ebp + + + +ret +align 16 +ray_shd_l: +; in: +; xmm0 - normal vector 1 +; xmm1 - normal vect 2 +; xmm3 - lo -> hi z1, z2 coords as dwords floats +; xmm2 - lo -> hi y_min, y_max, x_min, x_max +; as dword integers +; xmm4 - ---- +; mm7 - current triangle index +; eax - x1 +; ebx - x2 +; ecx - y +; edx - ----- +; edi - screen buffer +; esi - z buffer / stencil buffer filled with dd floats + + push ebp + mov ebp,esp + sub esp,320 + sub ebp,16 + and ebp,0xfffffff0 + + .n1 equ [ebp-16] + .n2 equ [ebp-32] + .lv equ [ebp-48] + .lx1 equ [ebp-52] + .lx2 equ [ebp-56] + .z2 equ [ebp-60] + .z1 equ [ebp-64] + .screen equ [ebp-68] + .zbuff equ [ebp-72] + .x_max equ [ebp-74] + .x_min equ [ebp-76] + .y_max equ [ebp-78] + .y_min equ [ebp-80] + .dn equ [ebp-96] + .dz equ [ebp-100] + .y equ [ebp-104] +; .cur_tri equ [ebp-108] + .cnv equ [ebp-128] + .Rlen equ [ebp-128-16] + .r1 equ [ebp-128-32] + .vect_t equ [ebp-128-48] + .cur_tri equ [ebp-128-64] +; .p3t equ [ebp-128-80] + .nray equ [ebp-128-96] + .final_col equ [ebp-128-112] + .aabb_mask equ dword[ebp-128-112-4] + + mov .y,ecx + movdqa xmm4,xmm2 + packssdw xmm2,xmm2 + movq .y_min,xmm2 + cmp cx,.y_min + jl .end_rp_line + cmp cx,.y_max + jge .end_rp_line ; + cmp eax,ebx + je .end_rp_line + jl @f + xchg eax,ebx + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + shufps xmm3,xmm3,11100001b + @@: + movd .cur_tri,mm7 + cmp ax,.x_max + jge .end_rp_line + cmp bx,.x_min + jle .end_rp_line + ; movaps .lv,xmm4 + andps xmm0,[zero_hgst_dd] + andps xmm1,[zero_hgst_dd] + movaps .n1,xmm0 + movaps .n2,xmm1 + mov .lx1,eax + mov .lx2,ebx + movlps .z1,xmm3 + + sub ebx,eax + cvtsi2ss xmm7,ebx + rcpss xmm7,xmm7 + shufps xmm7,xmm7,0 + subps xmm1,xmm0 + mulps xmm1,xmm7 + movaps .dn,xmm1 + shufps 
xmm3,xmm3,11111001b + subss xmm3,.z1 + mulss xmm3,xmm7 + movss .dz,xmm3 + + subps xmm6,xmm5 + mulps xmm6,xmm7 + + mov ebx,.lx1 + cmp bx,.x_min ; clipping on function4 + jge @f + movzx eax,word .x_min + sub eax,ebx + cvtsi2ss xmm7,eax + shufps xmm7,xmm7,0 + mulss xmm3,xmm7 + mulps xmm1,xmm7 + mulps xmm6,xmm7 + addss xmm3,.z1 + addps xmm1,.n1 + addps xmm6,xmm5 + movsx eax,word .x_min + movss .z1,xmm3 + movaps .n1,xmm1 + mov dword .lx1,eax + @@: + + movzx eax,word .x_max + cmp .lx2,eax + jl @f + mov .lx2,eax + @@: + movzx eax,word[xres_var] + mul dword .y + add eax,.lx1 + mov .zbuff,esi + mov .screen,edi + shl eax,2 + add edi,eax + add esi,eax + mov ecx,.lx2 + sub ecx,.lx1 + + movd xmm0,[vect_x] + punpcklwd xmm0,[the_zero] + cvtdq2ps xmm0,xmm0 + movaps .vect_t,xmm0 + + + .ddraw: + + xorps xmm0,xmm0 + movss xmm2,.z1 + movss xmm5,.z1 + movaps .final_col,xmm0 + addss xmm2,[f1] + subss xmm5,[f1] + cmpnltss xmm2,dword[esi] + cmpnltss xmm5,dword[esi] + pxor xmm2,xmm5 + movd eax,xmm2 + or eax,eax + jz .skips + + movaps xmm7,.n1 + andps xmm7,[zero_hgst_dd] + mulps xmm7,xmm7 ; normalize + haddps xmm7,xmm7 + haddps xmm7,xmm7 + rsqrtps xmm7,xmm7 + mulps xmm7,.n1 + movaps .cnv,xmm7 + mov ebx,point_light_coords + mov edx,lights_aligned + xor eax,eax + .nx_light: + pushad + cvtsi2ss xmm0,.lx1 + cvtsi2ss xmm1,.y + movss xmm2,.z1 + movlhps xmm0,xmm1 + shufps xmm0,xmm2,11001000b + subps xmm0,[ebx] ; xmm0 - ray end, -> current vertex + movaps xmm3,[ebx] + andps xmm0,[zero_hgst_dd] + movaps xmm1,xmm0 + mulps xmm0,xmm0 + haddps xmm0,xmm0 + haddps xmm0,xmm0 + sqrtps xmm0,xmm0 + movss .Rlen,xmm0 + rcpps xmm0,xmm0 + mulps xmm0,xmm1 ; xmm0 - normalized ray vector + andps xmm0,[zero_hgst_dd] + movaps .nray,xmm0 + movaps .r1,xmm3 ; ray orgin + if 0 + movaps xmm1,xmm3 + call calc_bounding_box + + mov .aabb_mask,eax +end if + mov edi,[triangles_ptr] + xor ecx,ecx + .nx_tri: ; next triangle + + cmp ecx,.cur_tri ; prevent self shadowing + je .skipp + if 0 + mov edi,ecx + imul edi,[i12] + add 
edi,[triangles_ptr] + mov eax,[edi] + mov ebx,[edi+4] + mov edx,[edi+8] + imul eax,[i12] + imul ebx,[i12] + imul edx,[i12] + add eax,[points_ptr] + add ebx,[points_ptr] + add edx,[points_ptr] + movups xmm2,[eax] + movups xmm3,[ebx] + movups xmm4,[edx] + andps xmm2,[sign_mask] + andps xmm3,[sign_mask] + andps xmm4,[sign_mask] + movmskps ebx,xmm4 + cmpeqps xmm2,xmm3 + cmpeqps xmm3,xmm4 + andps xmm2,xmm3 + movmskps eax,xmm2 + and eax,111b + and ebx,111b + cmp eax,111b + jne @f + bt .aabb_mask,ebx + jnc .skipp + @@: +end if + mov edi,ecx + imul edi,[i12] + add edi,[triangles_ptr] + mov eax,[edi] + mov ebx,[edi+4] + mov edx,[edi+8] + imul eax,[i12] + imul ebx,[i12] + imul edx,[i12] + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add edx,[points_rotated_ptr] + movups xmm2,[eax] + movups xmm3,[ebx] + movups xmm4,[edx] + addps xmm2,.vect_t + addps xmm3,.vect_t + addps xmm4,.vect_t + + +;intersect_tri: procs header +; in: +; xmm0 - ray direction ; should be normalized +; xmm1 - ray orgin +; xmm2 - tri vert1 +; xmm3 - tri vert2 +; xmm4 - tri vert3 +; if eax = 1 - intersction with edge +; xmm6 - edge lenght +; if eax = 0 - intersect with ray (classic) +; out: +; eax = 1 - intersection occured +; xmm0 - float lo -> hi = t, v, u, ... 
+ + movss xmm6,.Rlen + movaps xmm0,.nray + movaps xmm1,.r1 + subss xmm6,[the_one] + mov eax,1 + push ecx + call intersect_tri + pop ecx + cmp eax,1 + je .inter + .skipp: + .skp: + inc ecx + cmp ecx,[triangles_count_var] + jnz .nx_tri +; jz .do_process +; comiss xmm0,.Rlen +; jl .inter + + popad + .do_process: + movaps xmm5,.nray ;[edx] + andps xmm5,[zero_hgst_dd] ; global + mulps xmm5,.cnv ;.lv ; last dword should be zeroed + ; andps xmm5,[sign_z] ; global + haddps xmm5,xmm5 + haddps xmm5,xmm5 + andps xmm5,[abs_mask] ; global + movaps xmm7,xmm5 + mulps xmm7,xmm7 + mulps xmm7,xmm7 + mulps xmm5,[edx+16] + mulps xmm7,xmm7 + mulps xmm7,xmm7 + mulps xmm7,[edx+48] + addps xmm5,xmm7 + minps xmm5,[mask_255f] ; global + maxps xmm5,.final_col ; addps maxps + movaps .final_col,xmm5 + jmp .nx_loop + .inter: + + popad + .nx_loop: + ; add edx,64 ; unncomment to achive 3 lights + ; add ebx,16 + ; cmp edx,lights_aligned_end ; global + ; jnz .nx_light + + movaps xmm1,.final_col + cvtps2dq xmm1,xmm1 + packssdw xmm1,xmm1 + packuswb xmm1,xmm1 + movd [edi],xmm1 + .skips: + movaps xmm0,.n1 + movss xmm2,.z1 + add edi,4 + add esi,4 + add dword .lx1,1 + addps xmm0,.dn + addss xmm2,.dz + movaps .n1,xmm0 + movss .z1,xmm2 + dec ecx + jnz .ddraw + .end_rp_line: + add esp,320 + pop ebp + +ret diff --git a/programs/demos/view3ds/a_procs.inc b/programs/demos/view3ds/a_procs.inc index a68ad46dca..55a96e41a0 100644 --- a/programs/demos/view3ds/a_procs.inc +++ b/programs/demos/view3ds/a_procs.inc @@ -1,3 +1,200 @@ + + +if Ext > SSE2 + ;-------------------------------------------------------------------- +init_point_lights: + ; mov eax,1000 + ; cvtsi2ss xmm1,eax + ; shufps xmm1,xmm1,11000000b + ; mov esi,lights_aligned + ; mov edi,point_light_coords + ; mov ecx,3 + ; @@: + ; movaps xmm0,[esi] + ; addps xmm0,[f05xz] + ; mulps xmm0,xmm1 + ; movaps [edi],xmm0 + ; add esi,64 + ; add edi,16 + ; loop @b + mov ecx,3 + mov edi,point_light_coords + @@: + push ecx + xor ecx,ecx + movzx edx,word[size_x_var] + 
call random + cvtsi2ss xmm0,eax + movss [edi],xmm0 + xor ecx,ecx + movzx edx,word[size_x_var] + call random + cvtsi2ss xmm0,eax + movss [edi+4],xmm0 + ; movzx ebx,word[size_x_var] + ; shl ebx,2 + ; neg ebx + mov ecx,-1900 + ; sub ecx,100 + mov edx,-600 + call random + cvtsi2ss xmm0,eax + movss [edi+8],xmm0 + ; mov dword[edi+8],-1700.0 + mov [edi+12],dword 0 + add edi,16 + pop ecx + loop @b + +ret + +;------------------------------------------------------------------ +intersect_tri: ; Moeller-Trumbore method +; in: +; xmm0 - ray direction ; should be normalized +; xmm1 - ray orgin +; xmm2 - tri vert1 +; xmm3 - tri vert2 +; xmm4 - tri vert3 +; if eax = 1 - intersction with edge +; xmm6 - edge lenght +; if eax = 0 - intersect with ray (classic) +; out: +; eax = 1 - intersection occured +; xmm0 - float lo -> hi = t, v, u, ... + push ebp + mov ebp,esp + and ebp,-16 + sub esp,220 + + .dir equ [ebp-16] + .origin equ [ebp-32] + .ta equ [ebp-48] + .tb equ [ebp-64] + .tc equ [ebp-80] + .tvec equ [ebp-96] + .pvec equ [ebp-112] + .qvec equ [ebp-128] + .e1 equ [ebp-128-16] + .ift equ dword[ebp-152] + .invdet equ [ebp-156] + .det equ [ebp-160] + .ed_l equ [ebp-164] + .u equ [ebp-168] + .v equ [ebp-172] + .t equ [ebp-176] + .e2 equ [ebp-192] + + movaps .dir,xmm0 + movaps .origin,xmm1 + movaps .ta,xmm2 + movaps .tb,xmm3 + movaps .tc,xmm4 + mov .ift,eax + movss .ed_l,xmm6 + subps xmm3,xmm2 + subps xmm4,xmm2 + andps xmm3,[zero_hgst_dd] + andps xmm4,[zero_hgst_dd] + movaps .e1,xmm3 + movaps .e2,xmm4 + + lea esi,.dir + lea edi,.e2 + lea ebx,.pvec + call cross_aligned + + movaps xmm0,.e1 + mulps xmm0,.pvec + ; andps xmm0,[zero_hgst_dd] + haddps xmm0,xmm0 + haddps xmm0,xmm0 + movss .det,xmm0 +; cmpnless xmm0,[eps] +; movd eax,xmm0 +; or eax,eax +; jz @f + comiss xmm0,[eps] + jl @f + + rcpss xmm0,.det + movss .invdet,xmm0 + + movaps xmm0,.origin + subps xmm0,.ta + andps xmm0,[zero_hgst_dd] + movaps .tvec,xmm0 + + mulps xmm0,.pvec + haddps xmm0,xmm0 + haddps xmm0,xmm0 + mulss xmm0,.invdet 
+ movss xmm1,xmm0 + movss .u,xmm0 + cmpnless xmm1,[epsone] + cmpnless xmm0,[epsminus] + pxor xmm1,xmm0 + movd eax,xmm1 + or eax,eax + jz @f + + lea esi,.tvec + lea edi,.e1 + lea ebx,.qvec + call cross_aligned + + movaps xmm0,.dir + mulps xmm0,.qvec + haddps xmm0,xmm0 + haddps xmm0,xmm0 + mulss xmm0,.invdet + movss .v,xmm0 + movss xmm1,xmm0 + addss xmm1,.u + cmpnless xmm1,[epsone] + cmpnless xmm0,[epsminus] + pxor xmm1,xmm0 + movd eax,xmm1 + or eax,eax + jz @f + + movaps xmm1,.e2 + mulps xmm1,.qvec + haddps xmm1,xmm1 + haddps xmm1,xmm1 + mulss xmm1,.invdet + movss .t,xmm1 + ; cmpnless xmm1,[eps] + ; movmskps eax,xmm1 + ; test eax,1 + ; jz @f + comiss xmm1,[eps] + jl @f + + mov eax,1 + cmp .ift,0 + je .end ; ok intersect occured, no edge cause + + movss xmm0,.t ; else check with edge lenght + ; movss xmm1,.t + cmpnless xmm0,[eps] + cmpnless xmm1,.ed_l + xorps xmm0,xmm1 + movd ebx,xmm0 + or ebx,ebx + jz @f + + ; mov eax,1 + ; movaps xmm0,.t + jmp .end + @@: + xor eax,eax + .end: + movaps xmm0,.t + add esp,220 + pop ebp +ret +end if ;=============================================================== do_edges_list: push ebp @@ -223,13 +420,18 @@ ret do_sinus: +;in - ax - render mode .x equ [ebp-8] .y equ [ebp-12] .new_y equ [ebp-16] .temp equ [ebp-20] + .dr_f equ word[ebp-22] + push ebp mov ebp,esp - sub esp,64 + sub esp,30 + mov .dr_f,ax + mov dword .x,0 mov dword .y,0 mov esi,[screen_ptr] @@ -243,53 +445,20 @@ do_sinus: cld rep stosd pop edi -; movzx eax,[sinus_flag] -; mov edx,10 -; mul edx -; mov [sin_amplitude],eax -; mov [sin_frq],eax fninit -;if Ext = SSE2 -; movups xmm1,[const0123] ; xmm1 - init values -; mov eax,0x000000ff -; movd xmm2,eax -; shufps xmm2,xmm2,0 ; xmm2 - mask value -; mov eax,4 -; movd xmm3,eax -; shufps xmm3,xmm3,0 .again: -if 0 - fild dword .x - fidiv [sin_frq] - fsin - fimul [sin_amplitude] - fiadd dword .y - fistp dword .new_y -else fild dword .x fmul [sin_frq] fistp dword .temp mov eax, .temp -; mov bx, [angle_x] -; add bx, [angle_y] -; movzx 
ebx,bx -; shr ebx,1 ; change phase -; add eax,ebx - and eax, 0x000000ff -; cdq - ; mul [sin_frq] -; and eax,0x000000ff -; and ax,0x00ff -; cwde - fld dword [sin_tab+eax*4] fimul dword [sin_amplitude] fiadd dword .y fistp dword .new_y -end if + mov eax,.new_y or eax,eax jl .skip @@ -298,20 +467,19 @@ end if jg .skip movzx edx,word[size_x_var] mul edx -; shl eax,9 add eax,dword .x lea ebx,[eax*3] - cmp [dr_flag],12 ; 32 bit col cause - jl @f + cmp .dr_f,12 ; 32 bit col cause + jb @f add ebx,eax @@: mov eax,[esi] mov [edi+ebx],eax .skip: add esi,3 - cmp [dr_flag],12 - jl @f + cmp .dr_f,12 + jb @f inc esi @@: inc dword .x @@ -330,8 +498,8 @@ end if movzx ecx,word[size_x_var] movzx eax,word[size_y_var] imul ecx,eax - cmp [dr_flag],12 - jge @f + cmp .dr_f,12 + jae @f lea ecx,[ecx*3] shr ecx,2 ; mov ecx,SIZE_X*SIZE_Y*3/4 @@ -377,7 +545,19 @@ draw_dots: ret do_emboss: ; sse2 version only +; in ax - render model + push ebp + mov ebp,esp + sub esp,4 + + .dr_mod equ word[ebp-2] + + mov .dr_mod,ax + if Ext >= SSE2 + + + movzx ecx,[bumps_deep_flag] inc ecx call blur_screen ;blur n times @@ -392,20 +572,20 @@ if Ext >= SSE2 sub ecx,ebx mov esi,[screen_ptr] mov edi,[Zbuffer_ptr] - cmp [dr_flag],12 + cmp .dr_mod,11 jge @f lea ebx,[ebx*3] - jmp .f + jmp .gf @@: shl ebx,2 -.f: +.gf: mov edx,esi add esi,ebx lea ebx,[ebx+esi] pxor xmm0,xmm0 push eax .emb: - cmp [dr_flag],12 + cmp .dr_mod ,11 jge @f movlps xmm1,[esi+3] movhps xmm1,[esi+6] @@ -442,14 +622,7 @@ if Ext >= SSE2 pmaxsw xmm1,xmm7 pmaxsw xmm1,xmm6 -if 0 - movaps xmm7,xmm3 - movaps xmm6,xmm3 - psrlq xmm7,2*8 - psrlq xmm6,4*8 - pmaxsw xmm3,xmm7 - pmaxsw xmm3,xmm6 -end if + pmaxsw xmm1,xmm3 movd eax,xmm1 @@ -469,7 +642,7 @@ end if mov eax,[eax] mov [edi+4],eax - cmp [dr_flag],12 + cmp .dr_mod,11 jl @f add esi,2 add ebx,2 @@ -487,7 +660,7 @@ end if pop ecx ;,eax mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] - cmp [dr_flag],12 + cmp .dr_mod,11 jge .e @@: movsd @@ -498,6 +671,11 @@ end if end if + + + mov esp,ebp + pop ebp + ret 
;align 16 diff --git a/programs/demos/view3ds/asc.inc b/programs/demos/view3ds/asc.inc index 8a81af1432..bbebe5edfe 100644 --- a/programs/demos/view3ds/asc.inc +++ b/programs/demos/view3ds/asc.inc @@ -1,815 +1,815 @@ -; Files *.asc routines by Maciej Guba -; Thanks to Reverend for integer/float/ascii conversion examples -read_asc: - mov eax,[fptr] - .find_vert: - cmp dword[eax],'Vert' - je @f - inc eax - jmp .find_vert - @@: - add eax,4 - cmp dword[eax],'ices' - jne .find_vert - add eax,3 - @@: - inc eax - cmp byte[eax],'0' ; search end of ascii number of vertices string - jb @b - cmp byte[eax],'9' - ja @b -; eax - start ascii number - @@: - inc eax - cmp byte[eax],'0' - jb .convert1 - cmp byte[eax],'9' - ja .convert1 - jmp @b - .convert1: - dec eax - mov ebx,eax - push eax - call ascii_to_integer - mov [points_count_var],edx - pop eax - - @@: - inc eax - cmp dword[eax],'Face' - jne @b - add eax,3 - @@: - inc eax - cmp byte[eax],'0' - jb @b - cmp byte[eax],'9' - ja @b - ; eax - start ascii number - @@: - inc eax - cmp byte[eax],'0' - jb .convert2 - cmp byte[eax],'9' - ja .convert2 - jmp @b - ; eax - end ascii number - .convert2: - dec eax - mov ebx,eax - push eax - call ascii_to_integer - mov [triangles_count_var],edx - pop eax - - @@: - inc eax - cmp dword[eax],'Vert' - jnz @b - inc eax - - mov edi,[points_ptr] - xor ebx,ebx - .decode_vertices: - push ebx - @@: - inc eax - cmp dword[eax],'Vert' - jne @b - xor ecx,ecx - - .decode_coord: - push ecx - @@: - inc eax - mov dl,byte[eax] - cmp dl,byte[XYZpartices+ecx] - jne @b - @@: - inc eax - cmp byte[eax],'.' 
- je .readF - cmp byte[eax],'-' - je .readF - cmp byte[eax],'0' - jb @b - cmp byte[eax],'9' - ja @b -.readF: ; read float - mov esi,eax - push eax - push ecx - - call atof ; st0 - desired dword float - - pop ecx - pop eax - - fstp dword[edi] - add edi,4 - - pop ecx - inc ecx - cmp ecx,3 - jne .decode_coord - pop ebx - inc ebx - cmp ebx,[points_count_var] - jne .decode_vertices - mov dword[edi],-1 - - - - mov esi,eax - @@: - inc esi - cmp dword[esi],'Face' - jne @b - xor edx,edx - mov edi,[triangles_ptr] - cld - .decode_face: - - push edx - @@: - inc esi - cmp dword[esi],'Face' - jne @b - @@: - inc esi - cmp byte[esi],'0' ; face number start - jb @b - cmp byte[esi],'9' - ja @b - @@: - inc esi - cmp byte[esi],'0' - jb @f - cmp byte[esi],'9' ; face number end - ja @f - jmp @b - @@: - xor ecx,ecx - .next_vertex_number: - - push ecx - @@: - inc esi - cmp byte[esi],'0' - jb @b - cmp byte[esi],'9' - ja @b - ; eax - start ascii number - @@: - inc esi - cmp byte[esi],'0' - jb @f - cmp byte[esi],'9' - ja @f - jmp @b - ; eax - end ascii number - @@: - dec esi - mov ebx,esi - push esi - call ascii_to_integer - mov eax,edx - - stosd - pop esi - add esi,4 - - pop ecx - inc ecx - cmp ecx,3 - jne .next_vertex_number - pop edx - inc edx - cmp edx,[triangles_count_var] - jne .decode_face - mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark - mov eax,1 ;-> mark if ok -ret - -ascii_to_integer: -; in --- [ebx] -> end of ascii string -; out -- edx -> desired number - xor edx,edx - xor ecx,ecx - .again: - movzx eax,byte[ebx] - sub al,'0' - cwde - push edx - mul dword[convert_muler+ecx] - pop edx - add edx,eax - dec ebx - cmp byte[ebx],'0' - jb .end - cmp byte[ebx],'9' - ja .end - add ecx,4 - jmp .again - @@: - - .end: -ret - -;=============================================================================== -; ASCII to float conversion procedure -; -; input: -; esi - pointer to string -; -; output: -; st0 - number changed into float -; 
-;=============================================================================== - -atof: - .string equ ebp-4 - - push ebp - mov ebp,esp - sub esp,32 - push eax ecx esi - mov [.string],esi - fninit - fldz - fldz - - cld - cmp byte [esi], '-' - jnz @F - inc esi - @@: - xor eax, eax - align 4 - .loop.integer_part: - lodsb - cmp al, '.' - jz .mantisa - cmp al,'0' - jb .exit - cmp al,'9' - ja .exit - fimul [i10] - sub al, '0' - push eax - fiadd dword [esp] - add esp, 4 - jmp .loop.integer_part - - .mantisa: - xor ecx, ecx - xor eax, eax - cld - fxch st1 - @@: - - lodsb - cmp al,'0' - jb .exit - cmp al,'9' - ja .exit - cmp ecx,7*4 - je .exit ; max 7 digits in mantisa - sub al,'0' - push eax - fild dword[esp] - fidiv dword[convert_muler+4+ecx] - faddp - add esp,4 - add ecx,4 - jmp @b - .exit: - faddp - - mov eax, [.string] - cmp byte [eax], '-' - jnz @F - fchs - @@: - cld - stc ; always returns no error - pop esi ecx eax - mov esp,ebp - pop ebp - ret - - -itoa: ; unsigned dword integer to ascii procedure -; in eax - variable -; esi - Pointer to ascii string -; out esi - desired ascii string -; edi - end of ascii string - ptr to memory - .temp_string equ dword[ebp-36] - .ptr equ dword[ebp-40] - .var equ dword[ebp-44] - push ecx - push ebp - mov ebp,esp - sub esp,64 - mov .var,eax - mov eax,-1 - lea edi,.temp_string - cld - mov ecx,9 - rep stosd ; make floor - - - mov .ptr,esi - lea edi,.temp_string - add edi,34 - std - xor eax,eax - stosb ; mark begin - mov eax,.var - mov esi,10 - @@: - xor edx,edx - div esi - xchg eax,edx - add al,'0' - stosb - xchg eax,edx - or eax,eax - jnz @b - stosb ; mark end - - lea esi,.temp_string - cld - @@: - lodsb - or al,al - jnz @b - - mov edi,.ptr - @@: - lodsb - stosb - or al,al - jnz @b - - mov esp,ebp - pop ebp - pop ecx -ret -if 1 -ftoa_mac: -; in : esi - pointer to dword float -; edi - pointer to ascii string - .ptr_f equ dword[ebp-4] - .sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise - .ptr_ascii equ dword[ebp-12] - 
.integer equ dword[ebp-20] - .fraction equ dword[ebp-28] - .status_orginal equ word[ebp-32] - .status_changed equ word[ebp-34] - push ecx - push ebp - mov ebp,esp - sub esp,64 - fninit - fnstcw .status_orginal - mov ax, .status_orginal - or ax, 0000110000000000b - mov .status_changed, ax - fldcw .status_changed -; -------------------------------- -; check if signed - xor eax, eax - fld dword[esi] - fst .sign - test .sign, 80000000h - setz al - mov .sign, eax - - mov .ptr_f,esi - mov .ptr_ascii,edi - fabs - fld st0 - frndint - fist .integer - fsubp st1, st0 - - mov eax,.integer - mov esi,.ptr_ascii - call itoa - ; edi -> ptr to end of ascii string - dec edi - mov al,'.' - stosb - - mov ecx, 6 ; max 6 digits in fraction part - .loop: - fimul [i10] - fld st0 - frndint - fist .fraction - fsubp st1, st0 - mov esi,edi - mov eax,.fraction - add al,'0' - stosb - ftst - fnstsw ax - test ax, 0100000000000000b - jz @F - test ax, 0000010100000000b - jz .finish - @@: - loop .loop -if 0 - fldcw .status_orginal - fimul [i10] - fist .fraction - ; mov esi,edi - mov eax,.fraction - add al,'0' - stosb - ; call itoa -; -------------------------------- -; restore previous values - .finish: - ; fstp st0 - ffree st - mov eax,.fraction - mov esi,edi -; call itoa - - add al,'0' - stosb -end if - .finish: - ffree st - cmp .sign,0 - jnz @f - mov esi,.ptr_ascii - dec esi - mov byte[esi],'-' - @@: - mov esp,ebp - pop ebp - pop ecx - -ret -end if -if 0 -;=============================================================================== -; float to ASCII conversion procedure -; -; input: -; buffer - pointer to memory where output will be saved -; precision - number of digits after dot -; -; output: -; no immediate output -; -; notes: -; separate integer and mantisa part with dot '.' -; so GOOD 123.456 -; WRONG 123,456 -; -; coded by Reverend // HTB + RAG -;=============================================================================== -proc ftoa buffer, precision -locals - status_original dw ? 
- status_changed dw ? - integer dd ? - mantisa dd ? - signed dd ? -endl - push eax ecx;edi ecx -; -------------------------------- -; set correct precision - mov eax, [precision] - cmp eax, 51 - jb @F - mov eax, 51 - @@: - mov [precision], eax -; -------------------------------- -; change control wortd of fpu to prevent rounding - fnstcw [status_original] - mov ax, [status_original] - or ax, 0000110000000000b - mov [status_changed], ax - fldcw [status_changed] -; -------------------------------- -; check if signed - xor eax, eax - fst [signed] - test [signed], 80000000h - setnz al - mov [signed], eax -; -------------------------------- -; cut integer and mantisa separately - fld st0 - fld st0 ; st0 = x, st1 = x - frndint - fist [integer] ; st0 = x, st1 = x - fabs - fsubp st1, st0 ; st0 = mantisa(x) -; -------------------------------- -; save integer part in buffer - ; mov edi, [buffer] - mov esi,[buffer] - ; push [signed] - ; push edi - ; push 10 - ; push [integer] - mov eax,[integer] - call itoa - ; add edi, eax - mov al, '.' 
- stosb - mov esi,edi -; -------------------------------- -; save mantisa part in buffer - mov ecx, [precision] - dec ecx - .loop: - fimul [i10] - fld st0 - frndint - fist [mantisa] - fsubp st1, st0 -; push 0 -; push edi -; push 10 -; push [mantisa] - mov esi,edi - mov eax,[mantisa] - call itoa - ; add edi, eax - ftst - fnstsw ax - test ax, 0100000000000000b - jz @F - test ax, 0000010100000000b - jz .finish - @@: - loop .loop - fldcw [status_original] - fimul [i10] - fist [mantisa] -; push 0 -; push edi -; push 10 -; push [mantisa] - mov esi,edi - mov eax,[mantisa] - call itoa -; -------------------------------- -; restore previous values - .finish: - fstp st0 - cmp [signed],1 - jnz @f - mov byte[buffer],'-' - @@: - stc - pop ecx eax ;edi eax - ret -endp -end if -if 0 -write_asc: - .counter equ dword[ebp-4] - push ebp - mov ebp,esp - sub esp,64 - fninit - mov edi,asc_file_buffer - mov esi,asc_main_header - cld - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - - mov esi,asc_info_header - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - push esi ; -> position in header info - movzx eax,[points_count_var] - mov esi,edi - call itoa ; unsigned dword integer to ascii procedure - pop esi - inc esi - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - push esi - movzx eax,[triangles_count_var] - mov esi,edi - call itoa - pop esi - inc esi - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: -;============================================================= -;================vertex list parser=========================== -;============================================================= - - xor ecx,ecx - .again_vertex: - push ecx - mov esi,asc_one_vertex_formula - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - mov eax,ecx - ; push ecx - push esi - mov esi,edi - call itoa - pop esi -; pop ecx - inc esi - xor ebx,ebx - .next_vertex_coef: - push ebx - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - ; int3 - push esi - lea esi,[ecx*3] - shl esi,2 
- add esi,points_r - add esi,ebx - ; int3 - call ftoa_mac - ; std - ; fld dword[esi] - - - ; pushad - ; stdcall ftoa, edi, 30 - ; popad - ; add edi,20 - - - pop esi - pop ebx - add ebx,4 - cmp ebx,12 - jnz .next_vertex_coef - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - pop ecx - inc ecx - cmp cx,[points_count_var] - jnz .again_vertex - - - ; mov edi,[temp_edi] - - - mov esi,asc_face_list_header - @@: - lodsb - cmp al,1 ; all face header - jz @f - stosb - jmp @b - @@: - ;===================================== - ; ==============face list parser====== - ;===================================== - xor ecx,ecx - .again_face: - push ecx - mov .counter,ecx - mov esi,asc_one_face_formula - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - mov eax,ecx - push esi - mov esi,edi - call itoa - pop esi - inc esi - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - - xor ebx,ebx - .next_face_index: - push ebx - mov ecx,.counter - lea ecx,[ecx*3] - add ecx,ecx - movzx eax,word[triangles+ecx+ebx] - push esi - mov esi,edi - call itoa - pop esi - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: - pop ebx - add ebx,2 - cmp ebx,6 - jnz .next_face_index - -; push esi - mov esi,asc_material - @@: - lodsb - cmp al,1 - jz @f - stosb - jmp @b - @@: -; pop esi - - pop ecx - inc ecx - cmp cx,[triangles_count_var] - jnz .again_face - - ; write file - sub edi,asc_file_buffer - ; mov [file_buffer+2],edi - mov [FileSize],edi - - invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 - mov [hfile],eax - invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0 - invoke CloseHandle,[hfile] - - mov esp,ebp - pop ebp -ret -end if -if 0 -asc_file_buffer: - rd 65536 -ascii_string rb 50 -ftoa_muler dd 1000000000 -file_size dd ? -file_handle dd ? -end if - -if 0 -convert_muler: -dd 1, 10, 100, 1000, 10000 -XYZpartices: -db 'X','Y','Z' -i10 dw 10 -points_count_var dd ? -triangles_count_var dd ? 
-points rb 100 -triangles rb 100 -asc_file: - -file "2TORUS.ASC" +; Files *.asc routines by Maciej Guba +; Thanks to Reverend for integer/float/ascii conversion examples +read_asc: + mov eax,[fptr] + .find_vert: + cmp dword[eax],'Vert' + je @f + inc eax + jmp .find_vert + @@: + add eax,4 + cmp dword[eax],'ices' + jne .find_vert + add eax,3 + @@: + inc eax + cmp byte[eax],'0' ; search end of ascii number of vertices string + jb @b + cmp byte[eax],'9' + ja @b +; eax - start ascii number + @@: + inc eax + cmp byte[eax],'0' + jb .convert1 + cmp byte[eax],'9' + ja .convert1 + jmp @b + .convert1: + dec eax + mov ebx,eax + push eax + call ascii_to_integer + mov [points_count_var],edx + pop eax + + @@: + inc eax + cmp dword[eax],'Face' + jne @b + add eax,3 + @@: + inc eax + cmp byte[eax],'0' + jb @b + cmp byte[eax],'9' + ja @b + ; eax - start ascii number + @@: + inc eax + cmp byte[eax],'0' + jb .convert2 + cmp byte[eax],'9' + ja .convert2 + jmp @b + ; eax - end ascii number + .convert2: + dec eax + mov ebx,eax + push eax + call ascii_to_integer + mov [triangles_count_var],edx + pop eax + + @@: + inc eax + cmp dword[eax],'Vert' + jnz @b + inc eax + + mov edi,[points_ptr] + xor ebx,ebx + .decode_vertices: + push ebx + @@: + inc eax + cmp dword[eax],'Vert' + jne @b + xor ecx,ecx + + .decode_coord: + push ecx + @@: + inc eax + mov dl,byte[eax] + cmp dl,byte[XYZpartices+ecx] + jne @b + @@: + inc eax + cmp byte[eax],'.' 
+ je .readF + cmp byte[eax],'-' + je .readF + cmp byte[eax],'0' + jb @b + cmp byte[eax],'9' + ja @b +.readF: ; read float + mov esi,eax + push eax + push ecx + + call atof ; st0 - desired dword float + + pop ecx + pop eax + + fstp dword[edi] + add edi,4 + + pop ecx + inc ecx + cmp ecx,3 + jne .decode_coord + pop ebx + inc ebx + cmp ebx,[points_count_var] + jne .decode_vertices + mov dword[edi],-1 + + + + mov esi,eax + @@: + inc esi + cmp dword[esi],'Face' + jne @b + xor edx,edx + mov edi,[triangles_ptr] + cld + .decode_face: + + push edx + @@: + inc esi + cmp dword[esi],'Face' + jne @b + @@: + inc esi + cmp byte[esi],'0' ; face number start + jb @b + cmp byte[esi],'9' + ja @b + @@: + inc esi + cmp byte[esi],'0' + jb @f + cmp byte[esi],'9' ; face number end + ja @f + jmp @b + @@: + xor ecx,ecx + .next_vertex_number: + + push ecx + @@: + inc esi + cmp byte[esi],'0' + jb @b + cmp byte[esi],'9' + ja @b + ; eax - start ascii number + @@: + inc esi + cmp byte[esi],'0' + jb @f + cmp byte[esi],'9' + ja @f + jmp @b + ; eax - end ascii number + @@: + dec esi + mov ebx,esi + push esi + call ascii_to_integer + mov eax,edx + + stosd + pop esi + add esi,4 + + pop ecx + inc ecx + cmp ecx,3 + jne .next_vertex_number + pop edx + inc edx + cmp edx,[triangles_count_var] + jne .decode_face + mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark + mov eax,1 ;-> mark if ok +ret + +ascii_to_integer: +; in --- [ebx] -> end of ascii string +; out -- edx -> desired number + xor edx,edx + xor ecx,ecx + .again: + movzx eax,byte[ebx] + sub al,'0' + cwde + push edx + mul dword[convert_muler+ecx] + pop edx + add edx,eax + dec ebx + cmp byte[ebx],'0' + jb .end + cmp byte[ebx],'9' + ja .end + add ecx,4 + jmp .again + @@: + + .end: +ret + +;=============================================================================== +; ASCII to float conversion procedure +; +; input: +; esi - pointer to string +; +; output: +; st0 - number changed into float +; 
+;=============================================================================== + +atof: + .string equ ebp-4 + + push ebp + mov ebp,esp + sub esp,32 + push eax ecx esi + mov [.string],esi + fninit + fldz + fldz + + cld + cmp byte [esi], '-' + jnz @F + inc esi + @@: + xor eax, eax + align 4 + .loop.integer_part: + lodsb + cmp al, '.' + jz .mantisa + cmp al,'0' + jb .exit + cmp al,'9' + ja .exit + fimul [i10] + sub al, '0' + push eax + fiadd dword [esp] + add esp, 4 + jmp .loop.integer_part + + .mantisa: + xor ecx, ecx + xor eax, eax + cld + fxch st1 + @@: + + lodsb + cmp al,'0' + jb .exit + cmp al,'9' + ja .exit + cmp ecx,7*4 + je .exit ; max 7 digits in mantisa + sub al,'0' + push eax + fild dword[esp] + fidiv dword[convert_muler+4+ecx] + faddp + add esp,4 + add ecx,4 + jmp @b + .exit: + faddp + + mov eax, [.string] + cmp byte [eax], '-' + jnz @F + fchs + @@: + cld + stc ; always returns no error + pop esi ecx eax + mov esp,ebp + pop ebp + ret + + +itoa: ; unsigned dword integer to ascii procedure +; in eax - variable +; esi - Pointer to ascii string +; out esi - desired ascii string +; edi - end of ascii string - ptr to memory + .temp_string equ dword[ebp-36] + .ptr equ dword[ebp-40] + .var equ dword[ebp-44] + push ecx + push ebp + mov ebp,esp + sub esp,64 + mov .var,eax + mov eax,-1 + lea edi,.temp_string + cld + mov ecx,9 + rep stosd ; make floor + + + mov .ptr,esi + lea edi,.temp_string + add edi,34 + std + xor eax,eax + stosb ; mark begin + mov eax,.var + mov esi,10 + @@: + xor edx,edx + div esi + xchg eax,edx + add al,'0' + stosb + xchg eax,edx + or eax,eax + jnz @b + stosb ; mark end + + lea esi,.temp_string + cld + @@: + lodsb + or al,al + jnz @b + + mov edi,.ptr + @@: + lodsb + stosb + or al,al + jnz @b + + mov esp,ebp + pop ebp + pop ecx +ret +if 1 +ftoa_mac: +; in : esi - pointer to dword float +; edi - pointer to ascii string + .ptr_f equ dword[ebp-4] + .sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise + .ptr_ascii equ dword[ebp-12] + 
.integer equ dword[ebp-20] + .fraction equ dword[ebp-28] + .status_orginal equ word[ebp-32] + .status_changed equ word[ebp-34] + push ecx + push ebp + mov ebp,esp + sub esp,64 + fninit + fnstcw .status_orginal + mov ax, .status_orginal + or ax, 0000110000000000b + mov .status_changed, ax + fldcw .status_changed +; -------------------------------- +; check if signed + xor eax, eax + fld dword[esi] + fst .sign + test .sign, 80000000h + setz al + mov .sign, eax + + mov .ptr_f,esi + mov .ptr_ascii,edi + fabs + fld st0 + frndint + fist .integer + fsubp st1, st0 + + mov eax,.integer + mov esi,.ptr_ascii + call itoa + ; edi -> ptr to end of ascii string + dec edi + mov al,'.' + stosb + + mov ecx, 6 ; max 6 digits in fraction part + .loop: + fimul [i10] + fld st0 + frndint + fist .fraction + fsubp st1, st0 + mov esi,edi + mov eax,.fraction + add al,'0' + stosb + ftst + fnstsw ax + test ax, 0100000000000000b + jz @F + test ax, 0000010100000000b + jz .finish + @@: + loop .loop +if 0 + fldcw .status_orginal + fimul [i10] + fist .fraction + ; mov esi,edi + mov eax,.fraction + add al,'0' + stosb + ; call itoa +; -------------------------------- +; restore previous values + .finish: + ; fstp st0 + ffree st + mov eax,.fraction + mov esi,edi +; call itoa + + add al,'0' + stosb +end if + .finish: + ffree st + cmp .sign,0 + jnz @f + mov esi,.ptr_ascii + dec esi + mov byte[esi],'-' + @@: + mov esp,ebp + pop ebp + pop ecx + +ret +end if +if 0 +;=============================================================================== +; float to ASCII conversion procedure +; +; input: +; buffer - pointer to memory where output will be saved +; precision - number of digits after dot +; +; output: +; no immediate output +; +; notes: +; separate integer and mantisa part with dot '.' +; so GOOD 123.456 +; WRONG 123,456 +; +; coded by Reverend // HTB + RAG +;=============================================================================== +proc ftoa buffer, precision +locals + status_original dw ? 
+ status_changed dw ? + integer dd ? + mantisa dd ? + signed dd ? +endl + push eax ecx;edi ecx +; -------------------------------- +; set correct precision + mov eax, [precision] + cmp eax, 51 + jb @F + mov eax, 51 + @@: + mov [precision], eax +; -------------------------------- +; change control wortd of fpu to prevent rounding + fnstcw [status_original] + mov ax, [status_original] + or ax, 0000110000000000b + mov [status_changed], ax + fldcw [status_changed] +; -------------------------------- +; check if signed + xor eax, eax + fst [signed] + test [signed], 80000000h + setnz al + mov [signed], eax +; -------------------------------- +; cut integer and mantisa separately + fld st0 + fld st0 ; st0 = x, st1 = x + frndint + fist [integer] ; st0 = x, st1 = x + fabs + fsubp st1, st0 ; st0 = mantisa(x) +; -------------------------------- +; save integer part in buffer + ; mov edi, [buffer] + mov esi,[buffer] + ; push [signed] + ; push edi + ; push 10 + ; push [integer] + mov eax,[integer] + call itoa + ; add edi, eax + mov al, '.' 
+ stosb + mov esi,edi +; -------------------------------- +; save mantisa part in buffer + mov ecx, [precision] + dec ecx + .loop: + fimul [i10] + fld st0 + frndint + fist [mantisa] + fsubp st1, st0 +; push 0 +; push edi +; push 10 +; push [mantisa] + mov esi,edi + mov eax,[mantisa] + call itoa + ; add edi, eax + ftst + fnstsw ax + test ax, 0100000000000000b + jz @F + test ax, 0000010100000000b + jz .finish + @@: + loop .loop + fldcw [status_original] + fimul [i10] + fist [mantisa] +; push 0 +; push edi +; push 10 +; push [mantisa] + mov esi,edi + mov eax,[mantisa] + call itoa +; -------------------------------- +; restore previous values + .finish: + fstp st0 + cmp [signed],1 + jnz @f + mov byte[buffer],'-' + @@: + stc + pop ecx eax ;edi eax + ret +endp +end if +if 0 +write_asc: + .counter equ dword[ebp-4] + push ebp + mov ebp,esp + sub esp,64 + fninit + mov edi,asc_file_buffer + mov esi,asc_main_header + cld + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + + mov esi,asc_info_header + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + push esi ; -> position in header info + movzx eax,[points_count_var] + mov esi,edi + call itoa ; unsigned dword integer to ascii procedure + pop esi + inc esi + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + push esi + movzx eax,[triangles_count_var] + mov esi,edi + call itoa + pop esi + inc esi + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: +;============================================================= +;================vertex list parser=========================== +;============================================================= + + xor ecx,ecx + .again_vertex: + push ecx + mov esi,asc_one_vertex_formula + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + mov eax,ecx + ; push ecx + push esi + mov esi,edi + call itoa + pop esi +; pop ecx + inc esi + xor ebx,ebx + .next_vertex_coef: + push ebx + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + ; int3 + push esi + lea esi,[ecx*3] + shl esi,2 
+ add esi,points_r + add esi,ebx + ; int3 + call ftoa_mac + ; std + ; fld dword[esi] + + + ; pushad + ; stdcall ftoa, edi, 30 + ; popad + ; add edi,20 + + + pop esi + pop ebx + add ebx,4 + cmp ebx,12 + jnz .next_vertex_coef + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + pop ecx + inc ecx + cmp cx,[points_count_var] + jnz .again_vertex + + + ; mov edi,[temp_edi] + + + mov esi,asc_face_list_header + @@: + lodsb + cmp al,1 ; all face header + jz @f + stosb + jmp @b + @@: + ;===================================== + ; ==============face list parser====== + ;===================================== + xor ecx,ecx + .again_face: + push ecx + mov .counter,ecx + mov esi,asc_one_face_formula + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + mov eax,ecx + push esi + mov esi,edi + call itoa + pop esi + inc esi + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + + xor ebx,ebx + .next_face_index: + push ebx + mov ecx,.counter + lea ecx,[ecx*3] + add ecx,ecx + movzx eax,word[triangles+ecx+ebx] + push esi + mov esi,edi + call itoa + pop esi + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: + pop ebx + add ebx,2 + cmp ebx,6 + jnz .next_face_index + +; push esi + mov esi,asc_material + @@: + lodsb + cmp al,1 + jz @f + stosb + jmp @b + @@: +; pop esi + + pop ecx + inc ecx + cmp cx,[triangles_count_var] + jnz .again_face + + ; write file + sub edi,asc_file_buffer + ; mov [file_buffer+2],edi + mov [FileSize],edi + + invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 + mov [hfile],eax + invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0 + invoke CloseHandle,[hfile] + + mov esp,ebp + pop ebp +ret +end if +if 0 +asc_file_buffer: + rd 65536 +ascii_string rb 50 +ftoa_muler dd 1000000000 +file_size dd ? +file_handle dd ? +end if + +if 0 +convert_muler: +dd 1, 10, 100, 1000, 10000 +XYZpartices: +db 'X','Y','Z' +i10 dw 10 +points_count_var dd ? +triangles_count_var dd ? 
+points rb 100 +triangles rb 100 +asc_file: + +file "2TORUS.ASC" end if \ No newline at end of file diff --git a/programs/demos/view3ds/b_procs.inc b/programs/demos/view3ds/b_procs.inc index c79774c834..20a30c5207 100644 --- a/programs/demos/view3ds/b_procs.inc +++ b/programs/demos/view3ds/b_procs.inc @@ -737,22 +737,33 @@ ret blur_screen: ;blur n times ; blur or fire ;in - ecx times count -;.counter equ dword[esp-4] -.counter1 equ dword[esp-8] +; ax - render mode + + .val equ dword[ebp-4] + .dr_model equ word[ebp-6] + .fire equ dword[ebp-10] + if Ext>=SSE2 push ebp mov ebp,esp - push dword 0x01010101 - movss xmm5,[esp] + sub esp,10 + ; xorps xmm5,xmm5 + ; or edx,edx + ; jz @f + mov .val,0x01010101 + movss xmm5,.val shufps xmm5,xmm5,0 + @@: + mov .dr_model,ax + + .again_blur: push ecx mov edi,[screen_ptr] movzx ecx,word[size_x_var] ;SIZE_X*3/4 - - cmp [dr_flag],12 + cmp .dr_model,11 jge @f - lea ecx,[ecx*3+1] + lea ecx,[ecx*3+3] shr ecx,2 @@: @@ -763,11 +774,11 @@ if Ext>=SSE2 movzx ecx,word[size_y_var] sub ecx,3 imul ecx,ebx - cmp [dr_flag],12 ; 32 bit per pix cause + cmp .dr_model,11 ; 32 bit per pix cause jge @f lea ecx,[ecx*3] shr ecx,4 - lea ebx,[ebx *3] + lea ebx,[ebx*3] jmp .blr @@: @@ -781,7 +792,7 @@ if Ext>=SSE2 mov ecx,edi sub ecx,ebx movups xmm1,[ecx] - cmp [dr_flag],12 + cmp .dr_model,12 jge @f movups xmm2,[edi-3] movups xmm3,[edi+3] @@ -802,9 +813,9 @@ if Ext>=SSE2 end if xor eax,eax movzx ecx,word[size_x_var] - cmp [dr_flag],12 + cmp .dr_model,11 jge @f - lea ecx,[ecx*3] + lea ecx,[ecx*3+3] shr ecx,2 @@: ; mov ecx,SIZE_X*3/4 diff --git a/programs/demos/view3ds/bump_cat.inc b/programs/demos/view3ds/bump_cat.inc index 574b391fbd..7f6c97727d 100644 --- a/programs/demos/view3ds/bump_cat.inc +++ b/programs/demos/view3ds/bump_cat.inc @@ -1,1132 +1,1132 @@ -;SIZE_X equ 350 -;SIZE_Y equ 350 -ROUND equ 8 -;TEX_X equ 512 -;TEX_Y equ 512 -;TEXTURE_SIZE EQU (512*512)-1 -;TEX_SHIFT EQU 9 -CATMULL_SHIFT equ 8 -;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 -;Ext = NON -;MMX 
= 1 -;NON = 0 -;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- -;------- DOS 13h mode demos -------------------------------------------- -;------- Procedure draws bump triangle using Catmull Z-buffer algorithm- -;------- (Z coordinate interpolation)----------------------------------- -bump_triangle_z: -;------------------in - eax - x1 shl 16 + y1 ----------- -;---------------------- ebx - x2 shl 16 + y2 ----------- -;---------------------- ecx - x3 shl 16 + y3 ----------- -;---------------------- edx - pointer to bump map ------ -;---------------------- esi - pointer to environment map -;---------------------- edi - pointer to screen buffer-- -;---------------------- stack : bump coordinates-------- -;---------------------- environment coordinates- -;---------------------- Z position coordinates-- -;---------------------- pointer io Z buffer----- -;-- Z-buffer - filled with coordinates as dword -------- -;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! -.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 -.b_x3 equ ebp+12 -.b_y3 equ ebp+14 -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer - - -.t_bmap equ dword[ebp-4] ; pointer to bump map -.t_emap equ dword[ebp-8] ; pointer to e. 
map -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] - -.dx12 equ dword[ebp-24] -.dz12 equ [ebp-28] -.dbx12 equ dword[ebp-32] -.dby12 equ [ebp-36] -.dex12 equ dword[ebp-40] -.dey12 equ [ebp-44] - -.dx13 equ dword[ebp-48] -.dz13 equ [ebp-52] -.dbx13 equ dword[ebp-56] -.dby13 equ [ebp-60] -.dex13 equ dword[ebp-64] -.dey13 equ [ebp-68] - -.dx23 equ dword[ebp-72] -.dz23 equ [ebp-76] -.dbx23 equ dword[ebp-80] -.dby23 equ [ebp-84] -.dex23 equ dword[ebp-88] -.dey23 equ [ebp-92] - -.cx1 equ dword[ebp-96] ; current variables -.cz1 equ [ebp-100] -.cx2 equ dword[ebp-104] -.cz2 equ [ebp-108] -.cbx1 equ dword[ebp-112] -.cby1 equ [ebp-116] -.cex1 equ dword[ebp-120] -.cey1 equ [ebp-124] -.cbx2 equ dword[ebp-128] -.cby2 equ [ebp-132] -.cex2 equ dword[ebp-136] -.cey2 equ [ebp-140] - - mov ebp,esp - push edx ; store bump map - push esi ; store e. map - ; sub esp,120 - .sort3: ; sort triangle coordinates... - cmp ax,bx - jle .sort1 - xchg eax,ebx - mov edx,dword[.b_x1] - xchg edx,dword[.b_x2] - mov dword[.b_x1],edx - mov edx,dword[.e_x1] - xchg edx,dword[.e_x2] - mov dword[.e_x1],edx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .sort3 - .sort2: - push eax ; store triangle coords in variables - push ebx - push ecx - - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done - ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that - ; or edx,ebx ; if any *one* of them is negative a sign flag is raised - ; or edx,ecx - ; test edx,80000000h ; Check only X - ; jne .loop23_done - - ; cmp 
.x1,SIZE_X ; { - ; jg .loop23_done - ; cmp .x2,SIZE_X ; This can be optimized with effort - ; jg .loop23_done - ; cmp .x3,SIZE_X - ; jg .loop23_done ; { - - - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx12_done - .bt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax - push eax - - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax -if Ext>=SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - ; mov eax,255 - cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 - ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - ; cvtpi2ps xmm0,mm0 - ; movlhps xmm0,xmm0 - ; cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - ; subps xmm1,xmm0 - - -; cvtpi2ps xmm0,mm3 - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey12,mm0 - movq .dby12,mm1 - - -else - - mov ax,word[.b_x2] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx12,eax - push eax - - mov ax,word[.b_y2] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby12,eax - push eax - - mov ax,word[.e_x2] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex12,eax - push eax - - mov ax,word[.e_y2] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey12,eax - push eax - -end if - - .bt_dx12_done: - - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done - .bt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov 
.dx13,eax - push eax - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz13,eax - push eax - -if Ext>=SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - ; mov eax,255 - cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 - ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x3] - movd mm2,[.e_x1] - movd mm3,[.e_x3] - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - ; cvtpi2ps xmm0,mm0 - ; movlhps xmm0,xmm0 - ; cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - ; subps xmm1,xmm0 - - -; cvtpi2ps xmm0,mm3 - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey13,mm0 - movq .dby13,mm1 - - -else - - mov ax,word[.b_x3] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx13,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby13,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex13,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey13,eax - push eax -end if - - .bt_dx13_done: - - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done - .bt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx23,eax - push eax - - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz23,eax - push eax - ; sub esp,40 -if Ext>=SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - ; mov eax,255 - cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 - ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] 
- pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - -; cvtpi2ps xmm0,mm0 -; movlhps xmm0,xmm0 -; cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 -; subps xmm1,xmm0 - - -; cvtpi2ps xmm0,mm3 - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey23,mm0 - movq .dby23,mm1 - -else - - mov ax,word[.b_x3] - sub ax,word[.b_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx23,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby23,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex23,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey23,eax - push eax - -end if - - .bt_dx23_done: - sub esp,48 - - movsx eax,.x1 - shl eax,ROUND - mov .cx1,eax - mov .cx2,eax - ; push eax - ; push eax - - movsx eax,word[.b_x1] - shl eax,ROUND - mov .cbx1,eax - mov .cbx2,eax - ; push eax - ; push eax - - movsx eax,word[.b_y1] - shl eax,ROUND - mov .cby1,eax - mov .cby2,eax - ; push eax - ; push eax - - movsx eax,word[.e_x1] - shl eax,ROUND - mov .cex1,eax - mov .cex2,eax - ; push eax - ; push eax - - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax - ; push eax - ; push eax - - movsx eax,.z1 - shl eax,CATMULL_SHIFT - mov .cz1,eax - mov .cz2,eax - ; push eax - ; push eax - - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done - .loop12: - call .call_bump_line -if Ext >= SSE2 - movups xmm0,.cey2 - movups xmm1,.cey1 - movups xmm2,.dey12 - movups xmm3,.dey13 - paddd xmm0,xmm2 - paddd xmm1,xmm3 - movups .cey2,xmm0 - movups .cey1,xmm1 - movq mm4,.cz1 - movq mm5,.cz2 - paddd mm4,.dz13 - paddd mm5,.dz12 - movq .cz1,mm4 - movq .cz2,mm5 -end if - - -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - 
movq mm1,.cby1 - movq mm2,.cey2 - movq mm3,.cey1 - movq mm4,.cz1 - movq mm5,.cz2 - paddd mm0,.dby12 - paddd mm1,.dby13 - paddd mm2,.dey12 - paddd mm3,.dey13 - paddd mm4,.dz13 - paddd mm5,.dz12 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 - movq .cz1,mm4 - movq .cz2,mm5 -else if Ext = NON - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax - - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx - - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx -end if - inc ecx - cmp cx,.y2 - jl .loop12 - .loop12_done: - - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done - - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax - - movsx eax,.x2 - shl eax,ROUND - mov .cx2,eax - - movzx eax,word[.b_x2] - shl eax,ROUND - mov .cbx2,eax - - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax - - movzx eax,word[.e_x2] - shl eax,ROUND - mov .cex2,eax - - movzx eax,word[.e_y2] - shl eax,ROUND - mov .cey2,eax - - .loop23: - call .call_bump_line -if Ext >= SSE2 - movups xmm0,.cey2 - movups xmm1,.cey1 - movups xmm2,.dey23 - movups xmm3,.dey13 - paddd xmm0,xmm2 - paddd xmm1,xmm3 - movups .cey2,xmm0 - movups .cey1,xmm1 - movq mm4,.cz1 - movq mm5,.cz2 - paddd mm4,.dz13 - paddd mm5,.dz23 - movq .cz1,mm4 - movq .cz2,mm5 -end if -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - movq mm1,.cby1 - movq mm2,.cey2 - movq mm3,.cey1 - movq mm4,.cz1 - movq mm5,.cz2 - paddd mm0,.dby23 - paddd mm1,.dby13 - paddd mm2,.dey23 - paddd mm3,.dey13 - paddd mm4,.dz13 - paddd mm5,.dz23 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 - movq .cz1,mm4 - movq .cz2,mm5 -else if Ext = NON - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx - - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - 
mov edx,.dby23 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax - - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx -end if - inc ecx - cmp cx,.y3 - jl .loop23 - .loop23_done: - - mov esp,ebp -ret 34 - -.call_bump_line: - - ; push ebp - ; push ecx - pushad - - push dword .cz1 - push dword .cz2 - push .z_buff - push .t_emap - push .t_bmap - push dword .cey2 - push .cex2 - push dword .cby2 - push .cbx2 - push dword .cey1 - push .cex1 - push dword .cby1 - push .cbx1 - push ecx - - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND - - call bump_line_z - - popad -ret -bump_line_z: -;--------------in: eax - x1 -;-------------- ebx - x2 -;-------------- edi - pointer to screen buffer -;stack - another parameters : -.y equ dword [ebp+4] -.bx1 equ [ebp+8] ; --- -.by1 equ dword [ebp+12] ; | -.ex1 equ [ebp+16] ; | -.ey1 equ dword [ebp+20] ; |> bump and env coords -.bx2 equ [ebp+24] ; |> shifted shl ROUND -.by2 equ dword [ebp+28] ; | -.ex2 equ [ebp+32] ; | -.ey2 equ dword [ebp+36] ; --- -.bmap equ dword [ebp+40] -.emap equ dword [ebp+44] -.z_buff equ dword [ebp+48] -.z2 equ dword [ebp+52] ; -- |> z coords shifted -.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT - -.x1 equ dword [ebp-4] -.x2 equ dword [ebp-8] -.dbx equ dword [ebp-12] -.dby equ [ebp-16] -.dex equ dword [ebp-20] -.dey equ [ebp-24] -.dz equ dword [ebp-28] -.cbx equ dword [ebp-32] -.cby equ [ebp-36] -.cex equ dword [ebp-40] -.cey equ [ebp-44] -.cz equ dword [ebp-48] -.czbuff equ dword [ebp-52] -.temp1 equ ebp-60 -.temp2 equ ebp-68 -.temp3 equ ebp-76 -.temp4 equ ebp-84 -.temp5 equ ebp-92 - - mov ebp,esp - - mov ecx,.y - or ecx,ecx - jl .bl_end - ; mov dx,[size_x_var] - ; dec dx - cmp cx,[size_y_var] ;SIZE_Y - jge .bl_end - - cmp eax,ebx - jl .bl_ok - je .bl_end - - xchg eax,ebx -if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx - - mov edx,.ex1 
- xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx -end if -if Ext = MMX - movq mm0,.bx1 - movq mm1,.ex1 - movq mm2,.bx2 - movq mm3,.ex2 - movq .bx2,mm0 - movq .ex2,mm1 - movq .bx1,mm2 - movq .ex1,mm3 -end if -if Ext >= SSE - movups xmm0,.bx1 - movups xmm1,.bx2 - movups .bx2,xmm0 - movups .bx1,xmm1 -end if - - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx - - .bl_ok: - - push eax - push ebx ;store x1, x2 - movzx edx,word[size_x_var] - dec edx - cmp .x1,edx ;SIZE_X - jge .bl_end - cmp .x2,0 - jle .bl_end - - mov ebx,.x2 - sub ebx,.x1 - -if Ext >= SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 - - cvtpi2ps xmm0,.bx1 ;mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,.ex1 ;mm2 - cvtpi2ps xmm1,.bx2 ;mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,.ex2 ;mm3 - subps xmm1,xmm0 - - divps xmm1,xmm3 - - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey,mm0 - movq .dby,mm1 - -else - - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax - - mov eax,.by2 ; calc .dby - sub eax,.by1 - cdq - idiv ebx - push eax - - mov eax,.ex2 ; calc .dex - sub eax,.ex1 - cdq - idiv ebx - push eax - - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax - -end if - - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax - - cmp .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; cutting triangle exceedes screen - mov ebx,.x1 - neg ebx - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov .x1,0 - - mov eax,.dbx - imul ebx - add .bx1,eax - - mov eax,.dby - imul ebx - add .by1,eax - - mov eax,.dex - imul ebx - add .ex1,eax - - mov eax,.dey - imul ebx - add .ey1,eax - @@: - movzx edx,word[size_x_var] - dec edx - cmp .x2,edx ;SIZE_X - jl @f - mov .x2,edx ;SIZE_X - @@: - movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers - mov ebx,.y - mul ebx - mov ebx,.x1 - add eax,ebx - mov ebx,eax - lea eax,[eax*3] - add edi,eax - mov esi,.z_buff 
; z-buffer filled with dd variables - shl ebx,2 - add esi,ebx - - mov ecx,.x2 - sub ecx,.x1 - ; init current variables - push dword .bx1 - push .by1 - push dword .ex1 - push .ey1 - - push .z1 ; current z shl CATMULL_SHIFT - push esi -;if Ext = SSE2 -; movups xmm1,.dey -;end if -if Ext>=MMX - movq mm0,.cby - movq mm1,.cey - movq mm2,.dby - movq mm3,.dey -end if -if Ext >= SSE2 - mov eax,TEXTURE_SIZE - movd xmm1,eax - shufps xmm1,xmm1,0 - push dword TEX_X - push dword -TEX_X - push dword 1 - push dword -1 - movups xmm2,[esp] - movd xmm3,.bmap - shufps xmm3,xmm3,0 -end if - -;align 16 - .draw: - ; if TEX = SHIFTING ;bump drawing only in shifting mode - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] - jge .skip - -if Ext>=MMX - movq mm6,mm0 - psrld mm6,ROUND - movd eax,mm6 - psrlq mm6,32 - movd esi,mm6 -else - mov eax,.cby - sar eax,ROUND - mov esi,.cbx - sar esi,ROUND -end if - shl eax,TEX_SHIFT ;- - add esi,eax ;- ; esi - current bump map index - -if Ext = SSE2 - movd xmm0,esi - shufps xmm0,xmm0,0 - paddd xmm0,xmm2 - pand xmm0,xmm1 - paddd xmm0,xmm3 - - movd ebx,xmm0 - movzx eax,byte[ebx] -; -; shufps xmm0,xmm0,11100001b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx,byte[ebx] - sub eax,ebx -; -; shufps xmm0,xmm0,11111110b - psrldq xmm0,4 - movd ebx,xmm0 - movzx edx, byte [ebx] -; -; shufps xmm0,xmm0,11111111b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx, byte [ebx] - sub edx,ebx -; -else - - mov ebx,esi - dec ebx - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx eax,byte [ebx] - - mov ebx,esi - inc ebx - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - - sub eax,ebx - - mov ebx,esi - sub ebx,TEX_X - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx edx,byte [ebx] - - mov ebx,esi - add ebx,TEX_X - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - - sub edx,ebx -end if - ; eax - horizontal sub - ; edx - vertical sub -if Ext = NON - mov ebx,.cex ;.cex - current env map X - sar ebx,ROUND - add 
eax,ebx ; eax - modified x coord - - mov ebx,.cey ;.cey - current env map y - sar ebx,ROUND - add edx,ebx ; edx - modified y coord -else - movq mm6,mm1 ; mm5 - copy of cur env coords - psrld mm6,ROUND - movd ebx,mm6 - psrlq mm6,32 - add eax,ebx - movd ebx,mm6 - add edx,ebx -end if - or eax,eax - jl .black - cmp eax,TEX_X - jg .black - or edx,edx - jl .black - cmp edx,TEX_Y - jg .black - - shl edx,TEX_SHIFT - add edx,eax - lea esi,[edx*3] - add esi,.emap - lodsd - jmp .put_pixel - .black: - xor eax,eax - .put_pixel: - stosd - dec edi - mov ebx,.cz - mov esi,.czbuff - mov dword[esi],ebx - jmp .no_skip - .skip: - add edi,3 - .no_skip: - add .czbuff,4 - -;if Ext = SSE2 -; movups xmm0,.cey -; paddd xmm0,xmm1 -; movups .cey,xmm0 -; -;end if -if Ext >= MMX - paddd mm0,mm2 - paddd mm1,mm3 -end if - -if Ext=NON - mov eax,.dbx - add .cbx,eax - mov eax,.dby - add .cby,eax - mov eax,.dex - add .cex,eax - mov eax,.dey - add .cey,eax -end if - mov eax,.dz - add .cz,eax - - dec ecx - jnz .draw - ; end if - .bl_end: - mov esp,ebp -ret 56 +;SIZE_X equ 350 +;SIZE_Y equ 350 +ROUND equ 8 +;TEX_X equ 512 +;TEX_Y equ 512 +;TEXTURE_SIZE EQU (512*512)-1 +;TEX_SHIFT EQU 9 +CATMULL_SHIFT equ 8 +;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 +;Ext = NON +;MMX = 1 +;NON = 0 +;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- +;------- DOS 13h mode demos -------------------------------------------- +;------- Procedure draws bump triangle using Catmull Z-buffer algorithm- +;------- (Z coordinate interpolation)----------------------------------- +bump_triangle_z: +;------------------in - eax - x1 shl 16 + y1 ----------- +;---------------------- ebx - x2 shl 16 + y2 ----------- +;---------------------- ecx - x3 shl 16 + y3 ----------- +;---------------------- edx - pointer to bump map ------ +;---------------------- esi - pointer to environment map +;---------------------- edi - pointer to screen buffer-- +;---------------------- stack : bump coordinates-------- 
+;---------------------- environment coordinates- +;---------------------- Z position coordinates-- +;---------------------- pointer io Z buffer----- +;-- Z-buffer - filled with coordinates as dword -------- +;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- +.b_x1 equ ebp+4 ; procedure don't save registers !!! +.b_y1 equ ebp+6 ; each coordinate as word +.b_x2 equ ebp+8 +.b_y2 equ ebp+10 +.b_x3 equ ebp+12 +.b_y3 equ ebp+14 +.e_x1 equ ebp+16 +.e_y1 equ ebp+18 +.e_x2 equ ebp+20 +.e_y2 equ ebp+22 +.e_x3 equ ebp+24 +.e_y3 equ ebp+26 +.z1 equ word[ebp+28] +.z2 equ word[ebp+30] +.z3 equ word[ebp+32] +.z_buff equ dword[ebp+34] ; pointer to Z-buffer + + +.t_bmap equ dword[ebp-4] ; pointer to bump map +.t_emap equ dword[ebp-8] ; pointer to e. map +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] + +.dx12 equ dword[ebp-24] +.dz12 equ [ebp-28] +.dbx12 equ dword[ebp-32] +.dby12 equ [ebp-36] +.dex12 equ dword[ebp-40] +.dey12 equ [ebp-44] + +.dx13 equ dword[ebp-48] +.dz13 equ [ebp-52] +.dbx13 equ dword[ebp-56] +.dby13 equ [ebp-60] +.dex13 equ dword[ebp-64] +.dey13 equ [ebp-68] + +.dx23 equ dword[ebp-72] +.dz23 equ [ebp-76] +.dbx23 equ dword[ebp-80] +.dby23 equ [ebp-84] +.dex23 equ dword[ebp-88] +.dey23 equ [ebp-92] + +.cx1 equ dword[ebp-96] ; current variables +.cz1 equ [ebp-100] +.cx2 equ dword[ebp-104] +.cz2 equ [ebp-108] +.cbx1 equ dword[ebp-112] +.cby1 equ [ebp-116] +.cex1 equ dword[ebp-120] +.cey1 equ [ebp-124] +.cbx2 equ dword[ebp-128] +.cby2 equ [ebp-132] +.cex2 equ dword[ebp-136] +.cey2 equ [ebp-140] + + mov ebp,esp + push edx ; store bump map + push esi ; store e. map + ; sub esp,120 + .sort3: ; sort triangle coordinates... 
+ cmp ax,bx + jle .sort1 + xchg eax,ebx + mov edx,dword[.b_x1] + xchg edx,dword[.b_x2] + mov dword[.b_x1],edx + mov edx,dword[.e_x1] + xchg edx,dword[.e_x2] + mov dword[.e_x1],edx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.b_x2] + xchg edx,dword[.b_x3] + mov dword[.b_x2],edx + mov edx,dword[.e_x2] + xchg edx,dword[.e_x3] + mov dword[.e_x2],edx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .sort3 + .sort2: + push eax ; store triangle coords in variables + push ebx + push ecx + + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done + ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that + ; or edx,ebx ; if any *one* of them is negative a sign flag is raised + ; or edx,ecx + ; test edx,80000000h ; Check only X + ; jne .loop23_done + + ; cmp .x1,SIZE_X ; { + ; jg .loop23_done + ; cmp .x2,SIZE_X ; This can be optimized with effort + ; jg .loop23_done + ; cmp .x3,SIZE_X + ; jg .loop23_done ; { + + + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx12_done + .bt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax + push eax + + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax +if Ext>=SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + ; mov eax,255 + cvtsi2ss xmm4,[i255d] + divss xmm3,xmm4 + rcpss xmm3,xmm3 + ; mulss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x1] + movd mm1,[.b_x2] + movd mm2,[.e_x1] + movd mm3,[.e_x2] + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + + ; cvtpi2ps xmm0,mm0 + ; movlhps xmm0,xmm0 + ; cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + 
movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + ; subps xmm1,xmm0 + + +; cvtpi2ps xmm0,mm3 + ; divps xmm1,xmm3 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey12,mm0 + movq .dby12,mm1 + + +else + + mov ax,word[.b_x2] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx12,eax + push eax + + mov ax,word[.b_y2] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby12,eax + push eax + + mov ax,word[.e_x2] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex12,eax + push eax + + mov ax,word[.e_y2] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey12,eax + push eax + +end if + + .bt_dx12_done: + + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx13_done + .bt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx13,eax + push eax + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz13,eax + push eax + +if Ext>=SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + ; mov eax,255 + cvtsi2ss xmm4,[i255d] + divss xmm3,xmm4 + rcpss xmm3,xmm3 + ; mulss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x1] + movd mm1,[.b_x3] + movd mm2,[.e_x1] + movd mm3,[.e_x3] + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + + ; cvtpi2ps xmm0,mm0 + ; movlhps xmm0,xmm0 + ; cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + ; subps xmm1,xmm0 + + +; cvtpi2ps xmm0,mm3 + ; divps xmm1,xmm3 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey13,mm0 + movq .dby13,mm1 + + +else + + mov ax,word[.b_x3] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx13,eax + 
push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby13,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex13,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey13,eax + push eax +end if + + .bt_dx13_done: + + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx23_done + .bt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx23,eax + push eax + + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz23,eax + push eax + ; sub esp,40 +if Ext>=SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + ; mov eax,255 + cvtsi2ss xmm4,[i255d] + divss xmm3,xmm4 + rcpss xmm3,xmm3 + ; mulss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x2] + movd mm1,[.b_x3] + movd mm2,[.e_x2] + movd mm3,[.e_x3] + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + +; cvtpi2ps xmm0,mm0 +; movlhps xmm0,xmm0 +; cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 +; subps xmm1,xmm0 + + +; cvtpi2ps xmm0,mm3 + ; divps xmm1,xmm3 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey23,mm0 + movq .dby23,mm1 + +else + + mov ax,word[.b_x3] + sub ax,word[.b_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx23,eax + push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby23,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex23,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey23,eax + push eax + +end if + + .bt_dx23_done: + 
sub esp,48 + + movsx eax,.x1 + shl eax,ROUND + mov .cx1,eax + mov .cx2,eax + ; push eax + ; push eax + + movsx eax,word[.b_x1] + shl eax,ROUND + mov .cbx1,eax + mov .cbx2,eax + ; push eax + ; push eax + + movsx eax,word[.b_y1] + shl eax,ROUND + mov .cby1,eax + mov .cby2,eax + ; push eax + ; push eax + + movsx eax,word[.e_x1] + shl eax,ROUND + mov .cex1,eax + mov .cex2,eax + ; push eax + ; push eax + + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax + ; push eax + ; push eax + + movsx eax,.z1 + shl eax,CATMULL_SHIFT + mov .cz1,eax + mov .cz2,eax + ; push eax + ; push eax + + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done + .loop12: + call .call_bump_line +if Ext >= SSE2 + movups xmm0,.cey2 + movups xmm1,.cey1 + movups xmm2,.dey12 + movups xmm3,.dey13 + paddd xmm0,xmm2 + paddd xmm1,xmm3 + movups .cey2,xmm0 + movups .cey1,xmm1 + movq mm4,.cz1 + movq mm5,.cz2 + paddd mm4,.dz13 + paddd mm5,.dz12 + movq .cz1,mm4 + movq .cz2,mm5 +end if + + +if (Ext = MMX) | (Ext = SSE) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cz1 + movq mm5,.cz2 + paddd mm0,.dby12 + paddd mm1,.dby13 + paddd mm2,.dey12 + paddd mm3,.dey13 + paddd mm4,.dz13 + paddd mm5,.dz12 + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 + movq .cz1,mm4 + movq .cz2,mm5 +else if Ext = NON + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax + + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx + + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx +end if + inc ecx + cmp cx,.y2 + jl .loop12 + .loop12_done: + + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done + + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax + + movsx eax,.x2 + shl eax,ROUND + mov .cx2,eax + + movzx eax,word[.b_x2] + shl eax,ROUND + mov .cbx2,eax + 
+ movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax + + movzx eax,word[.e_x2] + shl eax,ROUND + mov .cex2,eax + + movzx eax,word[.e_y2] + shl eax,ROUND + mov .cey2,eax + + .loop23: + call .call_bump_line +if Ext >= SSE2 + movups xmm0,.cey2 + movups xmm1,.cey1 + movups xmm2,.dey23 + movups xmm3,.dey13 + paddd xmm0,xmm2 + paddd xmm1,xmm3 + movups .cey2,xmm0 + movups .cey1,xmm1 + movq mm4,.cz1 + movq mm5,.cz2 + paddd mm4,.dz13 + paddd mm5,.dz23 + movq .cz1,mm4 + movq .cz2,mm5 +end if +if (Ext = MMX) | (Ext = SSE) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cz1 + movq mm5,.cz2 + paddd mm0,.dby23 + paddd mm1,.dby13 + paddd mm2,.dey23 + paddd mm3,.dey13 + paddd mm4,.dz13 + paddd mm5,.dz23 + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 + movq .cz1,mm4 + movq .cz2,mm5 +else if Ext = NON + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx + + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax + + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx +end if + inc ecx + cmp cx,.y3 + jl .loop23 + .loop23_done: + + mov esp,ebp +ret 34 + +.call_bump_line: + + ; push ebp + ; push ecx + pushad + + push dword .cz1 + push dword .cz2 + push .z_buff + push .t_emap + push .t_bmap + push dword .cey2 + push .cex2 + push dword .cby2 + push .cbx2 + push dword .cey1 + push .cex1 + push dword .cby1 + push .cbx1 + push ecx + + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND + + call bump_line_z + + popad +ret +bump_line_z: +;--------------in: eax - x1 +;-------------- ebx - x2 +;-------------- edi - pointer to screen buffer +;stack - another parameters : +.y equ dword [ebp+4] +.bx1 equ [ebp+8] ; --- +.by1 equ dword [ebp+12] ; | +.ex1 equ [ebp+16] ; | +.ey1 equ dword [ebp+20] ; |> bump and env 
coords +.bx2 equ [ebp+24] ; |> shifted shl ROUND +.by2 equ dword [ebp+28] ; | +.ex2 equ [ebp+32] ; | +.ey2 equ dword [ebp+36] ; --- +.bmap equ dword [ebp+40] +.emap equ dword [ebp+44] +.z_buff equ dword [ebp+48] +.z2 equ dword [ebp+52] ; -- |> z coords shifted +.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT + +.x1 equ dword [ebp-4] +.x2 equ dword [ebp-8] +.dbx equ dword [ebp-12] +.dby equ [ebp-16] +.dex equ dword [ebp-20] +.dey equ [ebp-24] +.dz equ dword [ebp-28] +.cbx equ dword [ebp-32] +.cby equ [ebp-36] +.cex equ dword [ebp-40] +.cey equ [ebp-44] +.cz equ dword [ebp-48] +.czbuff equ dword [ebp-52] +.temp1 equ ebp-60 +.temp2 equ ebp-68 +.temp3 equ ebp-76 +.temp4 equ ebp-84 +.temp5 equ ebp-92 + + mov ebp,esp + + mov ecx,.y + or ecx,ecx + jl .bl_end + ; mov dx,[size_x_var] + ; dec dx + cmp cx,[size_y_var] ;SIZE_Y + jge .bl_end + + cmp eax,ebx + jl .bl_ok + je .bl_end + + xchg eax,ebx +if Ext=NON + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx + + mov edx,.ex1 + xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx +end if +if Ext = MMX + movq mm0,.bx1 + movq mm1,.ex1 + movq mm2,.bx2 + movq mm3,.ex2 + movq .bx2,mm0 + movq .ex2,mm1 + movq .bx1,mm2 + movq .ex1,mm3 +end if +if Ext >= SSE + movups xmm0,.bx1 + movups xmm1,.bx2 + movups .bx2,xmm0 + movups .bx1,xmm1 +end if + + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx + + .bl_ok: + + push eax + push ebx ;store x1, x2 + movzx edx,word[size_x_var] + dec edx + cmp .x1,edx ;SIZE_X + jge .bl_end + cmp .x2,0 + jle .bl_end + + mov ebx,.x2 + sub ebx,.x1 + +if Ext >= SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + shufps xmm3,xmm3,0 + + cvtpi2ps xmm0,.bx1 ;mm0 + movlhps xmm0,xmm0 + cvtpi2ps xmm0,.ex1 ;mm2 + cvtpi2ps xmm1,.bx2 ;mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,.ex2 ;mm3 + subps xmm1,xmm0 + + divps xmm1,xmm3 + + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey,mm0 + movq .dby,mm1 + +else + + 
mov eax,.bx2 ; calc .dbx + sub eax,.bx1 + cdq + idiv ebx + push eax + + mov eax,.by2 ; calc .dby + sub eax,.by1 + cdq + idiv ebx + push eax + + mov eax,.ex2 ; calc .dex + sub eax,.ex1 + cdq + idiv ebx + push eax + + mov eax,.ey2 ; calc .dey + sub eax,.ey1 + cdq + idiv ebx + push eax + +end if + + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax + + cmp .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; cutting triangle exceedes screen + mov ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 + + mov eax,.dbx + imul ebx + add .bx1,eax + + mov eax,.dby + imul ebx + add .by1,eax + + mov eax,.dex + imul ebx + add .ex1,eax + + mov eax,.dey + imul ebx + add .ey1,eax + @@: + movzx edx,word[size_x_var] + dec edx + cmp .x2,edx ;SIZE_X + jl @f + mov .x2,edx ;SIZE_X + @@: + movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers + mov ebx,.y + mul ebx + mov ebx,.x1 + add eax,ebx + mov ebx,eax + lea eax,[eax*3] + add edi,eax + mov esi,.z_buff ; z-buffer filled with dd variables + shl ebx,2 + add esi,ebx + + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 + push .by1 + push dword .ex1 + push .ey1 + + push .z1 ; current z shl CATMULL_SHIFT + push esi +;if Ext = SSE2 +; movups xmm1,.dey +;end if +if Ext>=MMX + movq mm0,.cby + movq mm1,.cey + movq mm2,.dby + movq mm3,.dey +end if +if Ext >= SSE2 + mov eax,TEXTURE_SIZE + movd xmm1,eax + shufps xmm1,xmm1,0 + push dword TEX_X + push dword -TEX_X + push dword 1 + push dword -1 + movups xmm2,[esp] + movd xmm3,.bmap + shufps xmm3,xmm3,0 +end if + +;align 16 + .draw: + ; if TEX = SHIFTING ;bump drawing only in shifting mode + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] + jge .skip + +if Ext>=MMX + movq mm6,mm0 + psrld mm6,ROUND + movd eax,mm6 + psrlq mm6,32 + movd esi,mm6 +else + mov eax,.cby + sar eax,ROUND + mov esi,.cbx + sar esi,ROUND +end if + shl eax,TEX_SHIFT ;- + add 
esi,eax ;- ; esi - current bump map index + +if Ext = SSE2 + movd xmm0,esi + shufps xmm0,xmm0,0 + paddd xmm0,xmm2 + pand xmm0,xmm1 + paddd xmm0,xmm3 + + movd ebx,xmm0 + movzx eax,byte[ebx] +; +; shufps xmm0,xmm0,11100001b + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx,byte[ebx] + sub eax,ebx +; +; shufps xmm0,xmm0,11111110b + psrldq xmm0,4 + movd ebx,xmm0 + movzx edx, byte [ebx] +; +; shufps xmm0,xmm0,11111111b + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx, byte [ebx] + sub edx,ebx +; +else + + mov ebx,esi + dec ebx + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx eax,byte [ebx] + + mov ebx,esi + inc ebx + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + + sub eax,ebx + + mov ebx,esi + sub ebx,TEX_X + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx edx,byte [ebx] + + mov ebx,esi + add ebx,TEX_X + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + + sub edx,ebx +end if + ; eax - horizontal sub + ; edx - vertical sub +if Ext = NON + mov ebx,.cex ;.cex - current env map X + sar ebx,ROUND + add eax,ebx ; eax - modified x coord + + mov ebx,.cey ;.cey - current env map y + sar ebx,ROUND + add edx,ebx ; edx - modified y coord +else + movq mm6,mm1 ; mm5 - copy of cur env coords + psrld mm6,ROUND + movd ebx,mm6 + psrlq mm6,32 + add eax,ebx + movd ebx,mm6 + add edx,ebx +end if + or eax,eax + jl .black + cmp eax,TEX_X + jg .black + or edx,edx + jl .black + cmp edx,TEX_Y + jg .black + + shl edx,TEX_SHIFT + add edx,eax + lea esi,[edx*3] + add esi,.emap + lodsd + jmp .put_pixel + .black: + xor eax,eax + .put_pixel: + stosd + dec edi + mov ebx,.cz + mov esi,.czbuff + mov dword[esi],ebx + jmp .no_skip + .skip: + add edi,3 + .no_skip: + add .czbuff,4 + +;if Ext = SSE2 +; movups xmm0,.cey +; paddd xmm0,xmm1 +; movups .cey,xmm0 +; +;end if +if Ext >= MMX + paddd mm0,mm2 + paddd mm1,mm3 +end if + +if Ext=NON + mov eax,.dbx + add .cbx,eax + mov eax,.dby + add .cby,eax + mov eax,.dex + add .cex,eax + mov eax,.dey + add .cey,eax +end if + mov eax,.dz + add .cz,eax + + dec 
ecx + jnz .draw + ; end if + .bl_end: + mov esp,ebp +ret 56 diff --git a/programs/demos/view3ds/bump_tex.inc b/programs/demos/view3ds/bump_tex.inc index 0951f27932..3b325d04e3 100644 --- a/programs/demos/view3ds/bump_tex.inc +++ b/programs/demos/view3ds/bump_tex.inc @@ -1,1817 +1,1817 @@ - -;CATMULL_SHIFT equ 8 -;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 -;ROUND equ 8 -;Ext = NON -;MMX = 1 -;NON = 0 -;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- -;------- DOS 13h mode demos -------------------------------------------- -;------- Procedure draws bump triangle with texture, I use ------------- -;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- -;--------I calc texture pixel by this way: col1*col2/256 --------------- -bump_tex_triangle_z: -;------------------in - eax - x1 shl 16 + y1 ----------- -;---------------------- ebx - x2 shl 16 + y2 ----------- -;---------------------- ecx - x3 shl 16 + y3 ----------- -;---------------------- edx - pointer to bump map------- -;---------------------- esi - pointer to env map-------- -;---------------------- edi - pointer to screen buffer-- -;---------------------- stack : bump coordinates-------- -;---------------------- environment coordinates- -;---------------------- Z position coordinates-- -;---------------------- pointer to Z buffer----- -;---------------------- pointer to texture------ -;---------------------- texture coordinates----- -;-- Z-buffer - filled with coordinates as dword -------- -;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! 
-.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 ; b - bump map coords -.b_x3 equ ebp+12 ; e - env map coords -.b_y3 equ ebp+14 -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer -.tex_ptr equ dword[ebp+38] ; ptr to texture -.t_x1 equ ebp+42 ; texture coords -.t_y1 equ ebp+44 -.t_x2 equ ebp+46 -.t_y2 equ ebp+48 -.t_x3 equ ebp+50 -.t_y3 equ ebp+52 - - - -.t_bmap equ dword[ebp-4] ; pointer to bump map -.t_emap equ dword[ebp-8] ; pointer to env map -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] - -if 0 ;Ext <= SSE2 - -.dx12 equ dword[edi-4] -.dz12 equ [edi-8] -.dbx12 equ dword[edi-12] -.dby12 equ [edi-16] -.dex12 equ dword[edi-20] -.dey12 equ [edi-24] -.dtx12 equ dword[edi-28] -.dty12 equ [edi-32] - -.dx13 equ dword[ebp-52-4*1] -.dz13 equ [ebp-52-4*2] -.dbx13 equ dword[ebp-52-4*3] -.dby13 equ [ebp-52-4*4] -.dex13 equ dword[ebp-52-4*5] -.dey13 equ [ebp-52-4*6] -.dtx13 equ dword[ebp-52-4*7] -.dty13 equ [ebp-52-4*8] - - -.dx23 equ dword[ebp-(52+4*9)] -.dz23 equ [ebp-(52+4*10)] -.dbx23 equ dword[ebp-(52+4*11)] -.dby23 equ [ebp-(52+4*12)] -.dex23 equ dword[ebp-(52+4*13)] -.dey23 equ [ebp-(52+4*14)] -.dtx23 equ dword[ebp-(52+4*15)] -.dty23 equ [ebp-(52+4*16)] - -else - -.dx12 equ dword[ebp-24] -.dz12 equ [ebp-28] -.dbx12 equ dword[ebp-32] -.dby12 equ [ebp-36] -.dex12 equ dword[ebp-40] -.dey12 equ [ebp-44] -.dtx12 equ dword[ebp-48] -.dty12 equ [ebp-52] - -.dx13 equ dword[ebp-52-4*1] -.dz13 equ [ebp-52-4*2] -.dbx13 equ dword[ebp-52-4*3] -.dby13 equ [ebp-52-4*4] -.dex13 equ dword[ebp-52-4*5] -.dey13 equ [ebp-52-4*6] -.dtx13 equ dword[ebp-52-4*7] -.dty13 equ [ebp-52-4*8] - - -.dx23 equ dword[ebp-(52+4*9)] -.dz23 equ [ebp-(52+4*10)] -.dbx23 equ dword[ebp-(52+4*11)] -.dby23 equ [ebp-(52+4*12)] 
-.dex23 equ dword[ebp-(52+4*13)] -.dey23 equ [ebp-(52+4*14)] -.dtx23 equ dword[ebp-(52+4*15)] -.dty23 equ [ebp-(52+4*16)] - -end if - -if Ext < SSE - -.cx1 equ dword[ebp-(52+4*17)] ; current variables -.cz1 equ [ebp-(52+4*18)] -.cx2 equ dword[ebp-(52+4*19)] -.cz2 equ [ebp-(52+4*20)] -.cbx1 equ dword[ebp-(52+4*21)] -.cby1 equ [ebp-(52+4*22)] -.cbx2 equ dword[ebp-(52+4*23)] -.cby2 equ [ebp-(52+4*24)] -.cex1 equ dword[ebp-(52+4*25)] -.cey1 equ [ebp-(52+4*26)] -.cex2 equ dword[ebp-(52+4*27)] -.cey2 equ [ebp-(52+4*28)] - -.ctx1 equ dword[ebp-(52+4*29)] -.cty1 equ [ebp-(52+4*30)] -.ctx2 equ dword[ebp-(52+4*31)] -.cty2 equ [ebp-(52+4*32)] - -else - -.cx1 equ dword[ebp-(52+4*17)] ; current variables -.cz1 equ [ebp-(52+4*18)] -.cbx1 equ dword[ebp-(52+4*19)] -.cby1 equ [ebp-(52+4*20)] -.cex1 equ dword[ebp-(52+4*21)] -.cey1 equ [ebp-(52+4*22)] -.ctx1 equ dword[ebp-(52+4*23)] -.cty1 equ [ebp-(52+4*24)] - -.cx2 equ dword[ebp-(52+4*25)] -.cz2 equ [ebp-(52+4*26)] -.cbx2 equ dword[ebp-(52+4*27)] -.cby2 equ [ebp-(52+4*28)] -.cex2 equ dword[ebp-(52+4*29)] -.cey2 equ [ebp-(52+4*30)] -.ctx2 equ dword[ebp-(52+4*31)] -.cty2 equ [ebp-(52+4*32)] - -end if - cld - mov ebp,esp - push edx ; store bump map - push esi ; store e. map - ; sub esp,120 - .sort3: ; sort triangle coordinates... 
- cmp ax,bx - jle .sort1 - xchg eax,ebx - mov edx,dword[.b_x1] - xchg edx,dword[.b_x2] - mov dword[.b_x1],edx - mov edx,dword[.e_x1] - xchg edx,dword[.e_x2] - mov dword[.e_x1],edx - mov edx,dword[.t_x1] - xchg edx,dword[.t_x2] - mov dword[.t_x1],edx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx - mov edx,dword[.t_x2] - xchg edx,dword[.t_x3] - mov dword[.t_x2],edx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .sort3 - .sort2: - push eax ; store triangle coords in variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done - ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that - ; or edx,ebx ; if any *one* of them is negative a sign flag is raised - ; or edx,ecx - ; test edx,80000000h ; Check only X - ; jne .loop23_done - - ; cmp .x1,SIZE_X ; { - ; jg .loop23_done - ; cmp .x2,SIZE_X ; This can be optimized with effort - ; jg .loop23_done - ; cmp .x3,SIZE_X - ; jg .loop23_done ; { - - - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make -if 0 ;Ext >= SSE2 - pxor xmm0,xmm0 - movups .dty12,xmm0 - movups .dey12,xmm0 - sub esp,16 -else - mov ecx,8 - xor edx,edx - @@: - push edx ;dword 0 - loop @b -end if - jmp .bt_dx12_done - .bt_dx12_make: - movsx ebx,bx - - -if Ext>=SSE - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps -if 0 ;Ext >= SSE2 - mov edi,ebp - sub edi,512 - or edi,0x0000000f -end if - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd 
mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | dbx | dby | dex | dey | -;1 movups .dey12,xmm1 - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey12,mm0 - movq .dby12,mm1 -;------------- - ; pxor mm0,mm0 - ; pxor mm1,mm1 - ;/ pinsrw mm0,.z1,1 - ;/ pinsrw mm0,.x1,0 - ;/ pinsrw mm1,.z2,1 - ;/ pinsrw mm1,.x2,0 - mov ax,.z2 - sub ax,.z1 - cwde - - mov dx,.x2 - sub dx,.x1 - movsx edx,dx - - ;/ movd mm1,eax - - ;/ punpcklwd mm0,mm4 - ;/ punpcklwd mm1,mm4 - - ; cvtpi2ps xmm1,mm1 - ; cvtpi2ps xmm2,mm0 - ; subps xmm1,xmm2 - - ;/ psubd mm1,mm0 - - movd mm2,[.t_x1] - movd mm3,[.t_x2] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - ;/ cvtpi2ps xmm1,mm1 - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - ; movss xmm1,xmm4 - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | - - shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -;1 movlps .dty12,xmm1 -;1 movhps .dz12,xmm1 - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dty12,mm0 - movq .dz12,mm1 -;---- -; mov ax,.z2 -; sub ax,.z1 -; cwde -; mov bx,.x2 -; sub bx,.x1 -; movsx ebx,bx -; movd mm1,eax -; psllq mm1,32 -; movd mm1,ebx - -;; push ebx -;; push eax -;; movq mm1,[esp] -;; add esp,8 -;;; mov ax,.z1 -;;; mov bx,.z2 -;;; shl eax,16 -;;; shl ebx,16 -;;; mov ax,.x1 -;;; mov bx,.x2 -; movd mm2,[.t_x1] -; movd mm3,[.t_x2] -;; movd mm0,eax -;; movd mm1,ebx - -; pxor mm4,mm4 -;; punpcklwd mm0,mm4 -;; punpcklwd mm1,mm4 -; punpcklwd mm2,mm4 -; punpcklwd mm3,mm4 - -;; psubd mm1,mm0 -; psubd mm3,mm2 - - -; cvtpi2ps xmm1,mm1 -; movlhps xmm1,xmm1 -; cvtpi2ps xmm1,mm3 - -; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | - -; shufps xmm1,xmm1,10110001b - ; xmm1--> | dx | dz | dtx | dty | -; cvtps2pi mm0,xmm1 ; mm0 
-> 2 delta dwords | dtx | dty | -; movhlps xmm1,xmm1 -; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | -; movq .dty12,mm0 -; movq .dz12,mm1 -else - mov ax,.x2 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax - push eax - - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - - mov ax,word[.b_x2] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx12,eax - push eax - - mov ax,word[.b_y2] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby12,eax - push eax - - mov ax,word[.e_x2] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex12,eax - push eax - - mov ax,word[.e_y2] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey12,eax - push eax - - mov ax,word[.t_x2] - sub ax,word[.t_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dtx12,eax - push eax - - mov ax,word[.t_y2] - sub ax,word[.t_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dty12,eax - push eax -end if - .bt_dx12_done: - - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,8 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done - .bt_dx13_make: - movsx ebx,bx - -if Ext>=SSE - - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x3] - movd mm2,[.e_x1] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | dbx | dby | dex | dey | -;1 movups .dey13,xmm1 - - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey13,mm0 - movq .dby13,mm1 - - mov ax,.z3 - sub ax,.z1 - cwde - - mov dx,.x3 - sub dx,.x1 - movsx edx,dx - - 
movd mm2,[.t_x1] - movd mm3,[.t_x3] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | - - shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -;1 movlps .dty13,xmm1 -;1 movhps .dz13,xmm1 - - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dty13,mm0 - movq .dz13,mm1 - -else - - mov ax,.x3 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dx13,eax - push eax - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz13,eax - push eax - - - mov ax,word[.b_x3] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx13,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby13,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex13,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey13,eax - push eax - - mov ax,word[.t_x3] - sub ax,word[.t_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dtx13,eax - push eax - - mov ax,word[.t_y3] - sub ax,word[.t_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dty13,eax - push eax -end if - .bt_dx13_done: - - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,8 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done - .bt_dx23_make: - movsx ebx,bx - -if Ext>=SSE - - sub esp,32 - ; mov eax,256 - cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - psubd mm1,mm0 - psubd mm3,mm2 - - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - 
cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | - - shufps xmm1,xmm1,10110001b - ;xmm1--> | dbx | dby | dex | dey | -;1 movups .dey23,xmm1 - - cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ;mm1,xmm1 - movq .dey23,mm0 - movq .dby23,mm1 - - mov ax,.z3 - sub ax,.z2 - cwde - - mov dx,.x3 - sub dx,.x2 - movsx edx,dx - - movd mm2,[.t_x2] - movd mm3,[.t_x3] - - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - psubd mm3,mm2 - - cvtsi2ss xmm1,eax - movlhps xmm1,xmm1 - cvtsi2ss xmm1,edx - shufps xmm1,xmm1,00101111b - cvtpi2ps xmm1,mm3 - - divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | - - shufps xmm1,xmm1,11100001b - ; xmm1--> | dx | dz | dtx | dty | -; movlps .dty23,xmm1 -; movhps .dz23,xmm1 - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | - movq .dty23,mm0 - movq .dz23,mm1 - - -else - mov ax,.x3 - sub ax,.x2 - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dx23,eax - push eax - - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz23,eax - push eax - - mov ax,word[.b_x3] - sub ax,word[.b_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx23,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby23,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex23,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey23,eax - push eax - - - mov ax,word[.t_x3] - sub ax,word[.t_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dtx23,eax - push eax - - mov ax,word[.t_y3] - sub ax,word[.t_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dty23,eax - push eax -end if - ; sub esp,40 - .bt_dx23_done: - sub esp,64 - - movsx eax,.x1 - shl eax,ROUND - mov .cx1,eax - mov .cx2,eax - ; push eax - ; push eax - - movsx ebx,word[.b_x1] - shl 
ebx,ROUND - mov .cbx1,ebx - mov .cbx2,ebx - ; push ebx - ; push ebx - - movsx ecx,word[.b_y1] - shl ecx,ROUND - mov .cby1,ecx - mov .cby2,ecx - ; push ecx - ; push ecx - - movsx edx,word[.e_x1] - shl edx,ROUND - mov .cex1,edx - mov .cex2,edx - ; push edx - ; push edx - - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax - ; push eax - ; push eax - - movsx ebx,.z1 - shl ebx,CATMULL_SHIFT - mov .cz1,ebx - mov .cz2,ebx - ; push ebx - ; push ebx - - ; sub esp,16 - movsx ecx,word[.t_x1] - shl ecx,ROUND - mov .ctx1,ecx - mov .ctx2,ecx - ;push ecx - ;push ecx - - movsx edx,word[.t_y1] - shl edx,ROUND - mov .cty1,edx - mov .cty2,edx - ; push edx - ; push edx - -if Ext >= SSE2 - movups xmm0,.cby1 - movups xmm1,.cty1 - movups xmm2,.cby2 - movups xmm3,.cty2 - movups xmm4,.dby13 - movups xmm5,.dty13 - movups xmm6,.dby12 - movups xmm7,.dty12 - .scby1 equ [edi] - .scty1 equ [edi+16] - .scby2 equ [edi+32] - .scty2 equ [edi+48] - .sdby13 equ [edi+64] - .sdty13 equ [edi+80] - .sdby12 equ [edi+96] - .sdty12 equ [edi+128] - push edi - mov edi,sse_repository - movaps .scby1,xmm0 - movaps .scty1,xmm1 - movaps .scby2,xmm2 - movaps .scty2,xmm3 - movaps .sdby13,xmm4 - movaps .sdty13,xmm5 - movaps .sdby12,xmm6 - movaps .sdty12,xmm7 - pop edi - -end if - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done - .loop12: -;if Ext >= SSE2 -; fxsave [sse_repository] -;end if - call .call_line -if Ext >= SSE2 -; fxrstor [sse_repository] - movups xmm0,.cby1 - movups xmm1,.cty1 - movups xmm2,.cby2 - movups xmm3,.cty2 - ; movups xmm4,.dby13 - ; movups xmm5,.dty13 - ; movups xmm6,.dby12 - ; movups xmm7,.dty12 - ; paddd xmm0,xmm4 - ; paddd xmm1,xmm5 - ; paddd xmm2,xmm6 - ; paddd xmm3,xmm7 - push edi - mov edi,sse_repository - paddd xmm0,.sdby13 - paddd xmm1,.sdty13 - paddd xmm2,.sdby12 - paddd xmm3,.sdty12 - pop edi - movups .cby1,xmm0 - movups .cty1,xmm1 - movups .cby2,xmm2 - movups .cty2,xmm3 -end if - -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - movq mm1,.cby1 - movq mm2,.cey2 - 
movq mm3,.cey1 - movq mm4,.cty1 - movq mm5,.cty2 - movq mm6,.cz1 - movq mm7,.cz2 - paddd mm0,.dby12 - paddd mm1,.dby13 - paddd mm2,.dey12 - paddd mm3,.dey13 - paddd mm4,.dty13 - paddd mm5,.dty12 - paddd mm6,.dz13 - paddd mm7,.dz12 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 - movq .cty1,mm4 - movq .cty2,mm5 - movq .cz1,mm6 - movq .cz2,mm7 -end if -if Ext = NON - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax - - mov eax,.dtx13 - add .ctx1,eax - mov ebx,.dtx12 - add .ctx2,ebx - mov edx,.dty13 - add .cty1,edx - mov eax,.dty12 - add .cty2,eax - - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx -end if - inc ecx - cmp cx,.y2 - jl .loop12 - .loop12_done: - - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done - - - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax - - movsx ebx,.x2 - shl ebx,ROUND - mov .cx2,ebx - - movzx edx,word[.b_x2] - shl edx,ROUND - mov .cbx2,edx - - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax - - movzx ebx,word[.e_x2] - shl ebx,ROUND - mov .cex2,ebx - - movzx edx,word[.e_y2] - shl edx,ROUND - mov .cey2,edx - - movzx eax,word[.t_x2] - shl eax,ROUND - mov .ctx2,eax - - movzx ebx,word[.t_y2] - shl ebx,ROUND - mov .cty2,ebx -if Ext >= SSE2 - movups xmm2,.cby2 - movups xmm3,.cty2 - ; movups xmm4,.dby13 - ; movups xmm5,.dty13 - movups xmm6,.dby23 - movups xmm7,.dty23 -; .scby1 equ [edi] -; .scty1 equ [edi+16] -; .scby2 equ [edi+32] -; .scty2 equ [edi+48] -; .sdby13 equ [edi+64] -; .sdty13 equ [edi+80] - .sdby23 equ [edi+160] - .sdty23 equ [edi+192] - push edi - mov edi,sse_repository -; movaps .scby1,xmm0 -; movaps .scty1,xmm1 - movaps .scby2,xmm2 - movaps .scty2,xmm3 -; movaps .sdby13,xmm4 -; movaps .sdty13,xmm5 - movaps .sdby23,xmm6 - 
movaps .sdty23,xmm7 - pop edi - -end if - - .loop23: -;if Ext >= SSE2 -; fxsave [sse_repository] -;end if - call .call_line - -if Ext >= SSE2 - - movups xmm0,.cby1 - movups xmm1,.cty1 - movups xmm2,.cby2 - movups xmm3,.cty2 - - - push edi - mov edi,sse_repository - paddd xmm0,.sdby13 - paddd xmm1,.sdty13 - paddd xmm2,.sdby23 - paddd xmm3,.sdty23 - pop edi - movups .cby1,xmm0 - movups .cty1,xmm1 - movups .cby2,xmm2 - movups .cty2,xmm3 - - - - -; fxrstor [sse_repository] -; movups xmm0,.cby1 -; movups xmm1,.cty1 -; movups xmm2,.cby2 -; movups xmm3,.cty2 -; movups xmm4,.dby13 -; movups xmm5,.dty13 -; movups xmm6,.dby23 -; movups xmm7,.dty23 -; paddd xmm0,xmm4 -; paddd xmm1,xmm5 -; paddd xmm2,xmm6 - ; paddd xmm3,xmm7 - ; movups .cby1,xmm0 - ; movups .cty1,xmm1 - ; movups .cby2,xmm2 - ; movups .cty2,xmm3 -; -end if -if (Ext = MMX) | (Ext = SSE) - movq mm0,.cby2 - movq mm1,.cby1 - movq mm2,.cey2 - movq mm3,.cey1 - movq mm4,.cty1 - movq mm5,.cty2 - movq mm6,.cz1 - movq mm7,.cz2 - paddd mm0,.dby23 - paddd mm1,.dby13 - paddd mm2,.dey23 - paddd mm3,.dey13 - paddd mm4,.dty13 - paddd mm5,.dty23 - paddd mm6,.dz13 - paddd mm7,.dz23 - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey2,mm2 - movq .cey1,mm3 - movq .cty1,mm4 - movq .cty2,mm5 - movq .cz1,mm6 - movq .cz2,mm7 -end if -If Ext = NON - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby23 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax - - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx - - mov eax,.dtx13 - add .ctx1,eax - mov ebx,.dtx23 - add .ctx2,ebx - mov edx,.dty13 - add .cty1,edx - mov eax,.dty23 - add .cty2,eax -end if - inc ecx - cmp cx,.y3 - jl .loop23 - .loop23_done: - - mov esp,ebp -ret 50 - -.call_line: - - pushad - ; xmm0= cby1,cbx1,cz1,cx1 - ; xmm1= cty1,ctx1,cey1,cex1 -if Ext >= 
SSE2 - sub esp,8 - shufps xmm1,xmm1,10110001b - shufps xmm3,xmm3,10110001b - movlps [esp],xmm1 -else - push dword .cty1 - push .ctx1 -end if - push dword .cz1 -if Ext>=SSE2 - sub esp,8 - movlps [esp],xmm3 -else - push dword .cty2 - push .ctx2 -end if - push dword .cz2 -if Ext>=SSE2 - sub esp,32 - movhps [esp+24],xmm3 - shufps xmm2,xmm2,10110001b - movlps [esp+16],xmm2 - movhps [esp+8],xmm1 - shufps xmm0,xmm0,10110001b - movlps [esp],xmm0 ;================================ - -else - push dword .cey2 - push .cex2 - push dword .cby2 - push .cbx2 - push dword .cey1 - push .cex1 - push dword .cby1 - push .cbx1 -end if - - push .tex_ptr - push .z_buff - push .t_emap - push .t_bmap - - push ecx - - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND - - call bump_tex_line_z - - popad -;end if -ret -bump_tex_line_z: -;--------------in: eax - x1 -;-------------- ebx - x2 -;-------------- edi - pointer to screen buffer -;stack - another parameters : -.y equ dword [ebp+4] -.bmap equ dword [ebp+8] ; bump map pointer -.emap equ dword [ebp+12] ; env map pointer -.z_buff equ dword [ebp+16] ; z buffer -.tex_map equ dword [ebp+20] ; texture pointer - -.bx1 equ [ebp+24] ; --- -.by1 equ [ebp+28] ; | -.ex1 equ [ebp+32] ; | -.ey1 equ [ebp+36] ; | -.bx2 equ [ebp+40] ; | -.by2 equ [ebp+44] ; |> b. map and e. 
map coords -.ex2 equ [ebp+48] ; |> shifted shl ROUND -.ey2 equ [ebp+52] ; --- -.z2 equ [ebp+56] -.tx2 equ [ebp+60] -.ty2 equ [ebp+64] -.z1 equ [ebp+68] -.tx1 equ [ebp+72] -.ty1 equ [ebp+76] - - - -.x1 equ [ebp-4] -.x2 equ [ebp-8] -.dbx equ [ebp-12] -.dby equ [ebp-16] -.dex equ [ebp-20] -.dey equ [ebp-24] -.dz equ [ebp-28] -.dtx equ [ebp-32] -.dty equ [ebp-36] - -.cbx equ [ebp-40] -.cby equ [ebp-44] -.cex equ [ebp-48] -.cey equ [ebp-52] -.cz equ [ebp-56] -.czbuff equ [ebp-60] -.ctx equ [ebp-64] -.cty equ [ebp-68] -.c_scr equ [ebp-72] - -.temp1 equ ebp-80 -.temp2 equ ebp-88 -.temp3 equ ebp-76 -.temp4 equ ebp-84 -.temp5 equ ebp-92 - - mov ebp,esp - - mov ecx,.y - or ecx,ecx - jl .bl_end - movzx edx,word[size_y_var] - cmp ecx,edx ;SIZE_Y - jge .bl_end - - cmp eax,ebx - jl .bl_ok - je .bl_end - - -if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx - - mov edx,.ex1 - xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx - - mov edx,.tx1 - xchg edx,.tx2 - mov .tx1,edx - mov edx,.ty1 - xchg edx,.ty2 - mov .ty1,edx -end if -if Ext = MMX - movq mm0,.bx1 - movq mm1,.bx2 - movq mm2,.ex1 - movq mm3,.ex2 - movq mm4,.tx1 - movq mm5,.tx2 - movq .bx2,mm0 - movq .bx1,mm1 - movq .ex1,mm3 - movq .ex2,mm2 - movq .tx1,mm5 - movq .tx2,mm4 -end if -if Ext>=SSE - movups xmm0,.bx1 - movups xmm1,.bx2 - movups .bx1,xmm1 - movups .bx2,xmm0 - movq mm0,.tx1 - movq mm1,.tx2 - movq .tx1,mm1 - movq .tx2,mm0 -end if -;if Ext>=SSE2 -; movaps xmm4,xmm0 -; movaps xmm0,xmm2 -; movaps xmm2,xmm4 -; movaps xmm5,xmm1 -; movaps xmm1,xmm3 -; movaps xmm3,xmm5 -;else - - xchg eax,ebx - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx -;end if - .bl_ok: -;if Ext >= SSE2 -; shufps xmm0,xmm0,11100001b -; shufps xmm2,xmm2,11100001b -; movlps .bx1,xmm0 -; movlps .bx2,xmm2 - - -; shufps xmm0,xmm0,00011011b -; shufps xmm2,xmm2,00011011b -; movd eax,xmm0 -; movd ebx,xmm2 -; shufps xmm0,xmm0,11000110b -; shufps xmm2,xmm2,11000110b -; movd .z1,xmm0 -; movd 
.z2,xmm2 -; shufps xmm1,xmm1,10110001b -; shufps xmm3,xmm3,10110001b -; movlps .ex1,xmm1 -; movlps .ex2,xmm2 -; movhps .tx1,xmm1 -; movhps .tx2,xmm2 - -; xchg eax,ebx -; mov edx,.z1 -; xchg edx,.z2 -; mov .z1,edx - - -;end if - - push eax - push ebx ;store x1, x2 - movzx ebx,word[size_x_var] - ; mov eax,.x1 - cmp dword .x1,ebx ;dword .x1,SIZE_X - jge .bl_end - cmp dword .x2,0 - jle .bl_end - - mov ebx,.x2 - sub ebx,.x1 - -if Ext>=SSE - - sub esp,28 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 -; float using SSE variant ::--> -; movups xmm0,.bx1 ; new -; movups xmm1,.bx2 ; new - - cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point - movlhps xmm0,xmm0 - cvtpi2ps xmm0,.ex1 ;mm2 - cvtpi2ps xmm1,.bx2 ;mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,.ex2 ;mm3 - subps xmm1,xmm0 - - divps xmm1,xmm3 - - shufps xmm1,xmm1,10110001b -; movups .dey,xmm1 ; new - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey,mm0 - movq .dby,mm1 - - movd mm2,.z1 - movd mm3,.z2 - - cvtpi2ps xmm0,.tx1 ;mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,.tx2 ;mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 -; movups xmm0,,z1 ; new -; movups xmm1,.z2 ; new - subps xmm1,xmm0 - - divps xmm1,xmm3 - -; movups .dz,xmm1 ;new - - shufps xmm1,xmm1,10110100b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movd .dz,mm0 - movq .dty,mm1 - -else - - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax - - mov eax,.by2 ; calc .dby - sub eax,.by1 - cdq - idiv ebx - push eax - - mov eax,.ex2 ; calc .dex - sub eax,.ex1 - cdq - idiv ebx - push eax - - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax - - - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax - - mov eax,.tx2 ; calc .dtx - sub eax,.tx1 - cdq - idiv ebx - push eax - - mov eax,.ty2 ; calc .dty - sub eax,.ty1 - cdq - idiv ebx - push eax - -end if - cmp dword .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; 
cutting triangle exceedes screen - mov ebx,.x1 - neg ebx - -;if Ext >= SSE - -; cvtsi2ss xmm0,ebx -; shufps xmm0,xmm0,0 -; movups xmm1,.dey -; mulps xmm1,xmm0 -; shufps xmm1,xmm1,00011011b -; movups xmm2,.bx1 -; addps xmm2,xmm1 -; movups .bx1,xmm2 - - mov eax,.dz - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov dword .x1,0 - - mov eax,.dbx - imul ebx - add .bx1,eax - - mov eax,.dby - imul ebx - add .by1,eax - - mov eax,.dex - imul ebx - add .ex1,eax - - mov eax,.dey - imul ebx - add .ey1,eax - - mov eax,.dtx - imul ebx - add .tx1,eax - - mov eax,.dty - imul ebx - add .ty1,eax - - @@: - ; mov ebx,.x2 - movzx eax,word[size_x_var] - ; cmp dword .x2,SIZE_X - cmp dword .x2,eax ; eax,ebx - jl @f - mov dword .x2,eax ;SIZE_X - @@: - movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers - mul .y - add eax,.x1 - lea esi,[4*eax] - add esi,.z_buff ; z-buffer filled with dd variables - lea eax,[eax*3] - add edi,eax - - - mov ecx,.x2 - sub ecx,.x1 - ; init current variables - push dword .bx1 ; current b, e and t shifted shl ROUND .cbx - push dword .by1 ; .cby - push dword .ex1 ; .cex - push dword .ey1 ; .cey - - push dword .z1 ; current z shl CATMULL_SHIFT ; .cz - push esi ; .czbuff - - push dword .tx1 ; .ctx - push dword .ty1 ; .cty - push edi ; .c_scr -if Ext = SSE2 - mov eax,TEXTURE_SIZE - movd xmm1,eax - shufps xmm1,xmm1,0 - push dword TEX_X - push dword -TEX_X - push dword 1 - push dword -1 - movups xmm2,[esp] - movd xmm3,.bmap - shufps xmm3,xmm3,0 -end if - -if Ext>=MMX - movq mm7,.cty - movq mm6,.cby - movq mm5,.cey -; movq mm4,.dtyq -; movq mm3,.dbyq -end if - - .draw: - ; if TEX = SHIFTING ;bump drawing only in shifting mode - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] - jge .skip -if Ext=NON - mov eax,.cby - shr eax,ROUND - mov esi,.cbx - shr esi,ROUND -else - movq mm1,mm6 - psrld mm1,ROUND - movd eax,mm1 - psrlq mm1,32 - movd esi,mm1 -end if - - shl eax,TEX_SHIFT - add esi,eax ;- ; esi 
- current bump map index - -if Ext = SSE2 - - movd xmm0,esi - shufps xmm0,xmm0,0 - paddd xmm0,xmm2 - pand xmm0,xmm1 - paddd xmm0,xmm3 - - movd ebx,xmm0 - movzx eax,byte[ebx] -; -; shufps xmm0,xmm0,11100001b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx,byte[ebx] - sub eax,ebx -; -; shufps xmm0,xmm0,11111110b - psrldq xmm0,4 - movd ebx,xmm0 - movzx edx, byte [ebx] -; -; shufps xmm0,xmm0,11111111b - psrldq xmm0,4 - movd ebx,xmm0 - movzx ebx, byte [ebx] - sub edx,ebx -; -else -; mov ebx,esi -; dec ebx - lea ebx,[esi-1] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx eax,byte [ebx] - -; mov ebx,esi -; inc ebx - lea ebx,[esi+1] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - sub eax,ebx - -; mov ebx,esi -; sub ebx,TEX_X - lea ebx,[esi-TEX_X] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx edx,byte [ebx] - -; mov ebx,esi -; add ebx,TEX_X - lea ebx,[esi+TEX_X] - and ebx,TEXTURE_SIZE - add ebx,.bmap - movzx ebx,byte [ebx] - sub edx,ebx -end if - - ; eax - horizontal sub modificated x coord - ; edx - vertical sub modificated y coord -if Ext=NON - mov ebx,.cex ;.cex - current env map X - shr ebx,ROUND - add eax,ebx - - - mov ebx,.cey ;.cey - current env map y - shr ebx,ROUND - add edx,ebx - -else - movq mm1,mm5 ; mm5 - copy of cur env coords - psrld mm1,ROUND - movd ebx,mm1 - psrlq mm1,32 - add eax,ebx - movd ebx,mm1 - add edx,ebx -; movq qword[.temp1],mm3 -; add eax,dword [.temp1] -; add edx,dword [.temp1+4] -end if - - or eax,eax - jl .black - cmp eax,TEX_X - jg .black - or edx,edx - jl .black - cmp edx,TEX_Y - jg .black - - shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze - add edx,eax ; proponuje nie stawiac czarnego pixela tylko - lea esi,[edx*3] ; niezaburzony. 
- add esi,.emap ; - lodsd - -if Ext=NON - mov edx,.cty - shr edx,ROUND ; sar - - mov edi,.ctx - shr edi,ROUND ; sar -else - movq mm1,mm7 - psrld mm1,ROUND - movd edx,mm1 - psrlq mm1,32 - movd edi,mm1 - -end if - - shl edx,TEX_SHIFT - add edi,edx - and edi,TEXTURE_SIZE - lea esi,[edi*3] - add esi,.tex_map - -if Ext=NON - mov edx,eax - lodsd - push ax - mul dl - mov dl,ah - pop ax - shr ax,8 - mul dh - mov al,dl - mov edi,.c_scr - stosw - shr edx,16 - shr eax,16 - mul dl - shr ax,8 - stosb -else - movd mm0,eax - pxor mm1,mm1 - punpcklbw mm0,mm1 - movd mm2,[esi] - punpcklbw mm2,mm1 - pmullw mm0,mm2 - psrlw mm0,8 - packuswb mm0,mm1 - mov edi,.c_scr - movd [edi],mm0 - -end if - - jmp .actual_zbuff ; actualize z buffer - @@: - .black: - xor eax,eax - mov edi,.c_scr - stosd - .actual_zbuff: - mov eax,.cz - mov edi,.czbuff - stosd - - .skip: - add dword .czbuff,4 - add dword .c_scr,3 - -if Ext=NON - mov eax,.dbx - add .cbx,eax - mov ebx,.dby - add .cby,ebx - - mov edx,.dex - add .cex,edx - mov eax,.dey - add .cey,eax - - mov ebx,.dtx - add .ctx,ebx - mov edx,.dty - add .cty,edx - -else - paddd mm7,.dty - paddd mm6,.dby - paddd mm5,.dey -end if - mov eax,.dz - add .cz,eax - - dec ecx - jnz .draw - - .bl_end: - mov esp,ebp -ret 76 -;Ext = MMX - -; else -; movq mm5, qword[.temp1] ;- -; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X -; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE -; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap -; movd ebx,mm5 -; psrlq mm5,32 -; end if + +;CATMULL_SHIFT equ 8 +;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 +;ROUND equ 8 +;Ext = NON +;MMX = 1 +;NON = 0 +;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- +;------- DOS 13h mode demos -------------------------------------------- +;------- Procedure draws bump triangle with texture, I use ------------- +;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- +;--------I calc texture pixel by this way: 
col1*col2/256 --------------- +bump_tex_triangle_z: +;------------------in - eax - x1 shl 16 + y1 ----------- +;---------------------- ebx - x2 shl 16 + y2 ----------- +;---------------------- ecx - x3 shl 16 + y3 ----------- +;---------------------- edx - pointer to bump map------- +;---------------------- esi - pointer to env map-------- +;---------------------- edi - pointer to screen buffer-- +;---------------------- stack : bump coordinates-------- +;---------------------- environment coordinates- +;---------------------- Z position coordinates-- +;---------------------- pointer to Z buffer----- +;---------------------- pointer to texture------ +;---------------------- texture coordinates----- +;-- Z-buffer - filled with coordinates as dword -------- +;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- +.b_x1 equ ebp+4 ; procedure don't save registers !!! +.b_y1 equ ebp+6 ; each coordinate as word +.b_x2 equ ebp+8 +.b_y2 equ ebp+10 ; b - bump map coords +.b_x3 equ ebp+12 ; e - env map coords +.b_y3 equ ebp+14 +.e_x1 equ ebp+16 +.e_y1 equ ebp+18 +.e_x2 equ ebp+20 +.e_y2 equ ebp+22 +.e_x3 equ ebp+24 +.e_y3 equ ebp+26 +.z1 equ word[ebp+28] +.z2 equ word[ebp+30] +.z3 equ word[ebp+32] +.z_buff equ dword[ebp+34] ; pointer to Z-buffer +.tex_ptr equ dword[ebp+38] ; ptr to texture +.t_x1 equ ebp+42 ; texture coords +.t_y1 equ ebp+44 +.t_x2 equ ebp+46 +.t_y2 equ ebp+48 +.t_x3 equ ebp+50 +.t_y3 equ ebp+52 + + + +.t_bmap equ dword[ebp-4] ; pointer to bump map +.t_emap equ dword[ebp-8] ; pointer to env map +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] + +if 0 ;Ext <= SSE2 + +.dx12 equ dword[edi-4] +.dz12 equ [edi-8] +.dbx12 equ dword[edi-12] +.dby12 equ [edi-16] +.dex12 equ dword[edi-20] +.dey12 equ [edi-24] +.dtx12 equ dword[edi-28] +.dty12 equ [edi-32] + +.dx13 equ dword[ebp-52-4*1] +.dz13 equ [ebp-52-4*2] +.dbx13 equ dword[ebp-52-4*3] +.dby13 equ [ebp-52-4*4] +.dex13 equ 
dword[ebp-52-4*5] +.dey13 equ [ebp-52-4*6] +.dtx13 equ dword[ebp-52-4*7] +.dty13 equ [ebp-52-4*8] + + +.dx23 equ dword[ebp-(52+4*9)] +.dz23 equ [ebp-(52+4*10)] +.dbx23 equ dword[ebp-(52+4*11)] +.dby23 equ [ebp-(52+4*12)] +.dex23 equ dword[ebp-(52+4*13)] +.dey23 equ [ebp-(52+4*14)] +.dtx23 equ dword[ebp-(52+4*15)] +.dty23 equ [ebp-(52+4*16)] + +else + +.dx12 equ dword[ebp-24] +.dz12 equ [ebp-28] +.dbx12 equ dword[ebp-32] +.dby12 equ [ebp-36] +.dex12 equ dword[ebp-40] +.dey12 equ [ebp-44] +.dtx12 equ dword[ebp-48] +.dty12 equ [ebp-52] + +.dx13 equ dword[ebp-52-4*1] +.dz13 equ [ebp-52-4*2] +.dbx13 equ dword[ebp-52-4*3] +.dby13 equ [ebp-52-4*4] +.dex13 equ dword[ebp-52-4*5] +.dey13 equ [ebp-52-4*6] +.dtx13 equ dword[ebp-52-4*7] +.dty13 equ [ebp-52-4*8] + + +.dx23 equ dword[ebp-(52+4*9)] +.dz23 equ [ebp-(52+4*10)] +.dbx23 equ dword[ebp-(52+4*11)] +.dby23 equ [ebp-(52+4*12)] +.dex23 equ dword[ebp-(52+4*13)] +.dey23 equ [ebp-(52+4*14)] +.dtx23 equ dword[ebp-(52+4*15)] +.dty23 equ [ebp-(52+4*16)] + +end if + +if Ext < SSE + +.cx1 equ dword[ebp-(52+4*17)] ; current variables +.cz1 equ [ebp-(52+4*18)] +.cx2 equ dword[ebp-(52+4*19)] +.cz2 equ [ebp-(52+4*20)] +.cbx1 equ dword[ebp-(52+4*21)] +.cby1 equ [ebp-(52+4*22)] +.cbx2 equ dword[ebp-(52+4*23)] +.cby2 equ [ebp-(52+4*24)] +.cex1 equ dword[ebp-(52+4*25)] +.cey1 equ [ebp-(52+4*26)] +.cex2 equ dword[ebp-(52+4*27)] +.cey2 equ [ebp-(52+4*28)] + +.ctx1 equ dword[ebp-(52+4*29)] +.cty1 equ [ebp-(52+4*30)] +.ctx2 equ dword[ebp-(52+4*31)] +.cty2 equ [ebp-(52+4*32)] + +else + +.cx1 equ dword[ebp-(52+4*17)] ; current variables +.cz1 equ [ebp-(52+4*18)] +.cbx1 equ dword[ebp-(52+4*19)] +.cby1 equ [ebp-(52+4*20)] +.cex1 equ dword[ebp-(52+4*21)] +.cey1 equ [ebp-(52+4*22)] +.ctx1 equ dword[ebp-(52+4*23)] +.cty1 equ [ebp-(52+4*24)] + +.cx2 equ dword[ebp-(52+4*25)] +.cz2 equ [ebp-(52+4*26)] +.cbx2 equ dword[ebp-(52+4*27)] +.cby2 equ [ebp-(52+4*28)] +.cex2 equ dword[ebp-(52+4*29)] +.cey2 equ [ebp-(52+4*30)] +.ctx2 equ dword[ebp-(52+4*31)] 
+.cty2 equ [ebp-(52+4*32)] + +end if + cld + mov ebp,esp + push edx ; store bump map + push esi ; store e. map + ; sub esp,120 + .sort3: ; sort triangle coordinates... + cmp ax,bx + jle .sort1 + xchg eax,ebx + mov edx,dword[.b_x1] + xchg edx,dword[.b_x2] + mov dword[.b_x1],edx + mov edx,dword[.e_x1] + xchg edx,dword[.e_x2] + mov dword[.e_x1],edx + mov edx,dword[.t_x1] + xchg edx,dword[.t_x2] + mov dword[.t_x1],edx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.b_x2] + xchg edx,dword[.b_x3] + mov dword[.b_x2],edx + mov edx,dword[.e_x2] + xchg edx,dword[.e_x3] + mov dword[.e_x2],edx + mov edx,dword[.t_x2] + xchg edx,dword[.t_x3] + mov dword[.t_x2],edx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .sort3 + .sort2: + push eax ; store triangle coords in variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done + ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that + ; or edx,ebx ; if any *one* of them is negative a sign flag is raised + ; or edx,ecx + ; test edx,80000000h ; Check only X + ; jne .loop23_done + + ; cmp .x1,SIZE_X ; { + ; jg .loop23_done + ; cmp .x2,SIZE_X ; This can be optimized with effort + ; jg .loop23_done + ; cmp .x3,SIZE_X + ; jg .loop23_done ; { + + + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make +if 0 ;Ext >= SSE2 + pxor xmm0,xmm0 + movups .dty12,xmm0 + movups .dey12,xmm0 + sub esp,16 +else + mov ecx,8 + xor edx,edx + @@: + push edx ;dword 0 + loop @b +end if + jmp .bt_dx12_done + .bt_dx12_make: + movsx ebx,bx + + +if Ext>=SSE + sub esp,32 + ; mov eax,256 + cvtsi2ss xmm4,[i255d] + cvtsi2ss xmm3,ebx ;rcps +if 0 ;Ext >= SSE2 + mov edi,ebp + sub edi,512 + or edi,0x0000000f +end if + divss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x1] + movd 
mm1,[.b_x2] + movd mm2,[.e_x1] + movd mm3,[.e_x2] + + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | + + shufps xmm1,xmm1,10110001b + ;xmm1--> | dbx | dby | dex | dey | +;1 movups .dey12,xmm1 + cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 ;mm1,xmm1 + movq .dey12,mm0 + movq .dby12,mm1 +;------------- + ; pxor mm0,mm0 + ; pxor mm1,mm1 + ;/ pinsrw mm0,.z1,1 + ;/ pinsrw mm0,.x1,0 + ;/ pinsrw mm1,.z2,1 + ;/ pinsrw mm1,.x2,0 + mov ax,.z2 + sub ax,.z1 + cwde + + mov dx,.x2 + sub dx,.x1 + movsx edx,dx + + ;/ movd mm1,eax + + ;/ punpcklwd mm0,mm4 + ;/ punpcklwd mm1,mm4 + + ; cvtpi2ps xmm1,mm1 + ; cvtpi2ps xmm2,mm0 + ; subps xmm1,xmm2 + + ;/ psubd mm1,mm0 + + movd mm2,[.t_x1] + movd mm3,[.t_x2] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + ;/ cvtpi2ps xmm1,mm1 + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtsi2ss xmm1,edx + ; movss xmm1,xmm4 + shufps xmm1,xmm1,00101111b + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | dx | dz | dtx | dty | +;1 movlps .dty12,xmm1 +;1 movhps .dz12,xmm1 + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dty12,mm0 + movq .dz12,mm1 +;---- +; mov ax,.z2 +; sub ax,.z1 +; cwde +; mov bx,.x2 +; sub bx,.x1 +; movsx ebx,bx +; movd mm1,eax +; psllq mm1,32 +; movd mm1,ebx + +;; push ebx +;; push eax +;; movq mm1,[esp] +;; add esp,8 +;;; mov ax,.z1 +;;; mov bx,.z2 +;;; shl eax,16 +;;; shl ebx,16 +;;; mov ax,.x1 +;;; mov bx,.x2 +; movd mm2,[.t_x1] +; movd mm3,[.t_x2] +;; movd mm0,eax +;; movd mm1,ebx + +; pxor mm4,mm4 +;; punpcklwd mm0,mm4 +;; punpcklwd mm1,mm4 +; punpcklwd mm2,mm4 +; punpcklwd mm3,mm4 + +;; psubd mm1,mm0 +; psubd mm3,mm2 + + +; cvtpi2ps xmm1,mm1 +; movlhps 
xmm1,xmm1 +; cvtpi2ps xmm1,mm3 + +; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | + +; shufps xmm1,xmm1,10110001b + ; xmm1--> | dx | dz | dtx | dty | +; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | +; movhlps xmm1,xmm1 +; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | +; movq .dty12,mm0 +; movq .dz12,mm1 +else + mov ax,.x2 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax + push eax + + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + + mov ax,word[.b_x2] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx12,eax + push eax + + mov ax,word[.b_y2] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby12,eax + push eax + + mov ax,word[.e_x2] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex12,eax + push eax + + mov ax,word[.e_y2] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey12,eax + push eax + + mov ax,word[.t_x2] + sub ax,word[.t_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dtx12,eax + push eax + + mov ax,word[.t_y2] + sub ax,word[.t_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dty12,eax + push eax +end if + .bt_dx12_done: + + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,8 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx13_done + .bt_dx13_make: + movsx ebx,bx + +if Ext>=SSE + + sub esp,32 + ; mov eax,256 + cvtsi2ss xmm4,[i255d] + cvtsi2ss xmm3,ebx ;rcps + divss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x1] + movd mm1,[.b_x3] + movd mm2,[.e_x1] + movd mm3,[.e_x3] + + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | + + shufps xmm1,xmm1,10110001b + ;xmm1--> | dbx | dby | dex | dey | +;1 movups .dey13,xmm1 + + cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 
delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 ;mm1,xmm1 + movq .dey13,mm0 + movq .dby13,mm1 + + mov ax,.z3 + sub ax,.z1 + cwde + + mov dx,.x3 + sub dx,.x1 + movsx edx,dx + + movd mm2,[.t_x1] + movd mm3,[.t_x3] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtsi2ss xmm1,edx + shufps xmm1,xmm1,00101111b + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | dx | dz | dtx | dty | +;1 movlps .dty13,xmm1 +;1 movhps .dz13,xmm1 + + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dty13,mm0 + movq .dz13,mm1 + +else + + mov ax,.x3 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dx13,eax + push eax + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz13,eax + push eax + + + mov ax,word[.b_x3] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx13,eax + push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby13,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex13,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey13,eax + push eax + + mov ax,word[.t_x3] + sub ax,word[.t_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dtx13,eax + push eax + + mov ax,word[.t_y3] + sub ax,word[.t_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dty13,eax + push eax +end if + .bt_dx13_done: + + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,8 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx23_done + .bt_dx23_make: + movsx ebx,bx + +if Ext>=SSE + + sub esp,32 + ; mov eax,256 + cvtsi2ss xmm4,[i255d] + cvtsi2ss xmm3,ebx ;rcps + divss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x2] + movd mm1,[.b_x3] + movd mm2,[.e_x2] + movd mm3,[.e_x3] + 
+ pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + psubd mm1,mm0 + psubd mm3,mm2 + + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | + + shufps xmm1,xmm1,10110001b + ;xmm1--> | dbx | dby | dex | dey | +;1 movups .dey23,xmm1 + + cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 ;mm1,xmm1 + movq .dey23,mm0 + movq .dby23,mm1 + + mov ax,.z3 + sub ax,.z2 + cwde + + mov dx,.x3 + sub dx,.x2 + movsx edx,dx + + movd mm2,[.t_x2] + movd mm3,[.t_x3] + + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + psubd mm3,mm2 + + cvtsi2ss xmm1,eax + movlhps xmm1,xmm1 + cvtsi2ss xmm1,edx + shufps xmm1,xmm1,00101111b + cvtpi2ps xmm1,mm3 + + divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | + + shufps xmm1,xmm1,11100001b + ; xmm1--> | dx | dz | dtx | dty | +; movlps .dty23,xmm1 +; movhps .dz23,xmm1 + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | + movq .dty23,mm0 + movq .dz23,mm1 + + +else + mov ax,.x3 + sub ax,.x2 + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dx23,eax + push eax + + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz23,eax + push eax + + mov ax,word[.b_x3] + sub ax,word[.b_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx23,eax + push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby23,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex23,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey23,eax + push eax + + + mov ax,word[.t_x3] + sub ax,word[.t_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dtx23,eax + push eax + + mov ax,word[.t_y3] + sub ax,word[.t_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dty23,eax + push eax 
+end if + ; sub esp,40 + .bt_dx23_done: + sub esp,64 + + movsx eax,.x1 + shl eax,ROUND + mov .cx1,eax + mov .cx2,eax + ; push eax + ; push eax + + movsx ebx,word[.b_x1] + shl ebx,ROUND + mov .cbx1,ebx + mov .cbx2,ebx + ; push ebx + ; push ebx + + movsx ecx,word[.b_y1] + shl ecx,ROUND + mov .cby1,ecx + mov .cby2,ecx + ; push ecx + ; push ecx + + movsx edx,word[.e_x1] + shl edx,ROUND + mov .cex1,edx + mov .cex2,edx + ; push edx + ; push edx + + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax + ; push eax + ; push eax + + movsx ebx,.z1 + shl ebx,CATMULL_SHIFT + mov .cz1,ebx + mov .cz2,ebx + ; push ebx + ; push ebx + + ; sub esp,16 + movsx ecx,word[.t_x1] + shl ecx,ROUND + mov .ctx1,ecx + mov .ctx2,ecx + ;push ecx + ;push ecx + + movsx edx,word[.t_y1] + shl edx,ROUND + mov .cty1,edx + mov .cty2,edx + ; push edx + ; push edx + +if Ext >= SSE2 + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby12 + movups xmm7,.dty12 + .scby1 equ [edi] + .scty1 equ [edi+16] + .scby2 equ [edi+32] + .scty2 equ [edi+48] + .sdby13 equ [edi+64] + .sdty13 equ [edi+80] + .sdby12 equ [edi+96] + .sdty12 equ [edi+128] + push edi + mov edi,sse_repository + movaps .scby1,xmm0 + movaps .scty1,xmm1 + movaps .scby2,xmm2 + movaps .scty2,xmm3 + movaps .sdby13,xmm4 + movaps .sdty13,xmm5 + movaps .sdby12,xmm6 + movaps .sdty12,xmm7 + pop edi + +end if + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done + .loop12: +;if Ext >= SSE2 +; fxsave [sse_repository] +;end if + call .call_line +if Ext >= SSE2 +; fxrstor [sse_repository] + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + ; movups xmm4,.dby13 + ; movups xmm5,.dty13 + ; movups xmm6,.dby12 + ; movups xmm7,.dty12 + ; paddd xmm0,xmm4 + ; paddd xmm1,xmm5 + ; paddd xmm2,xmm6 + ; paddd xmm3,xmm7 + push edi + mov edi,sse_repository + paddd xmm0,.sdby13 + paddd xmm1,.sdty13 + paddd xmm2,.sdby12 + paddd xmm3,.sdty12 + pop edi + 
movups .cby1,xmm0 + movups .cty1,xmm1 + movups .cby2,xmm2 + movups .cty2,xmm3 +end if + +if (Ext = MMX) | (Ext = SSE) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cty1 + movq mm5,.cty2 + movq mm6,.cz1 + movq mm7,.cz2 + paddd mm0,.dby12 + paddd mm1,.dby13 + paddd mm2,.dey12 + paddd mm3,.dey13 + paddd mm4,.dty13 + paddd mm5,.dty12 + paddd mm6,.dz13 + paddd mm7,.dz12 + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 + movq .cty1,mm4 + movq .cty2,mm5 + movq .cz1,mm6 + movq .cz2,mm7 +end if +if Ext = NON + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax + + mov eax,.dtx13 + add .ctx1,eax + mov ebx,.dtx12 + add .ctx2,ebx + mov edx,.dty13 + add .cty1,edx + mov eax,.dty12 + add .cty2,eax + + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx +end if + inc ecx + cmp cx,.y2 + jl .loop12 + .loop12_done: + + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done + + + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax + + movsx ebx,.x2 + shl ebx,ROUND + mov .cx2,ebx + + movzx edx,word[.b_x2] + shl edx,ROUND + mov .cbx2,edx + + movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax + + movzx ebx,word[.e_x2] + shl ebx,ROUND + mov .cex2,ebx + + movzx edx,word[.e_y2] + shl edx,ROUND + mov .cey2,edx + + movzx eax,word[.t_x2] + shl eax,ROUND + mov .ctx2,eax + + movzx ebx,word[.t_y2] + shl ebx,ROUND + mov .cty2,ebx +if Ext >= SSE2 + movups xmm2,.cby2 + movups xmm3,.cty2 + ; movups xmm4,.dby13 + ; movups xmm5,.dty13 + movups xmm6,.dby23 + movups xmm7,.dty23 +; .scby1 equ [edi] +; .scty1 equ [edi+16] +; .scby2 equ [edi+32] +; .scty2 equ [edi+48] +; .sdby13 equ [edi+64] +; .sdty13 equ [edi+80] + .sdby23 equ [edi+160] + .sdty23 equ [edi+192] + push edi + mov 
edi,sse_repository +; movaps .scby1,xmm0 +; movaps .scty1,xmm1 + movaps .scby2,xmm2 + movaps .scty2,xmm3 +; movaps .sdby13,xmm4 +; movaps .sdty13,xmm5 + movaps .sdby23,xmm6 + movaps .sdty23,xmm7 + pop edi + +end if + + .loop23: +;if Ext >= SSE2 +; fxsave [sse_repository] +;end if + call .call_line + +if Ext >= SSE2 + + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + + + push edi + mov edi,sse_repository + paddd xmm0,.sdby13 + paddd xmm1,.sdty13 + paddd xmm2,.sdby23 + paddd xmm3,.sdty23 + pop edi + movups .cby1,xmm0 + movups .cty1,xmm1 + movups .cby2,xmm2 + movups .cty2,xmm3 + + + + +; fxrstor [sse_repository] +; movups xmm0,.cby1 +; movups xmm1,.cty1 +; movups xmm2,.cby2 +; movups xmm3,.cty2 +; movups xmm4,.dby13 +; movups xmm5,.dty13 +; movups xmm6,.dby23 +; movups xmm7,.dty23 +; paddd xmm0,xmm4 +; paddd xmm1,xmm5 +; paddd xmm2,xmm6 + ; paddd xmm3,xmm7 + ; movups .cby1,xmm0 + ; movups .cty1,xmm1 + ; movups .cby2,xmm2 + ; movups .cty2,xmm3 +; +end if +if (Ext = MMX) | (Ext = SSE) + movq mm0,.cby2 + movq mm1,.cby1 + movq mm2,.cey2 + movq mm3,.cey1 + movq mm4,.cty1 + movq mm5,.cty2 + movq mm6,.cz1 + movq mm7,.cz2 + paddd mm0,.dby23 + paddd mm1,.dby13 + paddd mm2,.dey23 + paddd mm3,.dey13 + paddd mm4,.dty13 + paddd mm5,.dty23 + paddd mm6,.dz13 + paddd mm7,.dz23 + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey2,mm2 + movq .cey1,mm3 + movq .cty1,mm4 + movq .cty2,mm5 + movq .cz1,mm6 + movq .cz2,mm7 +end if +If Ext = NON + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax + + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx + + mov eax,.dtx13 + add .ctx1,eax + mov ebx,.dtx23 + add .ctx2,ebx + mov edx,.dty13 + add .cty1,edx + mov eax,.dty23 + add .cty2,eax +end 
if + inc ecx + cmp cx,.y3 + jl .loop23 + .loop23_done: + + mov esp,ebp +ret 50 + +.call_line: + + pushad + ; xmm0= cby1,cbx1,cz1,cx1 + ; xmm1= cty1,ctx1,cey1,cex1 +if Ext >= SSE2 + sub esp,8 + shufps xmm1,xmm1,10110001b + shufps xmm3,xmm3,10110001b + movlps [esp],xmm1 +else + push dword .cty1 + push .ctx1 +end if + push dword .cz1 +if Ext>=SSE2 + sub esp,8 + movlps [esp],xmm3 +else + push dword .cty2 + push .ctx2 +end if + push dword .cz2 +if Ext>=SSE2 + sub esp,32 + movhps [esp+24],xmm3 + shufps xmm2,xmm2,10110001b + movlps [esp+16],xmm2 + movhps [esp+8],xmm1 + shufps xmm0,xmm0,10110001b + movlps [esp],xmm0 ;================================ + +else + push dword .cey2 + push .cex2 + push dword .cby2 + push .cbx2 + push dword .cey1 + push .cex1 + push dword .cby1 + push .cbx1 +end if + + push .tex_ptr + push .z_buff + push .t_emap + push .t_bmap + + push ecx + + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND + + call bump_tex_line_z + + popad +;end if +ret +bump_tex_line_z: +;--------------in: eax - x1 +;-------------- ebx - x2 +;-------------- edi - pointer to screen buffer +;stack - another parameters : +.y equ dword [ebp+4] +.bmap equ dword [ebp+8] ; bump map pointer +.emap equ dword [ebp+12] ; env map pointer +.z_buff equ dword [ebp+16] ; z buffer +.tex_map equ dword [ebp+20] ; texture pointer + +.bx1 equ [ebp+24] ; --- +.by1 equ [ebp+28] ; | +.ex1 equ [ebp+32] ; | +.ey1 equ [ebp+36] ; | +.bx2 equ [ebp+40] ; | +.by2 equ [ebp+44] ; |> b. map and e. 
map coords +.ex2 equ [ebp+48] ; |> shifted shl ROUND +.ey2 equ [ebp+52] ; --- +.z2 equ [ebp+56] +.tx2 equ [ebp+60] +.ty2 equ [ebp+64] +.z1 equ [ebp+68] +.tx1 equ [ebp+72] +.ty1 equ [ebp+76] + + + +.x1 equ [ebp-4] +.x2 equ [ebp-8] +.dbx equ [ebp-12] +.dby equ [ebp-16] +.dex equ [ebp-20] +.dey equ [ebp-24] +.dz equ [ebp-28] +.dtx equ [ebp-32] +.dty equ [ebp-36] + +.cbx equ [ebp-40] +.cby equ [ebp-44] +.cex equ [ebp-48] +.cey equ [ebp-52] +.cz equ [ebp-56] +.czbuff equ [ebp-60] +.ctx equ [ebp-64] +.cty equ [ebp-68] +.c_scr equ [ebp-72] + +.temp1 equ ebp-80 +.temp2 equ ebp-88 +.temp3 equ ebp-76 +.temp4 equ ebp-84 +.temp5 equ ebp-92 + + mov ebp,esp + + mov ecx,.y + or ecx,ecx + jl .bl_end + movzx edx,word[size_y_var] + cmp ecx,edx ;SIZE_Y + jge .bl_end + + cmp eax,ebx + jl .bl_ok + je .bl_end + + +if Ext=NON + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx + + mov edx,.ex1 + xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx + + mov edx,.tx1 + xchg edx,.tx2 + mov .tx1,edx + mov edx,.ty1 + xchg edx,.ty2 + mov .ty1,edx +end if +if Ext = MMX + movq mm0,.bx1 + movq mm1,.bx2 + movq mm2,.ex1 + movq mm3,.ex2 + movq mm4,.tx1 + movq mm5,.tx2 + movq .bx2,mm0 + movq .bx1,mm1 + movq .ex1,mm3 + movq .ex2,mm2 + movq .tx1,mm5 + movq .tx2,mm4 +end if +if Ext>=SSE + movups xmm0,.bx1 + movups xmm1,.bx2 + movups .bx1,xmm1 + movups .bx2,xmm0 + movq mm0,.tx1 + movq mm1,.tx2 + movq .tx1,mm1 + movq .tx2,mm0 +end if +;if Ext>=SSE2 +; movaps xmm4,xmm0 +; movaps xmm0,xmm2 +; movaps xmm2,xmm4 +; movaps xmm5,xmm1 +; movaps xmm1,xmm3 +; movaps xmm3,xmm5 +;else + + xchg eax,ebx + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx +;end if + .bl_ok: +;if Ext >= SSE2 +; shufps xmm0,xmm0,11100001b +; shufps xmm2,xmm2,11100001b +; movlps .bx1,xmm0 +; movlps .bx2,xmm2 + + +; shufps xmm0,xmm0,00011011b +; shufps xmm2,xmm2,00011011b +; movd eax,xmm0 +; movd ebx,xmm2 +; shufps xmm0,xmm0,11000110b +; shufps xmm2,xmm2,11000110b +; movd .z1,xmm0 +; movd 
.z2,xmm2 +; shufps xmm1,xmm1,10110001b +; shufps xmm3,xmm3,10110001b +; movlps .ex1,xmm1 +; movlps .ex2,xmm2 +; movhps .tx1,xmm1 +; movhps .tx2,xmm2 + +; xchg eax,ebx +; mov edx,.z1 +; xchg edx,.z2 +; mov .z1,edx + + +;end if + + push eax + push ebx ;store x1, x2 + movzx ebx,word[size_x_var] + ; mov eax,.x1 + cmp dword .x1,ebx ;dword .x1,SIZE_X + jge .bl_end + cmp dword .x2,0 + jle .bl_end + + mov ebx,.x2 + sub ebx,.x1 + +if Ext>=SSE + + sub esp,28 + cvtsi2ss xmm3,ebx ;rcps + shufps xmm3,xmm3,0 +; float using SSE variant ::--> +; movups xmm0,.bx1 ; new +; movups xmm1,.bx2 ; new + + cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point + movlhps xmm0,xmm0 + cvtpi2ps xmm0,.ex1 ;mm2 + cvtpi2ps xmm1,.bx2 ;mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,.ex2 ;mm3 + subps xmm1,xmm0 + + divps xmm1,xmm3 + + shufps xmm1,xmm1,10110001b +; movups .dey,xmm1 ; new + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey,mm0 + movq .dby,mm1 + + movd mm2,.z1 + movd mm3,.z2 + + cvtpi2ps xmm0,.tx1 ;mm0 + movlhps xmm0,xmm0 + cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,.tx2 ;mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 +; movups xmm0,,z1 ; new +; movups xmm1,.z2 ; new + subps xmm1,xmm0 + + divps xmm1,xmm3 + +; movups .dz,xmm1 ;new + + shufps xmm1,xmm1,10110100b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movd .dz,mm0 + movq .dty,mm1 + +else + + mov eax,.bx2 ; calc .dbx + sub eax,.bx1 + cdq + idiv ebx + push eax + + mov eax,.by2 ; calc .dby + sub eax,.by1 + cdq + idiv ebx + push eax + + mov eax,.ex2 ; calc .dex + sub eax,.ex1 + cdq + idiv ebx + push eax + + mov eax,.ey2 ; calc .dey + sub eax,.ey1 + cdq + idiv ebx + push eax + + + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax + + mov eax,.tx2 ; calc .dtx + sub eax,.tx1 + cdq + idiv ebx + push eax + + mov eax,.ty2 ; calc .dty + sub eax,.ty1 + cdq + idiv ebx + push eax + +end if + cmp dword .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; 
cutting triangle exceedes screen + mov ebx,.x1 + neg ebx + +;if Ext >= SSE + +; cvtsi2ss xmm0,ebx +; shufps xmm0,xmm0,0 +; movups xmm1,.dey +; mulps xmm1,xmm0 +; shufps xmm1,xmm1,00011011b +; movups xmm2,.bx1 +; addps xmm2,xmm1 +; movups .bx1,xmm2 + + mov eax,.dz + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov dword .x1,0 + + mov eax,.dbx + imul ebx + add .bx1,eax + + mov eax,.dby + imul ebx + add .by1,eax + + mov eax,.dex + imul ebx + add .ex1,eax + + mov eax,.dey + imul ebx + add .ey1,eax + + mov eax,.dtx + imul ebx + add .tx1,eax + + mov eax,.dty + imul ebx + add .ty1,eax + + @@: + ; mov ebx,.x2 + movzx eax,word[size_x_var] + ; cmp dword .x2,SIZE_X + cmp dword .x2,eax ; eax,ebx + jl @f + mov dword .x2,eax ;SIZE_X + @@: + movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers + mul .y + add eax,.x1 + lea esi,[4*eax] + add esi,.z_buff ; z-buffer filled with dd variables + lea eax,[eax*3] + add edi,eax + + + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 ; current b, e and t shifted shl ROUND .cbx + push dword .by1 ; .cby + push dword .ex1 ; .cex + push dword .ey1 ; .cey + + push dword .z1 ; current z shl CATMULL_SHIFT ; .cz + push esi ; .czbuff + + push dword .tx1 ; .ctx + push dword .ty1 ; .cty + push edi ; .c_scr +if Ext = SSE2 + mov eax,TEXTURE_SIZE + movd xmm1,eax + shufps xmm1,xmm1,0 + push dword TEX_X + push dword -TEX_X + push dword 1 + push dword -1 + movups xmm2,[esp] + movd xmm3,.bmap + shufps xmm3,xmm3,0 +end if + +if Ext>=MMX + movq mm7,.cty + movq mm6,.cby + movq mm5,.cey +; movq mm4,.dtyq +; movq mm3,.dbyq +end if + + .draw: + ; if TEX = SHIFTING ;bump drawing only in shifting mode + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] + jge .skip +if Ext=NON + mov eax,.cby + shr eax,ROUND + mov esi,.cbx + shr esi,ROUND +else + movq mm1,mm6 + psrld mm1,ROUND + movd eax,mm1 + psrlq mm1,32 + movd esi,mm1 +end if + + shl eax,TEX_SHIFT + add esi,eax ;- ; esi 
- current bump map index + +if Ext = SSE2 + + movd xmm0,esi + shufps xmm0,xmm0,0 + paddd xmm0,xmm2 + pand xmm0,xmm1 + paddd xmm0,xmm3 + + movd ebx,xmm0 + movzx eax,byte[ebx] +; +; shufps xmm0,xmm0,11100001b + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx,byte[ebx] + sub eax,ebx +; +; shufps xmm0,xmm0,11111110b + psrldq xmm0,4 + movd ebx,xmm0 + movzx edx, byte [ebx] +; +; shufps xmm0,xmm0,11111111b + psrldq xmm0,4 + movd ebx,xmm0 + movzx ebx, byte [ebx] + sub edx,ebx +; +else +; mov ebx,esi +; dec ebx + lea ebx,[esi-1] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx eax,byte [ebx] + +; mov ebx,esi +; inc ebx + lea ebx,[esi+1] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + sub eax,ebx + +; mov ebx,esi +; sub ebx,TEX_X + lea ebx,[esi-TEX_X] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx edx,byte [ebx] + +; mov ebx,esi +; add ebx,TEX_X + lea ebx,[esi+TEX_X] + and ebx,TEXTURE_SIZE + add ebx,.bmap + movzx ebx,byte [ebx] + sub edx,ebx +end if + + ; eax - horizontal sub modificated x coord + ; edx - vertical sub modificated y coord +if Ext=NON + mov ebx,.cex ;.cex - current env map X + shr ebx,ROUND + add eax,ebx + + + mov ebx,.cey ;.cey - current env map y + shr ebx,ROUND + add edx,ebx + +else + movq mm1,mm5 ; mm5 - copy of cur env coords + psrld mm1,ROUND + movd ebx,mm1 + psrlq mm1,32 + add eax,ebx + movd ebx,mm1 + add edx,ebx +; movq qword[.temp1],mm3 +; add eax,dword [.temp1] +; add edx,dword [.temp1+4] +end if + + or eax,eax + jl .black + cmp eax,TEX_X + jg .black + or edx,edx + jl .black + cmp edx,TEX_Y + jg .black + + shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze + add edx,eax ; proponuje nie stawiac czarnego pixela tylko + lea esi,[edx*3] ; niezaburzony. 
+ add esi,.emap ; + lodsd + +if Ext=NON + mov edx,.cty + shr edx,ROUND ; sar + + mov edi,.ctx + shr edi,ROUND ; sar +else + movq mm1,mm7 + psrld mm1,ROUND + movd edx,mm1 + psrlq mm1,32 + movd edi,mm1 + +end if + + shl edx,TEX_SHIFT + add edi,edx + and edi,TEXTURE_SIZE + lea esi,[edi*3] + add esi,.tex_map + +if Ext=NON + mov edx,eax + lodsd + push ax + mul dl + mov dl,ah + pop ax + shr ax,8 + mul dh + mov al,dl + mov edi,.c_scr + stosw + shr edx,16 + shr eax,16 + mul dl + shr ax,8 + stosb +else + movd mm0,eax + pxor mm1,mm1 + punpcklbw mm0,mm1 + movd mm2,[esi] + punpcklbw mm2,mm1 + pmullw mm0,mm2 + psrlw mm0,8 + packuswb mm0,mm1 + mov edi,.c_scr + movd [edi],mm0 + +end if + + jmp .actual_zbuff ; actualize z buffer + @@: + .black: + xor eax,eax + mov edi,.c_scr + stosd + .actual_zbuff: + mov eax,.cz + mov edi,.czbuff + stosd + + .skip: + add dword .czbuff,4 + add dword .c_scr,3 + +if Ext=NON + mov eax,.dbx + add .cbx,eax + mov ebx,.dby + add .cby,ebx + + mov edx,.dex + add .cex,edx + mov eax,.dey + add .cey,eax + + mov ebx,.dtx + add .ctx,ebx + mov edx,.dty + add .cty,edx + +else + paddd mm7,.dty + paddd mm6,.dby + paddd mm5,.dey +end if + mov eax,.dz + add .cz,eax + + dec ecx + jnz .draw + + .bl_end: + mov esp,ebp +ret 76 +;Ext = MMX + +; else +; movq mm5, qword[.temp1] ;- +; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X +; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE +; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap +; movd ebx,mm5 +; psrlq mm5,32 +; end if diff --git a/programs/demos/view3ds/data.inc b/programs/demos/view3ds/data.inc index 117ebe3449..d8a36ce4ee 100644 --- a/programs/demos/view3ds/data.inc +++ b/programs/demos/view3ds/data.inc @@ -1,11 +1,14 @@ ; DATA AREA ************************************ - + if Ext > SSE2 + isSSE3 db 1 + end if i3 dw 3 i6 dd 6 i12 dd 12 i36 dd 36 i256 dw 256 i255d dd 255 + f1: dot_max dd 1.0 ; dot product max and min dot_min dd 0.0 env_const dd 1.05 @@ 
-25,7 +28,7 @@ y_offset dw SIZE_Y / 2 z_offset dw 0 rsscale dd 175.0 ; next real scale - vect_x dw SIZE_X / 2 + vect_x: dw SIZE_X / 2 vect_y dw SIZE_Y / 2 vect_z dw 0 size_y_var: @@ -110,9 +113,9 @@ dd ? db 7 - db 'catmull ' + db 'ray shadow' db 2 -catmull_flag db 1 +ray_shd_flag db 0 dd onoff_f db 8 @@ -165,7 +168,7 @@ emboss_flag db 0 db 16 db 'fire ' - db 3 + db 2 fire_flag db 0 dd blur_f @@ -350,7 +353,7 @@ base_vector: if Ext=SSE3 db ' (SSE3)' end if - db ' 0.073',0 + db ' 0.074',0 labellen: STRdata db '-1 ' lab_vert: @@ -425,8 +428,43 @@ lightsend: - +;if Ext >= SSE3 align 16 + point_light_coords: + dd 50.0 + dd 50.0 + dd -215.0 + dd 0.0 + + + dd 815.0 + dd 815.0 + dd -215.0 + dd 0.0 + + dd 1500.0 + dd 1500.0 + dd -215.0 + dd 0.0 +if 0 + aabb1: + .0 dd 1.0,1.0,1.0,0 + .1 dd -1.0,1.0,1.0,0 + .2 dd 1.0,-1.0,1.0,0 + .3 dd -1.0,-1.0,1.0,0 + .4 dd 1.0,1.0,-1.0,0 + .5 dd -1.0,1.0,-1.0,0 + .6 dd 1.0,-1.0,-1.0,0 + .7 dd -1.0,-1.0,-1.0,0 + +end if + + sign_mask: + times 4 dd 0x80000000 + f05xz: dd 0, 0, - 1.0 ,0 + + sign_z: + dd -1,-1,0x7fffffff,0 abs_mask: dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff emboss_bias: @@ -442,7 +480,11 @@ align 16 times 4 dd 510.0 the_one: times 4 dd 1.0 - aprox dd 0.0001 + + eps: times 4 dd 0.00000 + epsone dd 1.0001 + aprox dd 0.0001 + epsminus dd -0.0001 file_info: @@ -463,22 +505,13 @@ SourceFile: workarea rb 180 EndFile dd ? align 8 - sinbeta dd ?;+32 + sinbeta dd ?; cosbeta dd ? xsub dw ? - zsub dw ?;+40 + zsub dw ? ysub dw ? - xx1 dw ? - yy1 dw ? - zz1 dw ?;+48 xx1 + 4 - xx2 dw ? - yy2 dw ? - zz2 dw ? ; xx1 + 10 - xx3 dw ?;+56 - yy3 dw ? - zz3 dw ? ; xx1 + 16 col1 dd ? col2 dd ? col3 dd ? @@ -487,13 +520,9 @@ align 8 points_count_var dd ? ; triangles_count_var dd ? ; dont change order edges_count dd ? ; + tex_points_ptr dd ? - point_index1 dd ? ;-\ - point_index2 dd ? ; } don't change order - point_index3 dd ? ;-/ temp_col dw ? - temp1 dd ? ; > dont change - temp2 dd ? ; > order high dd ? rand_seed dw ? 
align 8 @@ -510,18 +539,14 @@ align 8 matrix rb 36 cos_tab rd 360 sin_tab rd 360 - align 16 + lights_aligned: + lights_aligned_end = $ + 16 * 12 + rb 16 * 12 - - points_count = 180000/6*3 - triangles_count = 180000 / 6 ;($-triangles)/6 -align 16 - label trizdd dword - label trizdq qword - triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position align 16 vectors rb 24 + align 16 bumpmap rb TEXTURE_SIZE + 1 align 16 @@ -535,25 +560,19 @@ align 16 align 16 color_map rb (TEXTURE_SIZE +100) * 3 align 16 - tex_points rb points_count * 4 ; bump_map and texture coords - ; each point word x, word y -align 16 - lights_aligned: - lights_aligned_end = $ + 16 * 12 - rb 16 * 12 + ; tex_points rb points_count * 4 ; bump_map and texture coords + ; ; each point word x, word y +;align 16 +; lights_aligned: +; lights_aligned_end = $ + 16 * 12 +; rb 16 * 12 if Ext >= SSE2 sse_repository rb 1024 end if - ; SourceFile: ; source file temporally in screen area - ; workarea dd ? - - ; screen rb SIZE_X * SIZE_Y * 3 ; screen buffer -;align 16 - ; Z_buffer rb SIZE_X * SIZE_Y * 4 procinfo: - rb 1024 ; process info + rb 2048 ; process info I_Param rb 256 memStack: rb 2000 diff --git a/programs/demos/view3ds/flat_cat.inc b/programs/demos/view3ds/flat_cat.inc index d13f8a3c4d..b51f653c7e 100644 --- a/programs/demos/view3ds/flat_cat.inc +++ b/programs/demos/view3ds/flat_cat.inc @@ -1,399 +1,399 @@ -CATMULL_SHIFT equ 16 - - -flat_triangle_z: -; procedure drawing triangle with Z cordinate interpolation ------ -; (Catmull alghoritm)-------------------------------------------- -; ----------------in - eax - x1 shl 16 + y1 ---------------------- -; -------------------- ebx - x2 shl 16 + y2 ---------------------- -; -------------------- ecx - x3 shl 16 + y3 ---------------------- -; -------------------- edx - color 0x00RRGGBB -------------------- -; -------------------- esi - pointer to Z-buffer ----------------- -; -------------------- edi - pointer to screen buffer------------- -; 
-------------------- stack : z coordinates -; -------------------- Z-buffer : each z variable as dword -; -------------------- (Z coor. as word) shl CATMULL_SHIFT -.z1 equ word[ebp+4] -.z2 equ word[ebp+6] ; each z coordinate as word integer -.z3 equ word[ebp+8] - -.col equ dword[ebp-4] -.x1 equ word[ebp-6] -.y1 equ word[ebp-8] -.x2 equ word[ebp-10] -.y2 equ word[ebp-12] -.x3 equ word[ebp-14] -.y3 equ word[ebp-16] - -.dx12 equ dword[ebp-20] -;.dz12 equ dword[ebp-24] -.dx13 equ dword[ebp-24] -.dz13 equ dword[ebp-28] -.dz12 equ dword[ebp-32] -;.dz13 equ dword[ebp-32] -.dx23 equ dword[ebp-36] -.dz13M equ [ebp-40] -.dz23 equ dword[ebp-44] -.zz1 equ dword[ebp-48] -.zz2 equ dword[ebp-52] -.zz2M equ qword[ebp-52] -.dz12M equ qword[ebp-32] -.dz23M equ qword[ebp-44] -;if Ext>=MMX -; emms -;end if - mov ebp,esp - - push edx ; store edx in variable .col - .sort2: - cmp ax,bx - jle .sort1 - xchg eax,ebx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx - .sort1: - cmp bx,cx - jle .sort3 - xchg ebx,ecx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .sort2 - .sort3: - push eax ; store triangle coordinates in user friendly variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .ft_loop2_end - ; cmp ax,SIZE_Y - ; jle @f - ; cmp bx,SIZE_Y - ; jle @f - ; cmp cx,SIZE_Y - ; jge @f - ; ror eax,16 - ; ror ebx,16 - ; ror ecx,16 - ; cmp ax,SIZE_X - ; jle @f - ; cmp bx,SIZE_X - ; jle @f - ; cmp cx,SIZE_X - ; jle @f - ; jmp .ft_loop2_end - ;@@: - sub esp,52-12 - - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .ft_dx12_make - mov .dx12,0 - mov .dz12,0 - jmp .ft_dx12_done - .ft_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx12,eax - - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - mov .dz12,eax - .ft_dx12_done: - mov bx,.y3 ; 
calc delta 13 - sub bx,.y1 - jnz .ft_dx13_make - mov .dx13,0 - mov .dz13,0 - mov dword .dz13M,0 - jmp .ft_dx13_done - .ft_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx13,eax - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - mov .dz13,eax - mov dword .dz13M,eax - .ft_dx13_done: - mov bx,.y3 ; calc delta 23 - sub bx,.y2 - jnz .gt_dx23_make - mov .dx23,0 - mov .dz23,0 - jmp .gt_dx23_done - .gt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - mov .dx23,eax - - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - mov .dz23,eax - .gt_dx23_done: - - movsx edx,.z1 - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx - movsx eax,.x1 - shl eax,ROUND ; eax - x1 - mov ebx,eax ; ebx - x2 -;if Ext>=MMX -; movq mm0,.zz2M -;end if - mov cx,.y1 - cmp cx,.y2 - jge .ft_loop1_end - .ft_loop1: - - pushad - - push .col - push cx ; y - sar ebx,ROUND - push bx ; x2 - sar eax,ROUND - push ax ; x1 -;if Ext>=MMX -; sub esp,8 -; movq [esp],mm0 -;else - push .zz2 ; z2 shl CATMULL_SHIFT - push .zz1 ; z1 shl CATMULL_SHIFT -;end if - call flat_line_z - - popad - - add eax,.dx13 - add ebx,.dx12 -;if Ext>=MMX -; paddd mm0,.dz12M -;else - - mov edx,.dz13 - add .zz1,edx - mov edx,.dz12 - add .zz2,edx -;end if - inc cx - cmp cx,.y2 - jl .ft_loop1 - .ft_loop1_end: - - movsx edx,.z2 - shl edx,CATMULL_SHIFT - mov .zz2,edx - movsx ebx,.x2 - shl ebx,ROUND -;if Ext>=MMX -; movq mm0,.zz2M -;; push .dz13 ; exchange -;; pop .dz12 -;; push .dz23 ; exchange -;; pop .dz13 -;end if - mov cx,.y2 - cmp cx,.y3 - jge .ft_loop2_end - .ft_loop2: - pushad - - push .col - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax ; x1 -;if Ext>=MMX -; sub esp,8 -; movq [esp],mm0 -;else - push .zz2 ; z2 shl CATMULL_SHIFT - push .zz1 ; z1 shl CATMULL_SHIFT -;end if - call flat_line_z - - popad - - add eax,.dx13 - add ebx,.dx23 -;if Ext>=MMX -; paddd mm0,.dz23M -;else - mov 
edx,.dz13 - add .zz1,edx - mov edx,.dz23 - add .zz2,edx - -; mov edx,.dz13 -; add .zz1,edx -; mov edx,.dz12 -; add .zz2,edx -;end if - inc cx - cmp cx,.y3 - jl .ft_loop2 - .ft_loop2_end: - - mov esp,ebp -ret 6 - -flat_line_z: -;---------------- -;-------------in edi - pointer to screen buffer ---------------------------------- -;--------------- esi - pointer to z-buffer (each Z varible dword)----------------- -;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------ -.z1 equ dword [ebp+4] -.z2 equ dword [ebp+8] -.x1 equ word [ebp+12] -.x2 equ word [ebp+14] -.y equ word [ebp+16] -.col equ dword [ebp+18] - -.dz equ dword [ebp-4] - - mov ebp,esp -;; sub esp,4 - mov ax,.y - or ax,ax - jl .fl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;[size_y_var] - ; cmp ax,SIZE_Y-1 - jg .fl_quit - - ; cmp .x1,0 - ; jge .fl_ok1 - ; cmp .x2,0 - ; jl .fl_quit - ; .fl_ok1: - ; cmp .x1,SIZE_X - ; jle .fl_ok2 - ; cmp .x2,SIZE_X - ; jg .fl_quit - ; .fl_ok2: - mov ax,.x1 - cmp ax,.x2 - je .fl_quit - jl .fl_ok - - xchg ax,.x2 - mov .x1,ax - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx - .fl_ok: - mov bx,[size_x_var] - dec bx - cmp .x1,bx ;SIZE_X-1 - jg .fl_quit - cmp .x2,0 - jle .fl_quit - - mov eax,.z2 - sub eax,.z1 - cdq - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx - idiv ebx -;; mov .dz,eax ; calculated delta - shifted .dz - push eax - - cmp .x1,0 - jge @f - movsx ebx,.x1 - neg ebx - imul ebx - add .z1,eax - mov .x1,0 - @@: - movzx edx,word[size_x_var] - cmp .x2,dx ;[size_x_var] ;SIZE_X - jl @f - mov .x2,dx ;[size_x_var] ;SIZE_X - @@: - ; movzx edx,[size_x_var] ;SIZE_X - movsx eax,.y - mul edx ; edi = edi + (SIZE_X * y + x1)*3 - movsx edx,.x1 - add eax,edx - push eax - lea eax,[eax*3] - add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 - pop eax - shl eax,2 - add esi,eax - - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx - - mov eax,.col - mov ebx,.z1 ; ebx : curr. 
z - mov edx,.dz - dec ecx - jecxz .draw_last - .ddraw: - cmp ebx,dword[esi] - ; cmovl [edi],eax - ; cmovl [esi],ebx - jge @f - stosd - dec edi - mov dword[esi],ebx - jmp .no_skip - @@: - add edi,3 - .no_skip: - add esi,4 - add ebx,edx - loop .ddraw - - .draw_last: - cmp ebx,dword[esi] - jge .fl_quit - stosw - shr eax,16 - stosb - mov dword[esi],ebx - - .fl_quit: - - mov esp,ebp -ret 18 +CATMULL_SHIFT equ 16 + + +flat_triangle_z: +; procedure drawing triangle with Z cordinate interpolation ------ +; (Catmull alghoritm)-------------------------------------------- +; ----------------in - eax - x1 shl 16 + y1 ---------------------- +; -------------------- ebx - x2 shl 16 + y2 ---------------------- +; -------------------- ecx - x3 shl 16 + y3 ---------------------- +; -------------------- edx - color 0x00RRGGBB -------------------- +; -------------------- esi - pointer to Z-buffer ----------------- +; -------------------- edi - pointer to screen buffer------------- +; -------------------- stack : z coordinates +; -------------------- Z-buffer : each z variable as dword +; -------------------- (Z coor. 
as word) shl CATMULL_SHIFT +.z1 equ word[ebp+4] +.z2 equ word[ebp+6] ; each z coordinate as word integer +.z3 equ word[ebp+8] + +.col equ dword[ebp-4] +.x1 equ word[ebp-6] +.y1 equ word[ebp-8] +.x2 equ word[ebp-10] +.y2 equ word[ebp-12] +.x3 equ word[ebp-14] +.y3 equ word[ebp-16] + +.dx12 equ dword[ebp-20] +;.dz12 equ dword[ebp-24] +.dx13 equ dword[ebp-24] +.dz13 equ dword[ebp-28] +.dz12 equ dword[ebp-32] +;.dz13 equ dword[ebp-32] +.dx23 equ dword[ebp-36] +.dz13M equ [ebp-40] +.dz23 equ dword[ebp-44] +.zz1 equ dword[ebp-48] +.zz2 equ dword[ebp-52] +.zz2M equ qword[ebp-52] +.dz12M equ qword[ebp-32] +.dz23M equ qword[ebp-44] +;if Ext>=MMX +; emms +;end if + mov ebp,esp + + push edx ; store edx in variable .col + .sort2: + cmp ax,bx + jle .sort1 + xchg eax,ebx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx + .sort1: + cmp bx,cx + jle .sort3 + xchg ebx,ecx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .sort2 + .sort3: + push eax ; store triangle coordinates in user friendly variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .ft_loop2_end + ; cmp ax,SIZE_Y + ; jle @f + ; cmp bx,SIZE_Y + ; jle @f + ; cmp cx,SIZE_Y + ; jge @f + ; ror eax,16 + ; ror ebx,16 + ; ror ecx,16 + ; cmp ax,SIZE_X + ; jle @f + ; cmp bx,SIZE_X + ; jle @f + ; cmp cx,SIZE_X + ; jle @f + ; jmp .ft_loop2_end + ;@@: + sub esp,52-12 + + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .ft_dx12_make + mov .dx12,0 + mov .dz12,0 + jmp .ft_dx12_done + .ft_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + mov .dx12,eax + + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + mov .dz12,eax + .ft_dx12_done: + mov bx,.y3 ; calc delta 13 + sub bx,.y1 + jnz .ft_dx13_make + mov .dx13,0 + mov .dz13,0 + mov dword .dz13M,0 + jmp .ft_dx13_done + .ft_dx13_make: + 
mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + mov .dx13,eax + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + mov .dz13,eax + mov dword .dz13M,eax + .ft_dx13_done: + mov bx,.y3 ; calc delta 23 + sub bx,.y2 + jnz .gt_dx23_make + mov .dx23,0 + mov .dz23,0 + jmp .gt_dx23_done + .gt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + mov .dx23,eax + + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + mov .dz23,eax + .gt_dx23_done: + + movsx edx,.z1 + shl edx,CATMULL_SHIFT + mov .zz1,edx + mov .zz2,edx + movsx eax,.x1 + shl eax,ROUND ; eax - x1 + mov ebx,eax ; ebx - x2 +;if Ext>=MMX +; movq mm0,.zz2M +;end if + mov cx,.y1 + cmp cx,.y2 + jge .ft_loop1_end + .ft_loop1: + + pushad + + push .col + push cx ; y + sar ebx,ROUND + push bx ; x2 + sar eax,ROUND + push ax ; x1 +;if Ext>=MMX +; sub esp,8 +; movq [esp],mm0 +;else + push .zz2 ; z2 shl CATMULL_SHIFT + push .zz1 ; z1 shl CATMULL_SHIFT +;end if + call flat_line_z + + popad + + add eax,.dx13 + add ebx,.dx12 +;if Ext>=MMX +; paddd mm0,.dz12M +;else + + mov edx,.dz13 + add .zz1,edx + mov edx,.dz12 + add .zz2,edx +;end if + inc cx + cmp cx,.y2 + jl .ft_loop1 + .ft_loop1_end: + + movsx edx,.z2 + shl edx,CATMULL_SHIFT + mov .zz2,edx + movsx ebx,.x2 + shl ebx,ROUND +;if Ext>=MMX +; movq mm0,.zz2M +;; push .dz13 ; exchange +;; pop .dz12 +;; push .dz23 ; exchange +;; pop .dz13 +;end if + mov cx,.y2 + cmp cx,.y3 + jge .ft_loop2_end + .ft_loop2: + pushad + + push .col + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax ; x1 +;if Ext>=MMX +; sub esp,8 +; movq [esp],mm0 +;else + push .zz2 ; z2 shl CATMULL_SHIFT + push .zz1 ; z1 shl CATMULL_SHIFT +;end if + call flat_line_z + + popad + + add eax,.dx13 + add ebx,.dx23 +;if Ext>=MMX +; paddd mm0,.dz23M +;else + mov edx,.dz13 + add .zz1,edx + mov edx,.dz23 + add .zz2,edx + +; mov edx,.dz13 +; add .zz1,edx +; mov edx,.dz12 +; add .zz2,edx +;end if + 
inc cx + cmp cx,.y3 + jl .ft_loop2 + .ft_loop2_end: + + mov esp,ebp +ret 6 + +flat_line_z: +;---------------- +;-------------in edi - pointer to screen buffer ---------------------------------- +;--------------- esi - pointer to z-buffer (each Z varible dword)----------------- +;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------ +.z1 equ dword [ebp+4] +.z2 equ dword [ebp+8] +.x1 equ word [ebp+12] +.x2 equ word [ebp+14] +.y equ word [ebp+16] +.col equ dword [ebp+18] + +.dz equ dword [ebp-4] + + mov ebp,esp +;; sub esp,4 + mov ax,.y + or ax,ax + jl .fl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;[size_y_var] + ; cmp ax,SIZE_Y-1 + jg .fl_quit + + ; cmp .x1,0 + ; jge .fl_ok1 + ; cmp .x2,0 + ; jl .fl_quit + ; .fl_ok1: + ; cmp .x1,SIZE_X + ; jle .fl_ok2 + ; cmp .x2,SIZE_X + ; jg .fl_quit + ; .fl_ok2: + mov ax,.x1 + cmp ax,.x2 + je .fl_quit + jl .fl_ok + + xchg ax,.x2 + mov .x1,ax + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx + .fl_ok: + mov bx,[size_x_var] + dec bx + cmp .x1,bx ;SIZE_X-1 + jg .fl_quit + cmp .x2,0 + jle .fl_quit + + mov eax,.z2 + sub eax,.z1 + cdq + mov bx,.x2 + sub bx,.x1 + movsx ebx,bx + idiv ebx +;; mov .dz,eax ; calculated delta - shifted .dz + push eax + + cmp .x1,0 + jge @f + movsx ebx,.x1 + neg ebx + imul ebx + add .z1,eax + mov .x1,0 + @@: + movzx edx,word[size_x_var] + cmp .x2,dx ;[size_x_var] ;SIZE_X + jl @f + mov .x2,dx ;[size_x_var] ;SIZE_X + @@: + ; movzx edx,[size_x_var] ;SIZE_X + movsx eax,.y + mul edx ; edi = edi + (SIZE_X * y + x1)*3 + movsx edx,.x1 + add eax,edx + push eax + lea eax,[eax*3] + add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 + pop eax + shl eax,2 + add esi,eax + + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx + + mov eax,.col + mov ebx,.z1 ; ebx : curr. 
z + mov edx,.dz + dec ecx + jecxz .draw_last + .ddraw: + cmp ebx,dword[esi] + ; cmovl [edi],eax + ; cmovl [esi],ebx + jge @f + stosd + dec edi + mov dword[esi],ebx + jmp .no_skip + @@: + add edi,3 + .no_skip: + add esi,4 + add ebx,edx + loop .ddraw + + .draw_last: + cmp ebx,dword[esi] + jge .fl_quit + stosw + shr eax,16 + stosb + mov dword[esi],ebx + + .fl_quit: + + mov esp,ebp +ret 18 diff --git a/programs/demos/view3ds/grd_cat.inc b/programs/demos/view3ds/grd_cat.inc index df72c4a2fb..5480b12b6d 100644 --- a/programs/demos/view3ds/grd_cat.inc +++ b/programs/demos/view3ds/grd_cat.inc @@ -1,704 +1,704 @@ -ROUND equ 8 -CATMULL_SHIFT equ 8 -gouraud_triangle_z: - -;----procedure drawing gouraud triangle with z coordinate -;----interpolation ( Catmull alghoritm )----------------- -;------------------in - eax - x1 shl 16 + y1 ------------ -;---------------------- ebx - x2 shl 16 + y2 ------------ -;---------------------- ecx - x3 shl 16 + y3 ------------ -;---------------------- esi - pointer to Z-buffer-------- -;---------------------- Z-buffer filled with dd variables -;---------------------- shifted CATMULL_SHIFT------------ -;---------------------- edi - pointer to screen buffer--- -;---------------------- stack : colors------------------- -;----------------- procedure don't save registers !!----- -.col1r equ ebp+4 ; each color as word -.col1g equ ebp+6 ; each z coordinate as word -.col1b equ ebp+8 -.z1 equ ebp+10 -.col2r equ ebp+12 -.col2g equ ebp+14 -.col2b equ ebp+16 -.z2 equ ebp+18 -.col3r equ ebp+20 -.col3g equ ebp+22 -.col3b equ ebp+24 -.z3 equ ebp+26 - -.x1 equ word[ebp-2] -.y1 equ word[ebp-4] -.x2 equ word[ebp-6] -.y2 equ word[ebp-8] -.x3 equ word[ebp-10] -.y3 equ word[ebp-12] - -.dx12 equ dword[ebp-16] -.dz12 equ dword[ebp-20] -.dc12r equ dword[ebp-24] -.dc12g equ dword[ebp-28] -.dc12b equ dword[ebp-32] - -.dx13 equ dword[ebp-36] -.dz13 equ dword[ebp-40] -.dc13r equ dword[ebp-44] -.dc13g equ dword[ebp-48] -.dc13b equ dword[ebp-52] - -.dx23 equ dword[ebp-56] 
-.dz23 equ dword[ebp-60] -.dc23r equ dword[ebp-64] -.dc23g equ dword[ebp-68] -.dc23b equ dword[ebp-72] - -.zz1 equ dword[ebp-76] -.c1r equ dword[ebp-80] -.c1g equ dword[ebp-84] -.c1b equ dword[ebp-88] -.zz2 equ dword[ebp-92] -.c2r equ dword[ebp-96] -.c2g equ dword[ebp-100] -.c2b equ dword[ebp-104] -;.zz1 equ dword[ebp-100] -;.zz2 equ dword[ebp-104] - -.c1bM equ [ebp-88] -.c2bM equ [ebp-104] -.c1rM equ [ebp-80] -.c2rM equ [ebp-96] -.dc23bM equ [ebp-72] -.dc13bM equ [ebp-52] -.dc12bM equ [ebp-32] -.dc12rM equ [ebp-24] -.dc13rM equ [ebp-44] -.dc23rM equ [ebp-64] -if Ext=MMX - emms -end if - - mov ebp,esp - ; sub esp,84 - .sort3: ; sort triangle coordinates... - cmp ax,bx - jle .sort1 - xchg eax,ebx - mov edx,dword[.col1r] - xchg edx,dword[.col2r] - mov dword[.col1r],edx - mov edx,dword[.col1b] - xchg edx,dword[.col2b] - mov dword[.col1b],edx - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.col2r] - xchg edx,dword[.col3r] - mov dword[.col2r],edx - mov edx,dword[.col2b] - xchg edx,dword[.col3b] - mov dword[.col2b],edx - jmp .sort3 - .sort2: - push eax ; store in variables - push ebx - push ecx - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .gt_loop2_end - - mov bx,.y2 ; calc deltas - sub bx,.y1 - jnz .gt_dx12_make - ; mov .dx12,0 - ; mov .dz12,0 - ; mov .dc12r,0 - ; mov .dc12g,0 - ; mov .dc12b,0 - mov ecx,5 - @@: - push dword 0 - loop @b - jmp .gt_dx12_done - .gt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax - push eax - - mov ax,word[.z2] - sub ax,word[.z1] - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - - mov ax,word[.col2r] - sub ax,word[.col1r] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12r,eax - push eax - mov ax,word[.col2g] - sub ax,word[.col1g] - cwde - shl eax,ROUND - cdq - idiv ebx - 
; mov .dc12g,eax - push eax - mov ax,word[.col2b] ;;--- - sub ax,word[.col1b] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12b,eax - push eax - .gt_dx12_done: - - mov bx,.y3 ; calc deltas - sub bx,.y1 - jnz .gt_dx13_make - ; mov .dx13,0 - ; mov .dz13,0 - ; mov .dc13r,0 - ; mov .dc13g,0 - ; mov .dc13b,0 - mov ecx,5 - @@: - push dword 0 - loop @b - jmp .gt_dx13_done - .gt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx13,eax - push eax - - mov ax,word[.z3] - sub ax,word[.z1] - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - - mov ax,word[.col3r] - sub ax,word[.col1r] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc13r,eax - push eax - mov ax,word[.col3g] - sub ax,word[.col1g] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc13g,eax - push eax - mov ax,word[.col3b] - sub ax,word[.col1b] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc13b,eax - push eax - .gt_dx13_done: - - mov bx,.y3 ; calc deltas - sub bx,.y2 - jnz .gt_dx23_make - ; mov .dx23,0 - ; mov .dz23,0 - ; mov .dc23r,0 - ; mov .dc23g,0 - ; mov .dc23b,0 - mov ecx,5 - @@: - push dword 0 - loop @b - jmp .gt_dx23_done - .gt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx23,eax - push eax - - mov ax,word[.z3] - sub ax,word[.z2] - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - - mov ax,word[.col3r] - sub ax,word[.col2r] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc23r,eax - push eax - mov ax,word[.col3g] - sub ax,word[.col2g] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc23g,eax - push eax - mov ax,word[.col3b] - sub ax,word[.col2b] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc23b,eax - push eax - .gt_dx23_done: - sub esp,32 - - movsx eax,.x1 ; eax - cur x1 - shl eax,ROUND ; ebx - cur x2 - mov ebx,eax - movsx edx,word[.z1] - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx - movzx edx,word[.col1r] - shl edx,ROUND - mov .c1r,edx - mov .c2r,edx - 
movzx edx,word[.col1g] - shl edx,ROUND - mov .c1g,edx - mov .c2g,edx - movzx edx,word[.col1b] - shl edx,ROUND - mov .c1b,edx - mov .c2b,edx - mov cx,.y1 - cmp cx,.y2 - jge .gt_loop1_end - - .gt_loop1: - pushad - ; macro .debug - - mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors - sar edx,ROUND - push dx - mov edx,.c2g - sar edx,ROUND - push dx - mov edx,.c2b - sar edx,ROUND - push dx - sar ebx,ROUND ; x2 - push bx - mov edx,.c1r - sar edx,ROUND - push dx - mov edx,.c1g - sar edx,ROUND - push dx - mov edx,.c1b - sar edx,ROUND - push dx - sar eax,ROUND - push ax ; x1 - push cx ; y - push .zz2 - push .zz1 - call gouraud_line_z - - popad -if Ext >= MMX - movq mm0,.c1bM - paddd mm0,qword .dc13bM - movq .c1bM,mm0 - movq mm1,.c2bM - paddd mm1,qword .dc12bM - movq .c2bM,mm1 - - movq mm0,.c1rM - paddd mm0,qword .dc13rM - movq .c1rM,mm0 - movq mm1,.c2rM - paddd mm1,qword .dc12rM - movq .c2rM,mm1 -else - mov edx,.dc13r - add .c1r,edx - mov edx,.dc13g - add .c1g,edx - mov edx,.dc13b - add .c1b,edx - mov edx,.dc12r - add .c2r,edx - mov edx,.dc12g - add .c2g,edx - mov edx,.dc12b - add .c2b,edx - - mov edx,.dz13 - add .zz1,edx - mov edx,.dz12 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx12 - inc cx - cmp cx,.y2 - jl .gt_loop1 - - .gt_loop1_end: - mov cx,.y2 - cmp cx,.y3 - jge .gt_loop2_end - - movsx ebx,.x2 ; eax - cur x1 - shl ebx,ROUND ; ebx - cur x2 - movsx edx,word[.z2] - shl edx,CATMULL_SHIFT - mov .zz2,edx - movzx edx,word[.col2r] - shl edx,ROUND - mov .c2r,edx - movzx edx,word[.col2g] - shl edx,ROUND - mov .c2g,edx - movzx edx,word[.col2b] - shl edx,ROUND - mov .c2b,edx - - .gt_loop2: - pushad - ; macro .debug - - mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors - sar edx,ROUND - push dx - mov edx,.c2g - sar edx,ROUND - push dx - mov edx,.c2b - sar edx,ROUND - push dx - sar ebx,ROUND ; x2 - push bx - mov edx,.c1r - sar edx,ROUND - push dx - mov edx,.c1g - sar edx,ROUND - push dx - mov edx,.c1b - sar edx,ROUND - push dx - sar eax,ROUND - push ax ; x1 - 
push cx ; y - push .zz2 - push .zz1 - call gouraud_line_z - - popad - -if Ext >= MMX - movq mm0,.c1bM - paddd mm0,qword .dc13bM - movq .c1bM,mm0 - movq mm1,.c2bM - paddd mm1,qword .dc23bM - movq .c2bM,mm1 - - movq mm0,.c1rM - paddd mm0,qword .dc13rM - movq .c1rM,mm0 - movq mm1,.c2rM - paddd mm1,qword .dc23rM - movq .c2rM,mm1 -else - mov edx,.dc13r - add .c1r,edx - mov edx,.dc13g - add .c1g,edx - mov edx,.dc13b - add .c1b,edx - mov edx,.dc23r - add .c2r,edx - mov edx,.dc23g - add .c2g,edx - mov edx,.dc23b - add .c2b,edx - mov edx,.dz13 - add .zz1,edx - mov edx,.dz23 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx23 - inc cx - cmp cx,.y3 - jl .gt_loop2 - .gt_loop2_end: - - mov esp,ebp -ret 24 -gouraud_line_z: -;----------------- procedure drawing gouraud line -;----------------- with z coordinate interpolation -;----------------- esi - pointer to Z_buffer -;----------------- edi - pointer to screen buffer -;----------------- stack: -.z1 equ dword[ebp+4] ; z coordiunate shifted left CATMULL_SHIFT -.z2 equ dword[ebp+8] -.y equ word[ebp+12] -.x1 equ ebp+14 -.c1b equ ebp+16 -.c1g equ ebp+18 -.c1r equ ebp+20 -.x2 equ ebp+22 -.c2b equ ebp+24 -.c2g equ ebp+26 -.c2r equ ebp+28 - -.dz equ dword[ebp-4] -.dc_b equ dword[ebp-8] -.dc_g equ dword[ebp-12] -.dc_r equ dword[ebp-16] -.c_z equ dword[ebp-20] -.cb equ dword[ebp-24] -.cg equ dword[ebp-28] -.cr equ dword[ebp-32] -;.cg2 equ dword[ebp-36] - - -.crM equ ebp-32 -.cgM equ ebp-28 -.cbM equ ebp-24 - -.dc_rM equ ebp-16 -.dc_gM equ ebp-12 -.dc_bM equ ebp-8 - mov ebp,esp - - mov ax,.y - or ax,ax - jl .gl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;SIZE_Y - jge .gl_quit - - mov eax,dword[.x1] - cmp ax,word[.x2] - je .gl_quit - jl @f - - xchg eax,dword[.x2] - mov dword[.x1],eax - mov eax,dword[.c1g] - xchg eax,dword[.c2g] - mov dword[.c1g],eax - mov eax,.z1 - xchg eax,.z2 - mov .z1,eax - @@: - mov bx,[size_x_var] - dec bx - cmp word[.x1],bx ;SIZE_X - jge .gl_quit - cmp word[.x2],0 - jle .gl_quit - - mov eax,.z2 - sub eax,.z1 
- cdq - mov bx,word[.x2] ; dz = z2-z1/x2-x1 - sub bx,word[.x1] - movsx ebx,bx - idiv ebx - push eax - - mov ax,word[.c2b] - sub ax,word[.c1b] - cwde - shl eax,ROUND - cdq - idiv ebx - push eax - - mov ax,word[.c2g] - sub ax,word[.c1g] - cwde - shl eax,ROUND - cdq - idiv ebx - push eax - - mov ax,word[.c2r] - sub ax,word[.c1r] - cwde - shl eax,ROUND ; dc_r = c2r-c1r/x2-x1 - cdq - idiv ebx - push eax - - cmp word[.x1],0 ; clipping on function - jg @f - mov eax,.dz - movsx ebx,word[.x1] - neg ebx - imul ebx - add .z1,eax - mov word[.x1],0 - - mov eax,.dc_r - imul ebx - sar eax,ROUND - add word[.c1r],ax - - mov eax,.dc_g - imul ebx - sar eax,ROUND - add word[.c1g],ax - - mov eax,.dc_b - imul ebx - sar eax,ROUND - add word[.c1b],ax - - @@: - mov bx,[size_x_var] - dec bx - cmp word[.x2],bx ;SIZE_X - jl @f - mov word[.x2],bx ;SIZE_X - @@: - sub esp,16 ; calculate memory begin - movzx edx,word[size_x_var] ;SIZE_X ; in buffers - movzx eax,.y - mul edx - movzx edx,word[.x1] - add eax,edx - push eax - lea eax,[eax*3] - add edi,eax - pop eax - shl eax,2 - add esi,eax - - mov cx,word[.x2] - sub cx,word[.x1] - movzx ecx,cx - mov ebx,.z1 ; ebx - currrent z shl CATMULL_SIFT -;if Ext >= SSE -; mov .cz,edx -;end if - mov edx,.dz ; edx - delta z - movzx eax,word[.c1r] - shl eax,ROUND - mov .cr,eax - movzx eax,word[.c1g] - shl eax,ROUND - mov .cg,eax - movzx eax,word[.c1b] - shl eax,ROUND - mov .cb,eax -if Ext = MMX -; mov .c_z,edx - movd mm2,[.dc_bM] ; delta color blue MMX - movd mm3,[.cbM] ; current blue MMX - movq mm5,[.dc_rM] - movq mm4,[.crM] - pxor mm6,mm6 -end if - - - .ddraw: -;if Ext = MMX -; movq mm0,mm3 -; psrsq mm0,32 -; movd ebx,mm0 -;end if - cmp ebx,dword[esi] ; esi - z_buffer - jge @f ; edi - Screen buffer -if Ext = MMX - movq mm0,mm3 ; mm0, mm1 - temp registers - psrld mm0,ROUND - movq mm1,mm4 - psrld mm1,ROUND - packssdw mm1,mm0 - packuswb mm1,mm6 -; movd [edi],mm1 - movd eax,mm1 - stosw - shr eax,16 - stosb -else - mov eax,.cr - sar eax,ROUND - stosb - mov eax,.cg - 
sar eax,ROUND - stosb - mov eax,.cb - sar eax,ROUND - stosb -end if - mov dword[esi],ebx -;if Ext = NON - jmp .no_skip -;end if - @@: - add edi,3 - .no_skip: - add esi,4 -;if Ext=NON - add ebx,edx -;end if -if Ext=MMX - paddd mm3,mm2 - paddd mm4,mm5 -else - mov eax,.dc_g - add .cg,eax - mov eax,.dc_b - add .cb,eax - mov eax,.dc_r - add .cr,eax -end if - loop .ddraw - - .gl_quit: - mov esp,ebp -ret 26 +ROUND equ 8 +CATMULL_SHIFT equ 8 +gouraud_triangle_z: + +;----procedure drawing gouraud triangle with z coordinate +;----interpolation ( Catmull alghoritm )----------------- +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- esi - pointer to Z-buffer-------- +;---------------------- Z-buffer filled with dd variables +;---------------------- shifted CATMULL_SHIFT------------ +;---------------------- edi - pointer to screen buffer--- +;---------------------- stack : colors------------------- +;----------------- procedure don't save registers !!----- +.col1r equ ebp+4 ; each color as word +.col1g equ ebp+6 ; each z coordinate as word +.col1b equ ebp+8 +.z1 equ ebp+10 +.col2r equ ebp+12 +.col2g equ ebp+14 +.col2b equ ebp+16 +.z2 equ ebp+18 +.col3r equ ebp+20 +.col3g equ ebp+22 +.col3b equ ebp+24 +.z3 equ ebp+26 + +.x1 equ word[ebp-2] +.y1 equ word[ebp-4] +.x2 equ word[ebp-6] +.y2 equ word[ebp-8] +.x3 equ word[ebp-10] +.y3 equ word[ebp-12] + +.dx12 equ dword[ebp-16] +.dz12 equ dword[ebp-20] +.dc12r equ dword[ebp-24] +.dc12g equ dword[ebp-28] +.dc12b equ dword[ebp-32] + +.dx13 equ dword[ebp-36] +.dz13 equ dword[ebp-40] +.dc13r equ dword[ebp-44] +.dc13g equ dword[ebp-48] +.dc13b equ dword[ebp-52] + +.dx23 equ dword[ebp-56] +.dz23 equ dword[ebp-60] +.dc23r equ dword[ebp-64] +.dc23g equ dword[ebp-68] +.dc23b equ dword[ebp-72] + +.zz1 equ dword[ebp-76] +.c1r equ dword[ebp-80] +.c1g equ dword[ebp-84] +.c1b equ dword[ebp-88] +.zz2 
equ dword[ebp-92] +.c2r equ dword[ebp-96] +.c2g equ dword[ebp-100] +.c2b equ dword[ebp-104] +;.zz1 equ dword[ebp-100] +;.zz2 equ dword[ebp-104] + +.c1bM equ [ebp-88] +.c2bM equ [ebp-104] +.c1rM equ [ebp-80] +.c2rM equ [ebp-96] +.dc23bM equ [ebp-72] +.dc13bM equ [ebp-52] +.dc12bM equ [ebp-32] +.dc12rM equ [ebp-24] +.dc13rM equ [ebp-44] +.dc23rM equ [ebp-64] +if Ext=MMX + emms +end if + + mov ebp,esp + ; sub esp,84 + .sort3: ; sort triangle coordinates... + cmp ax,bx + jle .sort1 + xchg eax,ebx + mov edx,dword[.col1r] + xchg edx,dword[.col2r] + mov dword[.col1r],edx + mov edx,dword[.col1b] + xchg edx,dword[.col2b] + mov dword[.col1b],edx + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.col2r] + xchg edx,dword[.col3r] + mov dword[.col2r],edx + mov edx,dword[.col2b] + xchg edx,dword[.col3b] + mov dword[.col2b],edx + jmp .sort3 + .sort2: + push eax ; store in variables + push ebx + push ecx + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .gt_loop2_end + + mov bx,.y2 ; calc deltas + sub bx,.y1 + jnz .gt_dx12_make + ; mov .dx12,0 + ; mov .dz12,0 + ; mov .dc12r,0 + ; mov .dc12g,0 + ; mov .dc12b,0 + mov ecx,5 + @@: + push dword 0 + loop @b + jmp .gt_dx12_done + .gt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax + push eax + + mov ax,word[.z2] + sub ax,word[.z1] + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + + mov ax,word[.col2r] + sub ax,word[.col1r] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc12r,eax + push eax + mov ax,word[.col2g] + sub ax,word[.col1g] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc12g,eax + push eax + mov ax,word[.col2b] ;;--- + sub ax,word[.col1b] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc12b,eax + push eax + .gt_dx12_done: + + mov bx,.y3 ; calc deltas + sub bx,.y1 
+ jnz .gt_dx13_make + ; mov .dx13,0 + ; mov .dz13,0 + ; mov .dc13r,0 + ; mov .dc13g,0 + ; mov .dc13b,0 + mov ecx,5 + @@: + push dword 0 + loop @b + jmp .gt_dx13_done + .gt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx13,eax + push eax + + mov ax,word[.z3] + sub ax,word[.z1] + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + + mov ax,word[.col3r] + sub ax,word[.col1r] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc13r,eax + push eax + mov ax,word[.col3g] + sub ax,word[.col1g] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc13g,eax + push eax + mov ax,word[.col3b] + sub ax,word[.col1b] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc13b,eax + push eax + .gt_dx13_done: + + mov bx,.y3 ; calc deltas + sub bx,.y2 + jnz .gt_dx23_make + ; mov .dx23,0 + ; mov .dz23,0 + ; mov .dc23r,0 + ; mov .dc23g,0 + ; mov .dc23b,0 + mov ecx,5 + @@: + push dword 0 + loop @b + jmp .gt_dx23_done + .gt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx23,eax + push eax + + mov ax,word[.z3] + sub ax,word[.z2] + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + + mov ax,word[.col3r] + sub ax,word[.col2r] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc23r,eax + push eax + mov ax,word[.col3g] + sub ax,word[.col2g] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc23g,eax + push eax + mov ax,word[.col3b] + sub ax,word[.col2b] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc23b,eax + push eax + .gt_dx23_done: + sub esp,32 + + movsx eax,.x1 ; eax - cur x1 + shl eax,ROUND ; ebx - cur x2 + mov ebx,eax + movsx edx,word[.z1] + shl edx,CATMULL_SHIFT + mov .zz1,edx + mov .zz2,edx + movzx edx,word[.col1r] + shl edx,ROUND + mov .c1r,edx + mov .c2r,edx + movzx edx,word[.col1g] + shl edx,ROUND + mov .c1g,edx + mov .c2g,edx + movzx edx,word[.col1b] + shl edx,ROUND + mov .c1b,edx + mov .c2b,edx + mov cx,.y1 + cmp cx,.y2 + jge .gt_loop1_end + + .gt_loop1: + pushad + 
; macro .debug + + mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors + sar edx,ROUND + push dx + mov edx,.c2g + sar edx,ROUND + push dx + mov edx,.c2b + sar edx,ROUND + push dx + sar ebx,ROUND ; x2 + push bx + mov edx,.c1r + sar edx,ROUND + push dx + mov edx,.c1g + sar edx,ROUND + push dx + mov edx,.c1b + sar edx,ROUND + push dx + sar eax,ROUND + push ax ; x1 + push cx ; y + push .zz2 + push .zz1 + call gouraud_line_z + + popad +if Ext >= MMX + movq mm0,.c1bM + paddd mm0,qword .dc13bM + movq .c1bM,mm0 + movq mm1,.c2bM + paddd mm1,qword .dc12bM + movq .c2bM,mm1 + + movq mm0,.c1rM + paddd mm0,qword .dc13rM + movq .c1rM,mm0 + movq mm1,.c2rM + paddd mm1,qword .dc12rM + movq .c2rM,mm1 +else + mov edx,.dc13r + add .c1r,edx + mov edx,.dc13g + add .c1g,edx + mov edx,.dc13b + add .c1b,edx + mov edx,.dc12r + add .c2r,edx + mov edx,.dc12g + add .c2g,edx + mov edx,.dc12b + add .c2b,edx + + mov edx,.dz13 + add .zz1,edx + mov edx,.dz12 + add .zz2,edx +end if + add eax,.dx13 + add ebx,.dx12 + inc cx + cmp cx,.y2 + jl .gt_loop1 + + .gt_loop1_end: + mov cx,.y2 + cmp cx,.y3 + jge .gt_loop2_end + + movsx ebx,.x2 ; eax - cur x1 + shl ebx,ROUND ; ebx - cur x2 + movsx edx,word[.z2] + shl edx,CATMULL_SHIFT + mov .zz2,edx + movzx edx,word[.col2r] + shl edx,ROUND + mov .c2r,edx + movzx edx,word[.col2g] + shl edx,ROUND + mov .c2g,edx + movzx edx,word[.col2b] + shl edx,ROUND + mov .c2b,edx + + .gt_loop2: + pushad + ; macro .debug + + mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors + sar edx,ROUND + push dx + mov edx,.c2g + sar edx,ROUND + push dx + mov edx,.c2b + sar edx,ROUND + push dx + sar ebx,ROUND ; x2 + push bx + mov edx,.c1r + sar edx,ROUND + push dx + mov edx,.c1g + sar edx,ROUND + push dx + mov edx,.c1b + sar edx,ROUND + push dx + sar eax,ROUND + push ax ; x1 + push cx ; y + push .zz2 + push .zz1 + call gouraud_line_z + + popad + +if Ext >= MMX + movq mm0,.c1bM + paddd mm0,qword .dc13bM + movq .c1bM,mm0 + movq mm1,.c2bM + paddd mm1,qword .dc23bM + movq .c2bM,mm1 + + 
movq mm0,.c1rM + paddd mm0,qword .dc13rM + movq .c1rM,mm0 + movq mm1,.c2rM + paddd mm1,qword .dc23rM + movq .c2rM,mm1 +else + mov edx,.dc13r + add .c1r,edx + mov edx,.dc13g + add .c1g,edx + mov edx,.dc13b + add .c1b,edx + mov edx,.dc23r + add .c2r,edx + mov edx,.dc23g + add .c2g,edx + mov edx,.dc23b + add .c2b,edx + mov edx,.dz13 + add .zz1,edx + mov edx,.dz23 + add .zz2,edx +end if + add eax,.dx13 + add ebx,.dx23 + inc cx + cmp cx,.y3 + jl .gt_loop2 + .gt_loop2_end: + + mov esp,ebp +ret 24 +gouraud_line_z: +;----------------- procedure drawing gouraud line +;----------------- with z coordinate interpolation +;----------------- esi - pointer to Z_buffer +;----------------- edi - pointer to screen buffer +;----------------- stack: +.z1 equ dword[ebp+4] ; z coordiunate shifted left CATMULL_SHIFT +.z2 equ dword[ebp+8] +.y equ word[ebp+12] +.x1 equ ebp+14 +.c1b equ ebp+16 +.c1g equ ebp+18 +.c1r equ ebp+20 +.x2 equ ebp+22 +.c2b equ ebp+24 +.c2g equ ebp+26 +.c2r equ ebp+28 + +.dz equ dword[ebp-4] +.dc_b equ dword[ebp-8] +.dc_g equ dword[ebp-12] +.dc_r equ dword[ebp-16] +.c_z equ dword[ebp-20] +.cb equ dword[ebp-24] +.cg equ dword[ebp-28] +.cr equ dword[ebp-32] +;.cg2 equ dword[ebp-36] + + +.crM equ ebp-32 +.cgM equ ebp-28 +.cbM equ ebp-24 + +.dc_rM equ ebp-16 +.dc_gM equ ebp-12 +.dc_bM equ ebp-8 + mov ebp,esp + + mov ax,.y + or ax,ax + jl .gl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;SIZE_Y + jge .gl_quit + + mov eax,dword[.x1] + cmp ax,word[.x2] + je .gl_quit + jl @f + + xchg eax,dword[.x2] + mov dword[.x1],eax + mov eax,dword[.c1g] + xchg eax,dword[.c2g] + mov dword[.c1g],eax + mov eax,.z1 + xchg eax,.z2 + mov .z1,eax + @@: + mov bx,[size_x_var] + dec bx + cmp word[.x1],bx ;SIZE_X + jge .gl_quit + cmp word[.x2],0 + jle .gl_quit + + mov eax,.z2 + sub eax,.z1 + cdq + mov bx,word[.x2] ; dz = z2-z1/x2-x1 + sub bx,word[.x1] + movsx ebx,bx + idiv ebx + push eax + + mov ax,word[.c2b] + sub ax,word[.c1b] + cwde + shl eax,ROUND + cdq + idiv ebx + push eax + + mov 
ax,word[.c2g] + sub ax,word[.c1g] + cwde + shl eax,ROUND + cdq + idiv ebx + push eax + + mov ax,word[.c2r] + sub ax,word[.c1r] + cwde + shl eax,ROUND ; dc_r = c2r-c1r/x2-x1 + cdq + idiv ebx + push eax + + cmp word[.x1],0 ; clipping on function + jg @f + mov eax,.dz + movsx ebx,word[.x1] + neg ebx + imul ebx + add .z1,eax + mov word[.x1],0 + + mov eax,.dc_r + imul ebx + sar eax,ROUND + add word[.c1r],ax + + mov eax,.dc_g + imul ebx + sar eax,ROUND + add word[.c1g],ax + + mov eax,.dc_b + imul ebx + sar eax,ROUND + add word[.c1b],ax + + @@: + mov bx,[size_x_var] + dec bx + cmp word[.x2],bx ;SIZE_X + jl @f + mov word[.x2],bx ;SIZE_X + @@: + sub esp,16 ; calculate memory begin + movzx edx,word[size_x_var] ;SIZE_X ; in buffers + movzx eax,.y + mul edx + movzx edx,word[.x1] + add eax,edx + push eax + lea eax,[eax*3] + add edi,eax + pop eax + shl eax,2 + add esi,eax + + mov cx,word[.x2] + sub cx,word[.x1] + movzx ecx,cx + mov ebx,.z1 ; ebx - currrent z shl CATMULL_SIFT +;if Ext >= SSE +; mov .cz,edx +;end if + mov edx,.dz ; edx - delta z + movzx eax,word[.c1r] + shl eax,ROUND + mov .cr,eax + movzx eax,word[.c1g] + shl eax,ROUND + mov .cg,eax + movzx eax,word[.c1b] + shl eax,ROUND + mov .cb,eax +if Ext = MMX +; mov .c_z,edx + movd mm2,[.dc_bM] ; delta color blue MMX + movd mm3,[.cbM] ; current blue MMX + movq mm5,[.dc_rM] + movq mm4,[.crM] + pxor mm6,mm6 +end if + + + .ddraw: +;if Ext = MMX +; movq mm0,mm3 +; psrsq mm0,32 +; movd ebx,mm0 +;end if + cmp ebx,dword[esi] ; esi - z_buffer + jge @f ; edi - Screen buffer +if Ext = MMX + movq mm0,mm3 ; mm0, mm1 - temp registers + psrld mm0,ROUND + movq mm1,mm4 + psrld mm1,ROUND + packssdw mm1,mm0 + packuswb mm1,mm6 +; movd [edi],mm1 + movd eax,mm1 + stosw + shr eax,16 + stosb +else + mov eax,.cr + sar eax,ROUND + stosb + mov eax,.cg + sar eax,ROUND + stosb + mov eax,.cb + sar eax,ROUND + stosb +end if + mov dword[esi],ebx +;if Ext = NON + jmp .no_skip +;end if + @@: + add edi,3 + .no_skip: + add esi,4 +;if Ext=NON + add ebx,edx 
+;end if +if Ext=MMX + paddd mm3,mm2 + paddd mm4,mm5 +else + mov eax,.dc_g + add .cg,eax + mov eax,.dc_b + add .cb,eax + mov eax,.dc_r + add .cr,eax +end if + loop .ddraw + + .gl_quit: + mov esp,ebp +ret 26 diff --git a/programs/demos/view3ds/grd_tex.inc b/programs/demos/view3ds/grd_tex.inc index be6c28e299..ffa326d39c 100644 --- a/programs/demos/view3ds/grd_tex.inc +++ b/programs/demos/view3ds/grd_tex.inc @@ -1,1016 +1,1016 @@ - - -CATMULL_SHIFT equ 8 -ROUND equ 8 -;NON=0 -;MMX=1 -;Ext=MMX -;TEX_SIZE=0x3fff -;SIZE_X equ 512 -;SIZE_Y equ 512 -;ROUND = 8 -;TEX_SHIFT equ 6 - -; procedure drawing textured triangle with Gouraud shading -; Z-buffer alghoritm included, Z coord interpolation ---- -; I set the color by this way -- (col1 * col2)/256 ------ -;------------------in - eax - x1 shl 16 + y1 ------------ -;---------------------- ebx - x2 shl 16 + y2 ------------ -;---------------------- ecx - x3 shl 16 + y3 ------------ -;---------------------- esi - pointer to Z-buffer-------- -;---------------------- edx - pointer to texture--------- -;---------------------- Z-buffer filled with dd variables -;---------------------- shifted CATMULL_SHIFT------------ -;---------------------- edi - pointer to screen buffer--- -;---------------------- stack : colors------------------- - - - -tex_plus_grd_triangle: -; parameters : - .tex_y3 equ [ebp+38] ; 36 bytes through stack - .tex_x3 equ [ebp+36] - .tex_y2 equ [ebp+34] - .tex_x2 equ [ebp+32] - .tex_y1 equ [ebp+30] - .tex_x1 equ [ebp+28] - - .z3 equ [ebp+26] - .col3b equ [ebp+24] - .col3g equ [ebp+22] - .col3r equ [ebp+20] - - .z2 equ [ebp+18] - .col2b equ [ebp+16] - .col2g equ [ebp+14] - .col2r equ [ebp+12] - - .z1 equ [ebp+10] - .col1b equ [ebp+8] - .col1g equ [ebp+6] - .col1r equ [ebp+4] - -; local variables: - - .tex_ptr equ dword[ebp-4] - .z_ptr equ dword[ebp-8] - .scr_buff equ dword[ebp-12] - - .x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10] - .y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12] - .x2 equ word[ebp-18] ;dw ? 
;equ word[ebp-14] - .y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16] - .x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18] - .y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20] - - .dx12 equ dword[ebp-28] ;dd ? - .tex_dx12 equ dword[ebp-32] ;dd ? - .tex_dy12 equ [ebp-36] ;dd ? - .dz12 equ dword[ebp-40] ;dd ? - .dc12r equ [ebp-44] ;dd ? - .dc12g equ dword[ebp-48] ;dd ? - .dc12b equ [ebp-52] ;dd ? - - .dx23 equ dword[ebp-56] ;dd ? - .tex_dx23 equ dword[ebp-60] ;dd ? - .tex_dy23 equ [ebp-64] ;dd ? - .dz23 equ dword[ebp-68] ;dd ? - .dc23r equ [ebp-72] ;dd ? - .dc23g equ dword[ebp-76] ;dd ? - .dc23b equ [ebp-80] ;dword[ebp-8]dd ? - - .dx13 equ dword[ebp-84] ;dd ? - .tex_dx13 equ dword[ebp-88] ;dd ? - .tex_dy13 equ [ebp-92] ;dd ? - .dz13 equ dword[ebp-96] ;dd ? - .dc13r equ [ebp-100] ;dd ? - .dc13g equ dword[ebp-104] ;dd ? - .dc13b equ [ebp-108] ;dd ? - - .scan_x1 equ dword[ebp-112] ;dd ? - .scan_y1 equ [ebp-116] ;dd ? - .zz1 equ dword[ebp-120] ;dw ? - .cur1r equ [ebp-124] ;dw ? - .cur1g equ dword[ebp-128] ;dw ? - .cur1b equ [ebp-132] ;dw ? - - .scan_x2 equ dword[ebp-136] ;dd ? - .scan_y2 equ [ebp-140] ;dd ? - .zz2 equ dword[ebp-144] ;dw ? - .cur2r equ [ebp-148] ;dw ? - .cur2g equ dword[ebp-152] ;dw ? - .cur2b equ [ebp-156] ;dw ? 
- - - mov ebp,esp - - ; mov .tex_ptr,edx - ; mov .z_ptr,esi - ; mov .scr_buff,edi - push edx esi edi -; push esi -; push edi - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop2_end - - .sort3: - cmp ax,bx - jle .sort1 - xchg eax,ebx -if Ext>=MMX - movq mm0, .col1r ; exchange r, g, b, z - movq mm1, .col2r - movq .col1r ,mm1 - movq .col2r ,mm0 -else - mov edx,dword .col1r ; exchange both r and g - xchg edx,dword .col2r - mov dword .col1r ,edx - - mov edx,dword .col1b ; b and z - xchg edx,dword .col2b - mov dword .col1b ,edx -end if - - mov edx,dword .tex_x1 - xchg edx,dword .tex_x2 - mov dword .tex_x1 ,edx - - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - -if Ext>=MMX - movq mm0, .col2r ; exchange r, g, b, z - movq mm1, .col3r - movq .col3r ,mm0 - movq .col2r ,mm1 -else - - mov edx,dword .col2r ; r, g - xchg edx,dword .col3r - mov dword .col2r,edx - - mov edx,dword .col2b ; b, z - xchg edx,dword .col3b - mov dword .col2b,edx -end if - - mov edx,dword .tex_x2 - xchg edx,dword .tex_x3 - mov dword .tex_x2,edx - - jmp .sort3 - - .sort2: - - push eax ebx ecx ; store in variables - ; push ebx - ; push ecx - -;****************** delta computng zone ************** -;+++++++++ first zone - mov bx,.y2 ; calc delta12 - sub bx,.y1 - jnz .dx12_make - mov ecx,7 - @@: - push dword 0 - loop @b - jmp .dx12_done - .dx12_make: - - - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax - push eax - -if 0 ; Ext=SSE - movd mm0,.col1r ; 2 words r, g - pxor mm1,mm1 - punpcklwd mm0,mm1 - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - movd mm0,.col1g ; 2 words b, z - punpcklwd mm0,mm1 - cvtpi2ps xmm0,mm0 - ; xmm0=four float double words - divss xmm0,.pack3 - ;convert and insert mm0 to lower xmm1 .. 
-end if - - mov ax,word .tex_x2 - sub ax,word .tex_x1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx12r,eax - push eax - - mov ax,word .tex_y2 - sub ax,word .tex_y1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx12,eax - push eax - - mov ax,word .z2 - sub ax,word .z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz12,eax - push eax ; .dza12 - - mov ax,word .col2r - sub ax,word .col1r - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12r,eax - push eax - - mov ax,word .col2g - sub ax,word .col1g - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc12g,eax - push eax - - mov ax,word .col2b ;;--- - sub ax,word .col1b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc12b,eax - push eax - -;+++++++++++++++++ second zone +++++++++++++ - .dx12_done: - - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .dx23_make - mov ecx,7 - @@: - push dword 0 - loop @b - jmp .dx23_done - - .dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx23,eax - push eax - - mov ax,word .tex_x3 - sub ax,word .tex_x2 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx23,eax - push eax - - mov ax,word .tex_y3 - sub ax,word .tex_y2 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dy23,eax - push eax - - mov ax,word .z3 - sub ax,word .z2 - cwde ; - shl eax,CATMULL_SHIFT ; 2222222 - cdq ; 2 2 - idiv ebx ; 2 -; mov .dz23,eax ; 2 - push eax ; .dza12 ; 2 - ; 2 - mov ax,word .col3r ; 2 - sub ax,word .col2r ; 2222222 - cwde ; second delta - shl eax,ROUND ; - cdq ; - idiv ebx ; -; mov .dc23r,eax ; - push eax - - mov ax,word .col3g - sub ax,word .col2g - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc23g,eax - push eax - - mov ax,word .col3b ;;--- - sub ax,word .col2b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc23b,eax - push eax - - .dx23_done: -;++++++++++++++++++third zone++++++++++++++++++++++++ - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .dx13_make - mov ecx,7 - @@: - push dword 0 - loop @b - jmp .dx13_done - 
.dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx -; mov .dx13,eax - push eax - - mov ax,word .tex_x3 ; triangle b - sub ax,word .tex_x1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dx13r,eax - push eax - - mov ax,word .tex_y3 - sub ax,word .tex_y1 - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .tex_dy13,eax - push eax - - mov ax,word .z3 - sub ax,word .z1 ; 333333333 - cwde ; 3 3 - shl eax,CATMULL_SHIFT ; 3 - cdq ; 3 - idiv ebx ; 3 -; mov .dz13,eax ; 3 - push eax ; .dza12 ; 3 - ; 3 - mov ax,word .col3r ; 3333333333 - sub ax,word .col1r ; 3 - cwde ; 3 - shl eax,ROUND ; 3 - cdq ; 3 - idiv ebx ; 3 - ; mov .dc13r,eax ; 3 3 - push eax ; 33333333 - - mov ax,word .col3g - sub ax,word .col1g - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dc13g,eax - push eax - - mov ax,word .col3b ;;--- - sub ax,word .col1b - cwde - shl eax,ROUND - cdq - idiv ebx -; mov .dc13b,eax - push eax - - .dx13_done: - -; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>> - sub esp,55 ;(12*4) - - movsx eax,.x1 ; eax - cur x1 - shl eax,ROUND ; ebx - cur x2 - mov ebx,eax - movsx edx,word .z1 - shl edx,CATMULL_SHIFT - mov .zz1,edx - mov .zz2,edx - - movzx edi,word .col1r - shl edi,ROUND - mov .cur1r,edi - mov .cur2r,edi - movzx esi,word .col1g - shl esi,ROUND - mov .cur1g,esi - mov .cur2g,esi - movzx edx,word .col1b - shl edx,ROUND - mov .cur1b,edx - mov .cur2b,edx - - movzx edi,word .tex_x1 - shl edi,ROUND - mov .scan_x1,edi - mov .scan_x2,edi - movzx edx,word .tex_y1 - shl edx,ROUND - mov .scan_y1,edx - mov .scan_y2,edx - - mov cx,.y1 - cmp cx,.y2 - jge .loop1_end - .loop_1: - ; push eax ebx ebp - pushad - - push .tex_ptr - push .scr_buff - push .z_ptr - push cx - - push .zz2 - - push .scan_x2 - push dword .scan_y2 - push dword .cur2r - push .cur2g - push dword .cur2b - - push .zz1 - - push .scan_x1 - push dword .scan_y1 - push dword .cur1r - push .cur1g - push dword .cur1b - - sar eax,ROUND - sar ebx,ROUND - call horizontal_tex_grd_line - - ; 
pop ebp ebx eax - popad - -if (Ext = MMX)|(Ext=SSE) - movq mm0,.cur1b - movq mm1,.cur1r - movq mm2,.scan_y1 - movq mm3,.cur2b - movq mm4,.cur2r - movq mm5,.scan_y2 - paddd mm0,.dc13b - paddd mm1,.dc13r - paddd mm2,.tex_dy13 - paddd mm3,.dc12b - paddd mm4,.dc12r - paddd mm5,.tex_dy12 - movq .cur1b,mm0 - movq .cur1r,mm1 - movq .scan_y1,mm2 - movq .cur2b,mm3 - movq .cur2r,mm4 - movq .scan_y2,mm5 -end if -if Ext >= SSE2 - movups xmm0,.cur1b - movups xmm1,.dc13b - movups xmm2,.cur2b - movups xmm3,.dc12b - movq mm2,.scan_y1 - movq mm5,.scan_y2 - paddd xmm0,xmm1 - paddd xmm2,xmm3 - paddd mm2,.tex_dy13 - paddd mm5,.tex_dy12 - movq .scan_y1,mm2 - movq .scan_y2,mm5 - movups .cur1b,xmm0 - movups .cur2b,xmm2 -end if - -if Ext = NON - mov edx,.dc13b - add .cur1b,edx - mov esi,.dc13g - add .cur1g,esi - mov edi,.dc13r - add .cur1r,edi - mov edx,.dz13 - add .zz1,edx - mov edx,.tex_dx13 - add .scan_x1,edx - mov esi,.tex_dy13 - add .scan_y1,esi - - mov edi,.dc12b - add .cur2b,edi - mov esi,.dc12g - add .cur2g,esi - mov edx,.dc12r - add .cur2r,edx - mov edi,.tex_dx12 - add .scan_x2,edi - mov esi,.tex_dy12 - add .scan_y2,esi - mov edx,.dz12 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx12 - inc cx - cmp cx,.y2 - jl .loop_1 - .loop1_end: - movzx ecx,.y2 - cmp cx,.y3 - jge .loop2_end - - movsx ebx,.x2 ; eax - cur x1 - shl ebx,ROUND ; ebx - cur x2 - - movsx edx,word .z2 - shl edx,CATMULL_SHIFT -; mov .zz1,edx - mov .zz2,edx - - movzx edi,word .col2r - shl edi,ROUND - ; mov .cur1r,edi - mov .cur2r,edi - movzx esi,word .col2g - shl esi,ROUND - ; mov .cur1g,esi - mov .cur2g,esi - movzx edx,word .col2b - shl edx,ROUND - ; mov .cur1b,edx - mov .cur2b,edx - - movzx edi,word .tex_x2 - shl edi,ROUND - ; mov .scan_x1,edi - mov .scan_x2,edi - movzx edx,word .tex_y2 - shl edx,ROUND - ; mov .scan_y1,edx - mov .scan_y2,edx - - .loop_2: - pushad - - push .tex_ptr - push .scr_buff - push .z_ptr - push cx - - push .zz2 - - push .scan_x2 - push dword .scan_y2 - push dword .cur2r - push .cur2g - 
push dword .cur2b - - push .zz1 - - push .scan_x1 - push dword .scan_y1 - push dword .cur1r - push .cur1g - push dword .cur1b - - sar eax,ROUND - sar ebx,ROUND - call horizontal_tex_grd_line - - popad - -if (Ext = MMX)|(Ext=SSE) - movq mm0,.cur1b - movq mm1,.cur1r - movq mm2,.scan_y1 - movq mm3,.cur2b - movq mm4,.cur2r - movq mm5,.scan_y2 - paddd mm0,.dc13b - paddd mm1,.dc13r - paddd mm2,.tex_dy13 - paddd mm3,.dc23b - paddd mm4,.dc23r - paddd mm5,.tex_dy23 - movq .cur1b,mm0 - movq .cur1r,mm1 - movq .scan_y1,mm2 - movq .cur2b,mm3 - movq .cur2r,mm4 - movq .scan_y2,mm5 -end if -if Ext >= SSE2 - movups xmm0,.cur1b - movups xmm1,.dc13b - movups xmm2,.cur2b - movups xmm3,.dc23b - movq mm2,.scan_y1 - movq mm5,.scan_y2 - paddd xmm0,xmm1 - paddd xmm2,xmm3 - paddd mm2,.tex_dy13 - paddd mm5,.tex_dy23 - movq .scan_y1,mm2 - movq .scan_y2,mm5 - movups .cur1b,xmm0 - movups .cur2b,xmm2 -end if -if Ext = NON - mov edx,.dc13b - add .cur1b,edx - mov esi,.dc13g - add .cur1g,esi - mov edi,.dc13r - add .cur1r,edi - mov edx,.tex_dx13 - add .scan_x1,edx - mov esi,.tex_dy13 - add .scan_y1,esi - mov edx,.dz13 - add .zz1,edx - - mov edi,.dc23b - add .cur2b,edi - mov esi,.dc23g - add .cur2g,esi - mov edx,.dc23r - add .cur2r,edx - mov edi,.tex_dx23 - add .scan_x2,edi - mov esi,.tex_dy23 - add .scan_y2,esi - mov edx,.dz23 - add .zz2,edx -end if - add eax,.dx13 - add ebx,.dx23 - inc cx - cmp cx,.y3 - jl .loop_2 - -.loop2_end: - mov esp,ebp -ret 36 -horizontal_tex_grd_line: -;in: -; eax : x1, ebx : x2 - -.tex_ptr equ [ebp+62] -.screen equ [ebp+58] -.z_buffer equ [ebp+54] -.y equ [ebp+52] - -.z2 equ [ebp+48] -.tex_x2 equ [ebp+44] -.tex_y2 equ [ebp+40] -.r2 equ [ebp+36] -.g2 equ [ebp+32] -.b2 equ [ebp+28] - -.z1 equ [ebp+24] -.tex_x1 equ [ebp+20] -.tex_y1 equ [ebp+16] -.r1 equ [ebp+12] -.g1 equ [ebp+8] -.b1 equ [ebp+4] - -.x1 equ word[ebp-2] -.x2 equ word[ebp-4] -.dz equ dword[ebp-8] -.db equ dword[ebp-12] -.dg equ dword[ebp-16] -.dr equ dword[ebp-20] -.dtex_x equ dword[ebp-24] -.dtex_y equ 
dword[ebp-28] - -.c_ty equ [ebp-32] -.c_tx equ [ebp-36] -.cb equ [ebp-40] -.cg equ [ebp-44] -.cr equ [ebp-48] -.t_col equ [ebp-52] - -.dtex_yM equ qword[ebp-28] -.drM equ qword[ebp-20] -.dbM equ qword[ebp-12] - - mov ebp,esp - ; sub esp,30 - - mov cx,word .y - or cx,cx - jl .quit_l - - cmp cx,word[size_y_var] ;SIZE_Y - jge .quit_l - - cmp ax,bx - je .quit_l - jl @f - - xchg eax,ebx - -if Ext=NON - mov ecx,dword .r1 - xchg ecx, .r2 - mov dword .r1, ecx - - mov ecx,dword .g1 - xchg ecx, .g2 - mov dword .g1, ecx - - mov ecx,dword .b1 - xchg ecx, .b2 - mov dword .b1, ecx - - mov ecx,dword .tex_x1 - xchg ecx, .tex_x2 - mov dword .tex_x1, ecx - - mov ecx,dword .tex_y1 - xchg ecx, .tex_y2 - mov dword .tex_y1, ecx - - mov ecx,dword .z1 - xchg ecx, .z2 - mov dword .z1, ecx -end if -if (Ext=MMX) - movq mm0,.b1 ; b, g - movq mm1,.b2 - movq .b1, mm1 - movq .b2, mm0 - movq mm2,.r1 ; r, y - movq mm3,.r2 - movq .r1,mm3 - movq .r2,mm2 - movq mm4,.tex_x1 ; x, z - movq mm5,.tex_x2 - movq .tex_x1,mm5 - movq .tex_x2,mm4 - -end if -if Ext>=SSE - movups xmm0,.b1 - movups xmm1,.b2 - movups .b1,xmm1 - movups .b2,xmm0 - movq mm4,.tex_x1 ; x, z - movq mm5,.tex_x2 - movq .tex_x1,mm5 - movq .tex_x2,mm4 -end if - - @@: - or bx,bx - jle .quit_l - cmp ax,word[size_x_var] ;SIZE_X - jge .quit_l - - push ax - push bx - - mov eax,.z2 ; delta zone************ - sub eax,.z1 - cdq - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx - idiv ebx - push eax ; .dz - - mov eax,.b2 - sub eax,.b1 - cdq - idiv ebx - push eax ; .db - - mov eax,.g2 - sub eax,.g1 - cdq - idiv ebx - push eax ; .dg - - mov eax,.r2 - sub eax,.r1 - cdq - idiv ebx - push eax ; .dr - - mov eax,.tex_x2 - sub eax,.tex_x1 - cdq - idiv ebx - push eax ; .dtex_x - - mov eax,.tex_y2 - sub eax,.tex_y1 - cdq - idiv ebx - push eax ; .dtey_x - - cmp .x1,0 - jg @f - - mov eax,.dz ; clipping - movsx ebx,.x1 - neg ebx - imul ebx - add .z1,eax - mov .x1,0 - - mov eax,.dr - imul ebx - add .r1,eax -;if Ext=NON - mov eax,.dg - imul ebx - add .g1,eax - - mov eax,.db 
- imul ebx - add .b1,eax - - mov eax,.dtex_x - imul ebx - add .tex_x1,eax - - mov eax,.dtex_y - imul ebx - add .tex_y1,eax - @@: - movsx edx,word[size_x_var] ;SIZE_X - cmp .x2,dx - jl @f - mov .x2,dx - @@: -; calc line addres begin in screen and Z buffer - movsx eax,word .y - mul edx - movsx edx,.x1 - add eax,edx - - mov esi,eax - shl esi,2 - add esi,.z_buffer - - lea eax,[eax*3] - mov edi,.screen - add edi,eax - - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx - -; init current variables - push dword .tex_y1 -;if Ext=NON - push dword .tex_x1 - - push dword .b1 - push dword .g1 - push dword .r1 - -if Ext>=MMX - movq mm4,.cr ; lo -> r,g - movq mm6,.cb ; hi -> b, tex_x - pxor mm0,mm0 -end if - mov ebx,.z1 - .ddraw: - cmp ebx,dword[esi] - jge @f - mov eax,.c_ty -; if ROUNDTEX_SHIFT -; shr eax,ROUND-TEX_SHIFT -; end if - shr eax,ROUND - shl Eax,TEX_SHIFT - mov edx,.c_tx ; calc texture pixel mem addres - shr edx,ROUND - add eax,edx - and eax,TEXTURE_SIZE ; cutting - lea eax,[3*eax] - add eax,.tex_ptr - mov dword[esi],ebx -if Ext = NON - mov eax,dword[eax] - ; mov .tex_col,eax - push ax - shl eax,8 - pop ax - mov edx,.cr - sar edx,ROUND - mul dl ; al*dl - shr ax,8 - stosb - ror eax,16 - push ax - mov edx,.cg - sar edx,ROUND - mul dl - shr ax,8 - stosb - pop ax - shr ax,8 - mov edx,.cb - sar edx,ROUND - mul dl - shr ax,8 - stosb - jmp .no_skip -else - movd mm1,[eax] - punpcklbw mm1,mm0 - movq mm3,mm4 ;.cr ; lo -> r,g - movq mm5,mm6 ;.cb ; lo -> b,tex_x - psrld mm3,ROUND ; - psrld mm5,ROUND ; - packssdw mm3,mm5 - pmullw mm1,mm3 - psrlw mm1,8 - packuswb mm1,mm0 - movd [edi],mm1 -end if - mov dword[esi],ebx -if Ext = NON - jmp .no_skip -end if - @@: - add edi,3 - .no_skip: - add esi,4 - add ebx,.dz - - mov eax,.dtex_x - add .c_tx, eax - mov edx,.dtex_y - add .c_ty, edx -if Ext=NON - mov eax,.dr - add .cr,eax - mov edx,.dg - add .cg,edx - mov eax,.db - add .cb,eax - -else - paddd mm4,.drM - paddd mm6,.dbM -;; paddd mm7,.dtex_y ; mm4 - b, g -;; movq .c_tx,mm7 - ; mm6 - r, x -end if ; 
mm7 - y, x - - dec ecx - jnz .ddraw - - .quit_l: - - mov esp,ebp -ret 42+20 ; horizontal line - + + +CATMULL_SHIFT equ 8 +ROUND equ 8 +;NON=0 +;MMX=1 +;Ext=MMX +;TEX_SIZE=0x3fff +;SIZE_X equ 512 +;SIZE_Y equ 512 +;ROUND = 8 +;TEX_SHIFT equ 6 + +; procedure drawing textured triangle with Gouraud shading +; Z-buffer alghoritm included, Z coord interpolation ---- +; I set the color by this way -- (col1 * col2)/256 ------ +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- esi - pointer to Z-buffer-------- +;---------------------- edx - pointer to texture--------- +;---------------------- Z-buffer filled with dd variables +;---------------------- shifted CATMULL_SHIFT------------ +;---------------------- edi - pointer to screen buffer--- +;---------------------- stack : colors------------------- + + + +tex_plus_grd_triangle: +; parameters : + .tex_y3 equ [ebp+38] ; 36 bytes through stack + .tex_x3 equ [ebp+36] + .tex_y2 equ [ebp+34] + .tex_x2 equ [ebp+32] + .tex_y1 equ [ebp+30] + .tex_x1 equ [ebp+28] + + .z3 equ [ebp+26] + .col3b equ [ebp+24] + .col3g equ [ebp+22] + .col3r equ [ebp+20] + + .z2 equ [ebp+18] + .col2b equ [ebp+16] + .col2g equ [ebp+14] + .col2r equ [ebp+12] + + .z1 equ [ebp+10] + .col1b equ [ebp+8] + .col1g equ [ebp+6] + .col1r equ [ebp+4] + +; local variables: + + .tex_ptr equ dword[ebp-4] + .z_ptr equ dword[ebp-8] + .scr_buff equ dword[ebp-12] + + .x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10] + .y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12] + .x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14] + .y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16] + .x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18] + .y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20] + + .dx12 equ dword[ebp-28] ;dd ? + .tex_dx12 equ dword[ebp-32] ;dd ? + .tex_dy12 equ [ebp-36] ;dd ? + .dz12 equ dword[ebp-40] ;dd ? + .dc12r equ [ebp-44] ;dd ? 
+ .dc12g equ dword[ebp-48] ;dd ? + .dc12b equ [ebp-52] ;dd ? + + .dx23 equ dword[ebp-56] ;dd ? + .tex_dx23 equ dword[ebp-60] ;dd ? + .tex_dy23 equ [ebp-64] ;dd ? + .dz23 equ dword[ebp-68] ;dd ? + .dc23r equ [ebp-72] ;dd ? + .dc23g equ dword[ebp-76] ;dd ? + .dc23b equ [ebp-80] ;dword[ebp-8]dd ? + + .dx13 equ dword[ebp-84] ;dd ? + .tex_dx13 equ dword[ebp-88] ;dd ? + .tex_dy13 equ [ebp-92] ;dd ? + .dz13 equ dword[ebp-96] ;dd ? + .dc13r equ [ebp-100] ;dd ? + .dc13g equ dword[ebp-104] ;dd ? + .dc13b equ [ebp-108] ;dd ? + + .scan_x1 equ dword[ebp-112] ;dd ? + .scan_y1 equ [ebp-116] ;dd ? + .zz1 equ dword[ebp-120] ;dw ? + .cur1r equ [ebp-124] ;dw ? + .cur1g equ dword[ebp-128] ;dw ? + .cur1b equ [ebp-132] ;dw ? + + .scan_x2 equ dword[ebp-136] ;dd ? + .scan_y2 equ [ebp-140] ;dd ? + .zz2 equ dword[ebp-144] ;dw ? + .cur2r equ [ebp-148] ;dw ? + .cur2g equ dword[ebp-152] ;dw ? + .cur2b equ [ebp-156] ;dw ? + + + mov ebp,esp + + ; mov .tex_ptr,edx + ; mov .z_ptr,esi + ; mov .scr_buff,edi + push edx esi edi +; push esi +; push edi + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop2_end + + .sort3: + cmp ax,bx + jle .sort1 + xchg eax,ebx +if Ext>=MMX + movq mm0, .col1r ; exchange r, g, b, z + movq mm1, .col2r + movq .col1r ,mm1 + movq .col2r ,mm0 +else + mov edx,dword .col1r ; exchange both r and g + xchg edx,dword .col2r + mov dword .col1r ,edx + + mov edx,dword .col1b ; b and z + xchg edx,dword .col2b + mov dword .col1b ,edx +end if + + mov edx,dword .tex_x1 + xchg edx,dword .tex_x2 + mov dword .tex_x1 ,edx + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + +if Ext>=MMX + movq mm0, .col2r ; exchange r, g, b, z + movq mm1, .col3r + movq .col3r ,mm0 + movq .col2r ,mm1 +else + + mov edx,dword .col2r ; r, g + xchg edx,dword .col3r + mov dword .col2r,edx + + mov edx,dword .col2b ; b, z + xchg edx,dword 
.col3b + mov dword .col2b,edx +end if + + mov edx,dword .tex_x2 + xchg edx,dword .tex_x3 + mov dword .tex_x2,edx + + jmp .sort3 + + .sort2: + + push eax ebx ecx ; store in variables + ; push ebx + ; push ecx + +;****************** delta computng zone ************** +;+++++++++ first zone + mov bx,.y2 ; calc delta12 + sub bx,.y1 + jnz .dx12_make + mov ecx,7 + @@: + push dword 0 + loop @b + jmp .dx12_done + .dx12_make: + + + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax + push eax + +if 0 ; Ext=SSE + movd mm0,.col1r ; 2 words r, g + pxor mm1,mm1 + punpcklwd mm0,mm1 + cvtpi2ps xmm0,mm0 + movlhps xmm0,xmm0 + movd mm0,.col1g ; 2 words b, z + punpcklwd mm0,mm1 + cvtpi2ps xmm0,mm0 + ; xmm0=four float double words + divss xmm0,.pack3 + ;convert and insert mm0 to lower xmm1 .. +end if + + mov ax,word .tex_x2 + sub ax,word .tex_x1 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dx12r,eax + push eax + + mov ax,word .tex_y2 + sub ax,word .tex_y1 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dx12,eax + push eax + + mov ax,word .z2 + sub ax,word .z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz12,eax + push eax ; .dza12 + + mov ax,word .col2r + sub ax,word .col1r + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc12r,eax + push eax + + mov ax,word .col2g + sub ax,word .col1g + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc12g,eax + push eax + + mov ax,word .col2b ;;--- + sub ax,word .col1b + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .dc12b,eax + push eax + +;+++++++++++++++++ second zone +++++++++++++ + .dx12_done: + + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .dx23_make + mov ecx,7 + @@: + push dword 0 + loop @b + jmp .dx23_done + + .dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx23,eax + push eax + + mov ax,word .tex_x3 + sub ax,word .tex_x2 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dx23,eax + push eax + + mov ax,word .tex_y3 + 
sub ax,word .tex_y2 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dy23,eax + push eax + + mov ax,word .z3 + sub ax,word .z2 + cwde ; + shl eax,CATMULL_SHIFT ; 2222222 + cdq ; 2 2 + idiv ebx ; 2 +; mov .dz23,eax ; 2 + push eax ; .dza12 ; 2 + ; 2 + mov ax,word .col3r ; 2 + sub ax,word .col2r ; 2222222 + cwde ; second delta + shl eax,ROUND ; + cdq ; + idiv ebx ; +; mov .dc23r,eax ; + push eax + + mov ax,word .col3g + sub ax,word .col2g + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .dc23g,eax + push eax + + mov ax,word .col3b ;;--- + sub ax,word .col2b + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .dc23b,eax + push eax + + .dx23_done: +;++++++++++++++++++third zone++++++++++++++++++++++++ + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .dx13_make + mov ecx,7 + @@: + push dword 0 + loop @b + jmp .dx13_done + .dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx +; mov .dx13,eax + push eax + + mov ax,word .tex_x3 ; triangle b + sub ax,word .tex_x1 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dx13r,eax + push eax + + mov ax,word .tex_y3 + sub ax,word .tex_y1 + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .tex_dy13,eax + push eax + + mov ax,word .z3 + sub ax,word .z1 ; 333333333 + cwde ; 3 3 + shl eax,CATMULL_SHIFT ; 3 + cdq ; 3 + idiv ebx ; 3 +; mov .dz13,eax ; 3 + push eax ; .dza12 ; 3 + ; 3 + mov ax,word .col3r ; 3333333333 + sub ax,word .col1r ; 3 + cwde ; 3 + shl eax,ROUND ; 3 + cdq ; 3 + idiv ebx ; 3 + ; mov .dc13r,eax ; 3 3 + push eax ; 33333333 + + mov ax,word .col3g + sub ax,word .col1g + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dc13g,eax + push eax + + mov ax,word .col3b ;;--- + sub ax,word .col1b + cwde + shl eax,ROUND + cdq + idiv ebx +; mov .dc13b,eax + push eax + + .dx13_done: + +; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>> + sub esp,55 ;(12*4) + + movsx eax,.x1 ; eax - cur x1 + shl eax,ROUND ; ebx - cur x2 + mov ebx,eax + movsx edx,word .z1 + shl edx,CATMULL_SHIFT + mov .zz1,edx + mov 
.zz2,edx + + movzx edi,word .col1r + shl edi,ROUND + mov .cur1r,edi + mov .cur2r,edi + movzx esi,word .col1g + shl esi,ROUND + mov .cur1g,esi + mov .cur2g,esi + movzx edx,word .col1b + shl edx,ROUND + mov .cur1b,edx + mov .cur2b,edx + + movzx edi,word .tex_x1 + shl edi,ROUND + mov .scan_x1,edi + mov .scan_x2,edi + movzx edx,word .tex_y1 + shl edx,ROUND + mov .scan_y1,edx + mov .scan_y2,edx + + mov cx,.y1 + cmp cx,.y2 + jge .loop1_end + .loop_1: + ; push eax ebx ebp + pushad + + push .tex_ptr + push .scr_buff + push .z_ptr + push cx + + push .zz2 + + push .scan_x2 + push dword .scan_y2 + push dword .cur2r + push .cur2g + push dword .cur2b + + push .zz1 + + push .scan_x1 + push dword .scan_y1 + push dword .cur1r + push .cur1g + push dword .cur1b + + sar eax,ROUND + sar ebx,ROUND + call horizontal_tex_grd_line + + ; pop ebp ebx eax + popad + +if (Ext = MMX)|(Ext=SSE) + movq mm0,.cur1b + movq mm1,.cur1r + movq mm2,.scan_y1 + movq mm3,.cur2b + movq mm4,.cur2r + movq mm5,.scan_y2 + paddd mm0,.dc13b + paddd mm1,.dc13r + paddd mm2,.tex_dy13 + paddd mm3,.dc12b + paddd mm4,.dc12r + paddd mm5,.tex_dy12 + movq .cur1b,mm0 + movq .cur1r,mm1 + movq .scan_y1,mm2 + movq .cur2b,mm3 + movq .cur2r,mm4 + movq .scan_y2,mm5 +end if +if Ext >= SSE2 + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc12b + movq mm2,.scan_y1 + movq mm5,.scan_y2 + paddd xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dy13 + paddd mm5,.tex_dy12 + movq .scan_y1,mm2 + movq .scan_y2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 +end if + +if Ext = NON + mov edx,.dc13b + add .cur1b,edx + mov esi,.dc13g + add .cur1g,esi + mov edi,.dc13r + add .cur1r,edi + mov edx,.dz13 + add .zz1,edx + mov edx,.tex_dx13 + add .scan_x1,edx + mov esi,.tex_dy13 + add .scan_y1,esi + + mov edi,.dc12b + add .cur2b,edi + mov esi,.dc12g + add .cur2g,esi + mov edx,.dc12r + add .cur2r,edx + mov edi,.tex_dx12 + add .scan_x2,edi + mov esi,.tex_dy12 + add .scan_y2,esi + mov edx,.dz12 + add .zz2,edx +end if + add 
eax,.dx13 + add ebx,.dx12 + inc cx + cmp cx,.y2 + jl .loop_1 + .loop1_end: + movzx ecx,.y2 + cmp cx,.y3 + jge .loop2_end + + movsx ebx,.x2 ; eax - cur x1 + shl ebx,ROUND ; ebx - cur x2 + + movsx edx,word .z2 + shl edx,CATMULL_SHIFT +; mov .zz1,edx + mov .zz2,edx + + movzx edi,word .col2r + shl edi,ROUND + ; mov .cur1r,edi + mov .cur2r,edi + movzx esi,word .col2g + shl esi,ROUND + ; mov .cur1g,esi + mov .cur2g,esi + movzx edx,word .col2b + shl edx,ROUND + ; mov .cur1b,edx + mov .cur2b,edx + + movzx edi,word .tex_x2 + shl edi,ROUND + ; mov .scan_x1,edi + mov .scan_x2,edi + movzx edx,word .tex_y2 + shl edx,ROUND + ; mov .scan_y1,edx + mov .scan_y2,edx + + .loop_2: + pushad + + push .tex_ptr + push .scr_buff + push .z_ptr + push cx + + push .zz2 + + push .scan_x2 + push dword .scan_y2 + push dword .cur2r + push .cur2g + push dword .cur2b + + push .zz1 + + push .scan_x1 + push dword .scan_y1 + push dword .cur1r + push .cur1g + push dword .cur1b + + sar eax,ROUND + sar ebx,ROUND + call horizontal_tex_grd_line + + popad + +if (Ext = MMX)|(Ext=SSE) + movq mm0,.cur1b + movq mm1,.cur1r + movq mm2,.scan_y1 + movq mm3,.cur2b + movq mm4,.cur2r + movq mm5,.scan_y2 + paddd mm0,.dc13b + paddd mm1,.dc13r + paddd mm2,.tex_dy13 + paddd mm3,.dc23b + paddd mm4,.dc23r + paddd mm5,.tex_dy23 + movq .cur1b,mm0 + movq .cur1r,mm1 + movq .scan_y1,mm2 + movq .cur2b,mm3 + movq .cur2r,mm4 + movq .scan_y2,mm5 +end if +if Ext >= SSE2 + movups xmm0,.cur1b + movups xmm1,.dc13b + movups xmm2,.cur2b + movups xmm3,.dc23b + movq mm2,.scan_y1 + movq mm5,.scan_y2 + paddd xmm0,xmm1 + paddd xmm2,xmm3 + paddd mm2,.tex_dy13 + paddd mm5,.tex_dy23 + movq .scan_y1,mm2 + movq .scan_y2,mm5 + movups .cur1b,xmm0 + movups .cur2b,xmm2 +end if +if Ext = NON + mov edx,.dc13b + add .cur1b,edx + mov esi,.dc13g + add .cur1g,esi + mov edi,.dc13r + add .cur1r,edi + mov edx,.tex_dx13 + add .scan_x1,edx + mov esi,.tex_dy13 + add .scan_y1,esi + mov edx,.dz13 + add .zz1,edx + + mov edi,.dc23b + add .cur2b,edi + mov esi,.dc23g + 
add .cur2g,esi + mov edx,.dc23r + add .cur2r,edx + mov edi,.tex_dx23 + add .scan_x2,edi + mov esi,.tex_dy23 + add .scan_y2,esi + mov edx,.dz23 + add .zz2,edx +end if + add eax,.dx13 + add ebx,.dx23 + inc cx + cmp cx,.y3 + jl .loop_2 + +.loop2_end: + mov esp,ebp +ret 36 +horizontal_tex_grd_line: +;in: +; eax : x1, ebx : x2 + +.tex_ptr equ [ebp+62] +.screen equ [ebp+58] +.z_buffer equ [ebp+54] +.y equ [ebp+52] + +.z2 equ [ebp+48] +.tex_x2 equ [ebp+44] +.tex_y2 equ [ebp+40] +.r2 equ [ebp+36] +.g2 equ [ebp+32] +.b2 equ [ebp+28] + +.z1 equ [ebp+24] +.tex_x1 equ [ebp+20] +.tex_y1 equ [ebp+16] +.r1 equ [ebp+12] +.g1 equ [ebp+8] +.b1 equ [ebp+4] + +.x1 equ word[ebp-2] +.x2 equ word[ebp-4] +.dz equ dword[ebp-8] +.db equ dword[ebp-12] +.dg equ dword[ebp-16] +.dr equ dword[ebp-20] +.dtex_x equ dword[ebp-24] +.dtex_y equ dword[ebp-28] + +.c_ty equ [ebp-32] +.c_tx equ [ebp-36] +.cb equ [ebp-40] +.cg equ [ebp-44] +.cr equ [ebp-48] +.t_col equ [ebp-52] + +.dtex_yM equ qword[ebp-28] +.drM equ qword[ebp-20] +.dbM equ qword[ebp-12] + + mov ebp,esp + ; sub esp,30 + + mov cx,word .y + or cx,cx + jl .quit_l + + cmp cx,word[size_y_var] ;SIZE_Y + jge .quit_l + + cmp ax,bx + je .quit_l + jl @f + + xchg eax,ebx + +if Ext=NON + mov ecx,dword .r1 + xchg ecx, .r2 + mov dword .r1, ecx + + mov ecx,dword .g1 + xchg ecx, .g2 + mov dword .g1, ecx + + mov ecx,dword .b1 + xchg ecx, .b2 + mov dword .b1, ecx + + mov ecx,dword .tex_x1 + xchg ecx, .tex_x2 + mov dword .tex_x1, ecx + + mov ecx,dword .tex_y1 + xchg ecx, .tex_y2 + mov dword .tex_y1, ecx + + mov ecx,dword .z1 + xchg ecx, .z2 + mov dword .z1, ecx +end if +if (Ext=MMX) + movq mm0,.b1 ; b, g + movq mm1,.b2 + movq .b1, mm1 + movq .b2, mm0 + movq mm2,.r1 ; r, y + movq mm3,.r2 + movq .r1,mm3 + movq .r2,mm2 + movq mm4,.tex_x1 ; x, z + movq mm5,.tex_x2 + movq .tex_x1,mm5 + movq .tex_x2,mm4 + +end if +if Ext>=SSE + movups xmm0,.b1 + movups xmm1,.b2 + movups .b1,xmm1 + movups .b2,xmm0 + movq mm4,.tex_x1 ; x, z + movq mm5,.tex_x2 + movq .tex_x1,mm5 + 
movq .tex_x2,mm4 +end if + + @@: + or bx,bx + jle .quit_l + cmp ax,word[size_x_var] ;SIZE_X + jge .quit_l + + push ax + push bx + + mov eax,.z2 ; delta zone************ + sub eax,.z1 + cdq + mov bx,.x2 + sub bx,.x1 + movsx ebx,bx + idiv ebx + push eax ; .dz + + mov eax,.b2 + sub eax,.b1 + cdq + idiv ebx + push eax ; .db + + mov eax,.g2 + sub eax,.g1 + cdq + idiv ebx + push eax ; .dg + + mov eax,.r2 + sub eax,.r1 + cdq + idiv ebx + push eax ; .dr + + mov eax,.tex_x2 + sub eax,.tex_x1 + cdq + idiv ebx + push eax ; .dtex_x + + mov eax,.tex_y2 + sub eax,.tex_y1 + cdq + idiv ebx + push eax ; .dtey_x + + cmp .x1,0 + jg @f + + mov eax,.dz ; clipping + movsx ebx,.x1 + neg ebx + imul ebx + add .z1,eax + mov .x1,0 + + mov eax,.dr + imul ebx + add .r1,eax +;if Ext=NON + mov eax,.dg + imul ebx + add .g1,eax + + mov eax,.db + imul ebx + add .b1,eax + + mov eax,.dtex_x + imul ebx + add .tex_x1,eax + + mov eax,.dtex_y + imul ebx + add .tex_y1,eax + @@: + movsx edx,word[size_x_var] ;SIZE_X + cmp .x2,dx + jl @f + mov .x2,dx + @@: +; calc line addres begin in screen and Z buffer + movsx eax,word .y + mul edx + movsx edx,.x1 + add eax,edx + + mov esi,eax + shl esi,2 + add esi,.z_buffer + + lea eax,[eax*3] + mov edi,.screen + add edi,eax + + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx + +; init current variables + push dword .tex_y1 +;if Ext=NON + push dword .tex_x1 + + push dword .b1 + push dword .g1 + push dword .r1 + +if Ext>=MMX + movq mm4,.cr ; lo -> r,g + movq mm6,.cb ; hi -> b, tex_x + pxor mm0,mm0 +end if + mov ebx,.z1 + .ddraw: + cmp ebx,dword[esi] + jge @f + mov eax,.c_ty +; if ROUNDTEX_SHIFT +; shr eax,ROUND-TEX_SHIFT +; end if + shr eax,ROUND + shl Eax,TEX_SHIFT + mov edx,.c_tx ; calc texture pixel mem addres + shr edx,ROUND + add eax,edx + and eax,TEXTURE_SIZE ; cutting + lea eax,[3*eax] + add eax,.tex_ptr + mov dword[esi],ebx +if Ext = NON + mov eax,dword[eax] + ; mov .tex_col,eax + push ax + shl eax,8 + pop ax + mov edx,.cr + sar edx,ROUND + mul dl ; al*dl + shr ax,8 + stosb 
+ ror eax,16 + push ax + mov edx,.cg + sar edx,ROUND + mul dl + shr ax,8 + stosb + pop ax + shr ax,8 + mov edx,.cb + sar edx,ROUND + mul dl + shr ax,8 + stosb + jmp .no_skip +else + movd mm1,[eax] + punpcklbw mm1,mm0 + movq mm3,mm4 ;.cr ; lo -> r,g + movq mm5,mm6 ;.cb ; lo -> b,tex_x + psrld mm3,ROUND ; + psrld mm5,ROUND ; + packssdw mm3,mm5 + pmullw mm1,mm3 + psrlw mm1,8 + packuswb mm1,mm0 + movd [edi],mm1 +end if + mov dword[esi],ebx +if Ext = NON + jmp .no_skip +end if + @@: + add edi,3 + .no_skip: + add esi,4 + add ebx,.dz + + mov eax,.dtex_x + add .c_tx, eax + mov edx,.dtex_y + add .c_ty, edx +if Ext=NON + mov eax,.dr + add .cr,eax + mov edx,.dg + add .cg,edx + mov eax,.db + add .cb,eax + +else + paddd mm4,.drM + paddd mm6,.dbM +;; paddd mm7,.dtex_y ; mm4 - b, g +;; movq .c_tx,mm7 + ; mm6 - r, x +end if ; mm7 - y, x + + dec ecx + jnz .ddraw + + .quit_l: + + mov esp,ebp +ret 42+20 ; horizontal line + diff --git a/programs/demos/view3ds/history.txt b/programs/demos/view3ds/history.txt index a264ebbe7f..8fe503636c 100644 --- a/programs/demos/view3ds/history.txt +++ b/programs/demos/view3ds/history.txt @@ -1,11 +1,16 @@ +View3ds 0.073 - may 2021 +1. I introduced procedure for searching nonredundand edges. +2. Writing some info about object: vertices, triangles unique edges + count. +----------------------------------------------------------------------------------- View3ds 0.072 - march 2021 1. New displaying model - texturing with bilinear filtering and transparency - simultanusly. Note that filtering is done only inside polygon. To better + simultanusly. Note that filtering is done only inside polygon. To better quality of image there is a need to use floats coordinates of texture to pass as arguments to single triangle rendering proc. 2. Optimizations. -3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and +3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and transparented texturing with filtering rendering models are disabled. 
----------------------------------------------------------------------------------- diff --git a/programs/demos/view3ds/readme.txt b/programs/demos/view3ds/readme.txt index b4279a8f63..07d9339f2f 100644 --- a/programs/demos/view3ds/readme.txt +++ b/programs/demos/view3ds/readme.txt @@ -1,20 +1,16 @@ -View3ds 0.073 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.074 - tiny viewer to .3ds and .asc files with several graphics effects implementation. What's new? -1. I introduced procedure for searching nonredundand edges. -2. Writing some info about object: vertices, triangles unique edges - count. +1. Fixed emboss bug in grd lines displaying model. +2. Grd line exceedes screen problem fix. +3. New rendering model - ray casted shadows and appropiate button to + set 'on' this option. Note that is non real time model, especially when + complex object is computed. I took effort to introduce accelerating + structure - AABB (Axis Aligned Bounding Boxes).. but it is disabled + for now - seems to work incorrect(slow). -1. New displaying model - texturing with bilinear filtering and transparency - simultanusly. Note that filtering is done only inside polygon. To better - quality of image there is a need to use floats coordinates of texture to pass - as arguments to single triangle rendering proc. -2. Optimizations. -3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and - transparented texturing with filtering rendering models are disabled. - Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. 2. shd. model: choosing shading model: flat, grd (smooth), env (spherical @@ -26,28 +22,28 @@ Buttons description: ptex (real Phong + texturing + transparency). 3. speed: idle, full. 4,5. zoom in, out: no comment. -6. catmull: disabled +6. ray shadow: calc ray casted shadows. 7. culling: backface culling on/ off. 8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). 9. Blur: blur N times; N=0,1,2,3,4,5 10.11,12,13. 
loseless operations (rotary 90, 180 degrees). -12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges +12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges more deep. 13. fire: do motion blur ( looks like fire ). -14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving +14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving camera, wave: x,y +/- increase, decrease wave effect frequency and amplitude. 15. generate: Generates some objects: node, Thorn Crown, heart... 16. bumps: random, according to texture. 17. bumps deep -> create bumps deeper or lighter. -18. re-map tex -> re-map texture and bump map coordinates, to change spherical +18. re-map tex -> re-map texture and bump map coordinates, to change spherical mapping around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button. 19. bright + -> increase picture brightness. 20. bright - -> decrease picture brightness. 21. wav effect -> do effect based sine function. 22. editor -> setting editing option. If is "on" then red bars are draw according to each vertex, Pressing and moving left mouse button (cursor must be on handler)- change - vertex position. If left mouse button is released apply current position. You may also + vertex position. If left mouse button is released apply current position. You may also decrease whole handlers count by enable culling (using appropriate button) - some back handlers become hidden. 
- Maciej Guba V 2021 + Maciej Guba IX 2021 diff --git a/programs/demos/view3ds/tex_cat.inc b/programs/demos/view3ds/tex_cat.inc index e0017f673e..ecc6615f8a 100644 --- a/programs/demos/view3ds/tex_cat.inc +++ b/programs/demos/view3ds/tex_cat.inc @@ -1,611 +1,611 @@ -;TEX_X = 512 -;TEX_Y = 512 -;ROUND equ 8 -;SIZE_X = 512 -;SIZE_Y = 512 -;TEX_SHIFT = 9 -CATMULL_SHIFT equ 8 - -;------------------------------------------------------------------------ -;- Procedure drawing textured triangle using Catmull Z-buffer algorithm - -;------------------------------------------------------------------------ -tex_triangle_z: -;----------in - eax - x1 shl 16 + y1 -;-------------- ebx - x2 shl 16 + y2 -;---------------ecx - x3 shl 16 + y3 -;---------------edx - pointer to Z-buffer -;---------------esi - pointer to texture buffer -;---------------edi - pointer to screen buffer -;-------------stack - texture coordinates -;------------------ - z coordinates -.tex_x1 equ ebp+4 -.tex_y1 equ ebp+6 -.tex_x2 equ ebp+8 -.tex_y2 equ ebp+10 -.tex_x3 equ ebp+12 -.tex_y3 equ ebp+14 -.z1 equ word[ebp+16] -.z2 equ word[ebp+18] -.z3 equ word[ebp+20] - -.tex_ptr equ dword[ebp-4] ; pointer to texture -.z_ptr equ dword[ebp-8] ; pointer to z-buffer -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] - -.dx12 equ dword[ebp-24] -.tex_dx12 equ dword[ebp-28] -.tex_dy12 equ dword[ebp-32] -.dz12 equ dword[ebp-36] - -.dx13 equ dword[ebp-40] -.tex_dx13 equ dword[ebp-44] -.tex_dy13 equ dword[ebp-48] -.dz13 equ dword[ebp-52] - -.dx23 equ dword[ebp-56] -.tex_dx23 equ dword[ebp-60] -.tex_dy23 equ dword[ebp-64] -.dz23 equ dword[ebp-68] - -.scan_x1 equ dword[ebp-72] -.scan_x2 equ dword[ebp-76] -.scan_y1 equ dword[ebp-80] -.scan_y2 equ dword[ebp-84] -.cz1 equ dword[ebp-88] -.cz2 equ dword[ebp-92] - - mov ebp,esp - push esi ; store memory pointers - push edx -.tt_sort3: - cmp ax,bx ;sort all parameters - jle .tt_sort1 - xchg eax,ebx - 
mov edx,dword [.tex_x1] - xchg edx,dword [.tex_x2] - mov dword[.tex_x1],edx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx -.tt_sort1: - cmp bx,cx - jle .tt_sort2 - xchg ebx,ecx - mov edx,dword [.tex_x2] - xchg edx,dword [.tex_x3] - mov dword [.tex_x2],edx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .tt_sort3 -.tt_sort2: - - push eax ; and store to user friendly variables - push ebx - push ecx - - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .tt_loop2_end - ; cmp ax,SIZE_Y - ; jl @f - ; cmp bx,SIZE_Y - ; jl @f - ; cmp cx,SIZE_Y - ; jl @f - ror eax,16 - ror ebx,16 - ror ecx,16 - ; cmp ax,SIZE_X - ; jl @f - ; cmp bx,SIZE_X - ; jl @f - ; cmp cx,SIZE_X - ; jl @f - ; jmp .tt_loop2_end - @@: - mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] - mov ebx,dword[.tex_x2] - mov ecx,dword[.tex_x3] - mov edx,eax - or edx,ebx - or edx,ecx - test edx,80008000h - jne .tt_loop2_end - cmp ax,TEX_X - jge .tt_loop2_end - cmp bx,TEX_X - jge .tt_loop2_end - cmp cx,TEX_X - jge .tt_loop2_end - ror eax,16 - ror ebx,16 - ror ecx,16 - cmp ax,TEX_Y - jge .tt_loop2_end - cmp bx,TEX_Y - jge .tt_loop2_end - cmp cx,TEX_Y - jge .tt_loop2_end - - - movsx ebx,.y2 ; calc delta - sub bx,.y1 - jnz .tt_dx12_make - xor edx,edx - mov ecx,4 - @@: - push edx - loop @b - jmp .tt_dx12_done - .tt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax ; dx12 = (x2-x1)/(y2-y1) - push eax - - mov ax,word[.tex_x2] - sub ax,word[.tex_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1) - push eax - - mov ax,word[.tex_y2] - sub ax,word[.tex_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1) - push eax - - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax 
- .tt_dx12_done: - - movsx ebx,.y3 ; calc delta - sub bx,.y1 - jnz .tt_dx13_make - xor edx,edx - mov ecx,4 - @@: - push edx - loop @b - jmp .tt_dx13_done - .tt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dx12,eax ; dx13 = (x3-x1)/(y3-y1) - push eax - - mov ax,word[.tex_x3] - sub ax,word[.tex_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dx12],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1) - push eax - - mov ax,word[.tex_y3] - sub ax,word[.tex_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dy12],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1) - push eax - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - .tt_dx13_done: - - mov bx,.y3 ; calc delta - sub bx,.y2 - jnz .tt_dx23_make - xor edx,edx - mov ecx,4 - @@: - push edx - loop @b - jmp .tt_dx23_done - .tt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - shl eax,ROUND - cdq - movzx ebx,bx - idiv ebx - ; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2) - push eax - - mov ax,word[.tex_x3] - sub ax,word[.tex_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2) - push eax - - mov ax,word[.tex_y3] - sub ax,word[.tex_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2) - push eax - - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - .tt_dx23_done: - - movsx eax,.x1 ;eax - cur x1 - shl eax,ROUND ;ebx - cur x2 - mov ebx,eax - - movsx edx, word[.tex_x1] - shl edx,ROUND - ; mov [.scan_x1],edx - ; mov [.scan_x2],edx - push edx - push edx - movsx edx, word[.tex_y1] - shl edx,ROUND - ; mov [.scan_y1],edx - ; mov [.scan_y2],edx - push edx - push edx - movsx edx,.z1 - shl edx,CATMULL_SHIFT - push edx - push edx - mov cx,.y1 - cmp cx,.y2 - jge .tt_loop1_end - - .tt_loop1: - pushad - - push .z_ptr - push .cz1 ; z coords shifted shl catmull_shift - push .cz2 - push .scan_y2 - push .scan_x2 - push .scan_y1 - push .scan_x1 - 
push esi ;[.tex_ptr] - - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax - call textured_line_z - - popad - mov edx,.dz13 - add .cz1,edx - mov edx,.dz12 - add .cz2,edx - - mov edx, .tex_dx13 - add .scan_x1, edx - mov edx, .tex_dx12 - add .scan_x2, edx - mov edx, .tex_dy13 - add .scan_y1, edx - mov edx, .tex_dy12 - add .scan_y2, edx - - add eax, .dx13 - add ebx, .dx12 - inc cx - cmp cx,.y2 - jl .tt_loop1 - - .tt_loop1_end: - - - mov cx,.y2 - cmp cx,.y3 - jge .tt_loop2_end - - movsx ebx,.x2 - shl ebx,ROUND - movsx edx,.z2 - shl edx,CATMULL_SHIFT - mov .cz2,edx - movzx edx, word [.tex_x2] - shl edx,ROUND - mov .scan_x2,edx - movzx edx, word[.tex_y2] - shl edx,ROUND - mov .scan_y2,edx - -.tt_loop2: - - pushad - - push .z_ptr - push .cz1 ; z coords shifted shl catmull_shift - push .cz2 - - push .scan_y2 - push .scan_x2 - push .scan_y1 - push .scan_x1 - push esi ;[.tex_ptr] - - push cx - sar ebx,ROUND - push bx - sar eax,ROUND - push ax - call textured_line_z - - popad - - - mov edx,.dz13 - add .cz1,edx - mov edx,.dz23 - add .cz2,edx - - mov edx, .tex_dx13 - add .scan_x1, edx - mov edx, .tex_dx23 - add .scan_x2, edx - mov edx, .tex_dy13 - add .scan_y1, edx - mov edx, .tex_dy23 - add .scan_y2, edx - - add eax, .dx13 - add ebx, .dx23 - inc cx - cmp cx,.y3 - jl .tt_loop2 - -.tt_loop2_end: - -.tt_end: - mov esp,ebp -ret 18 - -textured_line_z: -;-----in -edi screen buffer pointer -;------------ stack: - .x1 equ word [ebp+4] - .x2 equ word [ebp+6] - .y equ word [ebp+8] - - .tex_ptr equ dword [ebp+10] - .tex_x1 equ ebp+14 - .tex_y1 equ ebp+18 - .tex_x2 equ ebp+22 - .tex_y2 equ ebp+26 - .z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT - .z1 equ dword [ebp+34] - .z_ptr equ dword [ebp+38] - - .tex_dy equ dword [ebp-4] - .tex_dx equ dword [ebp-8] - .dz equ dword [ebp-12] - .cz equ dword [ebp-16] - .c_tex_x equ dword [ebp-20] ; current tex x - .m_sft1 equ ebp-28 - .m_sft2 equ ebp-32 -; .c_tex_xM equ ebp+14 - .tex_dxM equ ebp-8 - - mov ebp,esp - - mov ax,.y 
- or ax,ax - jl .tl_quit - mov bx,[size_y_var] - dec bx - cmp ax,bx ;SIZE_Y - jge .tl_quit - - mov ax,.x1 - cmp ax,.x2 - je .tl_quit - jl .tl_ok - - xchg ax,.x2 ; sort params - mov .x1,ax -if Ext >= MMX - movq mm0,[.tex_x1] - movq mm1,[.tex_x2] - movq [.tex_x2],mm0 - movq [.tex_x1],mm1 - -else - mov eax,dword[.tex_x1] - xchg eax,dword[.tex_x2] - mov dword[.tex_x1],eax - - mov eax,dword[.tex_y1] - xchg eax,dword[.tex_y2] - mov dword[.tex_y1],eax - -end if - - mov eax,.z1 - xchg eax,.z2 - mov .z1,eax - - .tl_ok: - mov cx,[size_x_var] - dec cx - cmp .x1,cx ;SIZE_X - jge .tl_quit - cmp .x2,0 - jle .tl_quit - - mov bx,.x2 - sub bx,.x1 - movsx ebx,bx - - mov eax,dword[.tex_y2] ; calc .dty - sub eax,dword[.tex_y1] - cdq - idiv ebx - push eax - - mov eax,dword[.tex_x2] ; calc .dtx - sub eax,dword[.tex_x1] - cdq - idiv ebx - push eax - - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax - - cmp .x1,0 ; clipping - jg @f - - movsx ebx,.x1 - neg ebx - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov .x1,0 - - mov eax,.tex_dy - imul ebx - add dword[.tex_y1],eax - - mov eax,.tex_dx - imul ebx - add dword[.tex_x1],eax - - @@: - cmp .x2,cx ;SIZE_X - jl @f - mov .x2,cx ;SIZE_X - @@: - - movsx ebx,.y ; calc mem begin in buffers - movzx eax,word[size_x_var] ;SIZE_X - mul ebx - movsx ebx,.x1 - add eax,ebx - mov ebx,eax - - lea eax,[eax*3] - add edi,eax ; edi - scr buff - shl ebx,2 - add .z_ptr,ebx ; z buffer pointer - - mov cx,.x2 - sub cx,.x1 - movzx ecx,cx - -;if Ext >= MMX -; movq mm0,[.tex_x1] -; movq mm4,mm0 -; movq mm1,qword[.tex_dxM] -; mov ebx,.z1 -; mov eax,.dz -;else - mov eax,dword[.tex_x1] - mov ebx,dword[.tex_y1] - push .z1 ; .cz - push eax ;.c_tex_x -;end if - mov edx,.z_ptr - - .tl_loop: - -;if Ext >= MMX -; cmp ebx,[edx] ; ebx - current z -; jge @f -; movq mm2,mm0 -; psrad mm2,ROUND -; movq mm3,mm2 -; psrlq mm2,32-TEX_SHIFT -; paddd mm3,mm2 -; movd esi,mm3 -; mov dword[edx],ebx ; renew z buffer -;else - ; eax - temp - mov eax,.cz ; ebx - cur tex y shl 
ROUND - cmp eax,[edx] ; ecx - l.lenght - jge @f ; ebx - cur tex_y ; edx - temp - mov esi,ebx ; edi - scr buff - sar esi,ROUND ; esi - tex_ptr temp - shl esi,TEX_SHIFT ; .z_ptr - cur pointer to z buff - mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT - sar eax,ROUND - add esi,eax - mov eax,.cz - mov dword[edx],eax ; renew z buffer -;end if - and esi,TEXTURE_SIZE - lea esi,[esi*3] - add esi,.tex_ptr - movsd - dec edi - jmp .no_skip - @@: - add edi,3 - .no_skip: - add edx,4 -;if Ext >= MMX -; add ebx,eax -; paddd mm0,mm1 -;else - mov eax,.dz - add .cz,eax - mov eax,.tex_dx - add .c_tex_x,eax - add ebx,.tex_dy -;end if - loop .tl_loop - .tl_quit: - - mov esp,ebp - -ret 30+8 - +;TEX_X = 512 +;TEX_Y = 512 +;ROUND equ 8 +;SIZE_X = 512 +;SIZE_Y = 512 +;TEX_SHIFT = 9 +CATMULL_SHIFT equ 8 + +;------------------------------------------------------------------------ +;- Procedure drawing textured triangle using Catmull Z-buffer algorithm - +;------------------------------------------------------------------------ +tex_triangle_z: +;----------in - eax - x1 shl 16 + y1 +;-------------- ebx - x2 shl 16 + y2 +;---------------ecx - x3 shl 16 + y3 +;---------------edx - pointer to Z-buffer +;---------------esi - pointer to texture buffer +;---------------edi - pointer to screen buffer +;-------------stack - texture coordinates +;------------------ - z coordinates +.tex_x1 equ ebp+4 +.tex_y1 equ ebp+6 +.tex_x2 equ ebp+8 +.tex_y2 equ ebp+10 +.tex_x3 equ ebp+12 +.tex_y3 equ ebp+14 +.z1 equ word[ebp+16] +.z2 equ word[ebp+18] +.z3 equ word[ebp+20] + +.tex_ptr equ dword[ebp-4] ; pointer to texture +.z_ptr equ dword[ebp-8] ; pointer to z-buffer +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] + +.dx12 equ dword[ebp-24] +.tex_dx12 equ dword[ebp-28] +.tex_dy12 equ dword[ebp-32] +.dz12 equ dword[ebp-36] + +.dx13 equ dword[ebp-40] +.tex_dx13 equ dword[ebp-44] +.tex_dy13 equ dword[ebp-48] +.dz13 equ 
dword[ebp-52] + +.dx23 equ dword[ebp-56] +.tex_dx23 equ dword[ebp-60] +.tex_dy23 equ dword[ebp-64] +.dz23 equ dword[ebp-68] + +.scan_x1 equ dword[ebp-72] +.scan_x2 equ dword[ebp-76] +.scan_y1 equ dword[ebp-80] +.scan_y2 equ dword[ebp-84] +.cz1 equ dword[ebp-88] +.cz2 equ dword[ebp-92] + + mov ebp,esp + push esi ; store memory pointers + push edx +.tt_sort3: + cmp ax,bx ;sort all parameters + jle .tt_sort1 + xchg eax,ebx + mov edx,dword [.tex_x1] + xchg edx,dword [.tex_x2] + mov dword[.tex_x1],edx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx +.tt_sort1: + cmp bx,cx + jle .tt_sort2 + xchg ebx,ecx + mov edx,dword [.tex_x2] + xchg edx,dword [.tex_x3] + mov dword [.tex_x2],edx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .tt_sort3 +.tt_sort2: + + push eax ; and store to user friendly variables + push ebx + push ecx + + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .tt_loop2_end + ; cmp ax,SIZE_Y + ; jl @f + ; cmp bx,SIZE_Y + ; jl @f + ; cmp cx,SIZE_Y + ; jl @f + ror eax,16 + ror ebx,16 + ror ecx,16 + ; cmp ax,SIZE_X + ; jl @f + ; cmp bx,SIZE_X + ; jl @f + ; cmp cx,SIZE_X + ; jl @f + ; jmp .tt_loop2_end + @@: + mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] + mov ebx,dword[.tex_x2] + mov ecx,dword[.tex_x3] + mov edx,eax + or edx,ebx + or edx,ecx + test edx,80008000h + jne .tt_loop2_end + cmp ax,TEX_X + jge .tt_loop2_end + cmp bx,TEX_X + jge .tt_loop2_end + cmp cx,TEX_X + jge .tt_loop2_end + ror eax,16 + ror ebx,16 + ror ecx,16 + cmp ax,TEX_Y + jge .tt_loop2_end + cmp bx,TEX_Y + jge .tt_loop2_end + cmp cx,TEX_Y + jge .tt_loop2_end + + + movsx ebx,.y2 ; calc delta + sub bx,.y1 + jnz .tt_dx12_make + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + jmp .tt_dx12_done + .tt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax ; dx12 = 
(x2-x1)/(y2-y1) + push eax + + mov ax,word[.tex_x2] + sub ax,word[.tex_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1) + push eax + + mov ax,word[.tex_y2] + sub ax,word[.tex_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1) + push eax + + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + .tt_dx12_done: + + movsx ebx,.y3 ; calc delta + sub bx,.y1 + jnz .tt_dx13_make + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + jmp .tt_dx13_done + .tt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dx12,eax ; dx13 = (x3-x1)/(y3-y1) + push eax + + mov ax,word[.tex_x3] + sub ax,word[.tex_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dx12],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1) + push eax + + mov ax,word[.tex_y3] + sub ax,word[.tex_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dy12],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1) + push eax + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + .tt_dx13_done: + + mov bx,.y3 ; calc delta + sub bx,.y2 + jnz .tt_dx23_make + xor edx,edx + mov ecx,4 + @@: + push edx + loop @b + jmp .tt_dx23_done + .tt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + shl eax,ROUND + cdq + movzx ebx,bx + idiv ebx + ; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2) + push eax + + mov ax,word[.tex_x3] + sub ax,word[.tex_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2) + push eax + + mov ax,word[.tex_y3] + sub ax,word[.tex_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2) + push eax + + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + .tt_dx23_done: + + movsx eax,.x1 ;eax - cur x1 + shl eax,ROUND ;ebx - cur x2 + mov ebx,eax + + movsx edx, word[.tex_x1] + shl edx,ROUND + ; mov 
[.scan_x1],edx + ; mov [.scan_x2],edx + push edx + push edx + movsx edx, word[.tex_y1] + shl edx,ROUND + ; mov [.scan_y1],edx + ; mov [.scan_y2],edx + push edx + push edx + movsx edx,.z1 + shl edx,CATMULL_SHIFT + push edx + push edx + mov cx,.y1 + cmp cx,.y2 + jge .tt_loop1_end + + .tt_loop1: + pushad + + push .z_ptr + push .cz1 ; z coords shifted shl catmull_shift + push .cz2 + push .scan_y2 + push .scan_x2 + push .scan_y1 + push .scan_x1 + push esi ;[.tex_ptr] + + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax + call textured_line_z + + popad + mov edx,.dz13 + add .cz1,edx + mov edx,.dz12 + add .cz2,edx + + mov edx, .tex_dx13 + add .scan_x1, edx + mov edx, .tex_dx12 + add .scan_x2, edx + mov edx, .tex_dy13 + add .scan_y1, edx + mov edx, .tex_dy12 + add .scan_y2, edx + + add eax, .dx13 + add ebx, .dx12 + inc cx + cmp cx,.y2 + jl .tt_loop1 + + .tt_loop1_end: + + + mov cx,.y2 + cmp cx,.y3 + jge .tt_loop2_end + + movsx ebx,.x2 + shl ebx,ROUND + movsx edx,.z2 + shl edx,CATMULL_SHIFT + mov .cz2,edx + movzx edx, word [.tex_x2] + shl edx,ROUND + mov .scan_x2,edx + movzx edx, word[.tex_y2] + shl edx,ROUND + mov .scan_y2,edx + +.tt_loop2: + + pushad + + push .z_ptr + push .cz1 ; z coords shifted shl catmull_shift + push .cz2 + + push .scan_y2 + push .scan_x2 + push .scan_y1 + push .scan_x1 + push esi ;[.tex_ptr] + + push cx + sar ebx,ROUND + push bx + sar eax,ROUND + push ax + call textured_line_z + + popad + + + mov edx,.dz13 + add .cz1,edx + mov edx,.dz23 + add .cz2,edx + + mov edx, .tex_dx13 + add .scan_x1, edx + mov edx, .tex_dx23 + add .scan_x2, edx + mov edx, .tex_dy13 + add .scan_y1, edx + mov edx, .tex_dy23 + add .scan_y2, edx + + add eax, .dx13 + add ebx, .dx23 + inc cx + cmp cx,.y3 + jl .tt_loop2 + +.tt_loop2_end: + +.tt_end: + mov esp,ebp +ret 18 + +textured_line_z: +;-----in -edi screen buffer pointer +;------------ stack: + .x1 equ word [ebp+4] + .x2 equ word [ebp+6] + .y equ word [ebp+8] + + .tex_ptr equ dword [ebp+10] + .tex_x1 equ ebp+14 + 
.tex_y1 equ ebp+18 + .tex_x2 equ ebp+22 + .tex_y2 equ ebp+26 + .z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT + .z1 equ dword [ebp+34] + .z_ptr equ dword [ebp+38] + + .tex_dy equ dword [ebp-4] + .tex_dx equ dword [ebp-8] + .dz equ dword [ebp-12] + .cz equ dword [ebp-16] + .c_tex_x equ dword [ebp-20] ; current tex x + .m_sft1 equ ebp-28 + .m_sft2 equ ebp-32 +; .c_tex_xM equ ebp+14 + .tex_dxM equ ebp-8 + + mov ebp,esp + + mov ax,.y + or ax,ax + jl .tl_quit + mov bx,[size_y_var] + dec bx + cmp ax,bx ;SIZE_Y + jge .tl_quit + + mov ax,.x1 + cmp ax,.x2 + je .tl_quit + jl .tl_ok + + xchg ax,.x2 ; sort params + mov .x1,ax +if Ext >= MMX + movq mm0,[.tex_x1] + movq mm1,[.tex_x2] + movq [.tex_x2],mm0 + movq [.tex_x1],mm1 + +else + mov eax,dword[.tex_x1] + xchg eax,dword[.tex_x2] + mov dword[.tex_x1],eax + + mov eax,dword[.tex_y1] + xchg eax,dword[.tex_y2] + mov dword[.tex_y1],eax + +end if + + mov eax,.z1 + xchg eax,.z2 + mov .z1,eax + + .tl_ok: + mov cx,[size_x_var] + dec cx + cmp .x1,cx ;SIZE_X + jge .tl_quit + cmp .x2,0 + jle .tl_quit + + mov bx,.x2 + sub bx,.x1 + movsx ebx,bx + + mov eax,dword[.tex_y2] ; calc .dty + sub eax,dword[.tex_y1] + cdq + idiv ebx + push eax + + mov eax,dword[.tex_x2] ; calc .dtx + sub eax,dword[.tex_x1] + cdq + idiv ebx + push eax + + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax + + cmp .x1,0 ; clipping + jg @f + + movsx ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 + + mov eax,.tex_dy + imul ebx + add dword[.tex_y1],eax + + mov eax,.tex_dx + imul ebx + add dword[.tex_x1],eax + + @@: + cmp .x2,cx ;SIZE_X + jl @f + mov .x2,cx ;SIZE_X + @@: + + movsx ebx,.y ; calc mem begin in buffers + movzx eax,word[size_x_var] ;SIZE_X + mul ebx + movsx ebx,.x1 + add eax,ebx + mov ebx,eax + + lea eax,[eax*3] + add edi,eax ; edi - scr buff + shl ebx,2 + add .z_ptr,ebx ; z buffer pointer + + mov cx,.x2 + sub cx,.x1 + movzx ecx,cx + +;if Ext >= MMX +; movq mm0,[.tex_x1] +; movq mm4,mm0 +; movq 
mm1,qword[.tex_dxM] +; mov ebx,.z1 +; mov eax,.dz +;else + mov eax,dword[.tex_x1] + mov ebx,dword[.tex_y1] + push .z1 ; .cz + push eax ;.c_tex_x +;end if + mov edx,.z_ptr + + .tl_loop: + +;if Ext >= MMX +; cmp ebx,[edx] ; ebx - current z +; jge @f +; movq mm2,mm0 +; psrad mm2,ROUND +; movq mm3,mm2 +; psrlq mm2,32-TEX_SHIFT +; paddd mm3,mm2 +; movd esi,mm3 +; mov dword[edx],ebx ; renew z buffer +;else + ; eax - temp + mov eax,.cz ; ebx - cur tex y shl ROUND + cmp eax,[edx] ; ecx - l.lenght + jge @f ; ebx - cur tex_y ; edx - temp + mov esi,ebx ; edi - scr buff + sar esi,ROUND ; esi - tex_ptr temp + shl esi,TEX_SHIFT ; .z_ptr - cur pointer to z buff + mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT + sar eax,ROUND + add esi,eax + mov eax,.cz + mov dword[edx],eax ; renew z buffer +;end if + and esi,TEXTURE_SIZE + lea esi,[esi*3] + add esi,.tex_ptr + movsd + dec edi + jmp .no_skip + @@: + add edi,3 + .no_skip: + add edx,4 +;if Ext >= MMX +; add ebx,eax +; paddd mm0,mm1 +;else + mov eax,.dz + add .cz,eax + mov eax,.tex_dx + add .c_tex_x,eax + add ebx,.tex_dy +;end if + loop .tl_loop + .tl_quit: + + mov esp,ebp + +ret 30+8 + diff --git a/programs/demos/view3ds/two_tex.inc b/programs/demos/view3ds/two_tex.inc index af8723b5c9..f4d9c03357 100644 --- a/programs/demos/view3ds/two_tex.inc +++ b/programs/demos/view3ds/two_tex.inc @@ -1,1105 +1,1105 @@ - -;SIZE_X equ 350 -;SIZE_Y equ 350 -;ROUND equ 8 -;TEX_X equ 512 -;TEX_Y equ 512 -;TEXTURE_SIZE EQU (512*512)-1 -;TEX_SHIFT EQU 9 - -;CATMULL_SHIFT equ 8 -;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 -;Ext = SSE -;SSE = 3 -;MMX = 1 -;NON = 0 -;use32 -;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- -;------- DOS 13h mode demos -------------------------------------------- -;------- Procedure draws triangle with two overlapped textures, I use -- -;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- -;--------I calc texture pixel by this way: col1*col2/256 --------------- 
-two_tex_triangle_z: -;------------------in - eax - x1 shl 16 + y1 ----------- -;---------------------- ebx - x2 shl 16 + y2 ----------- -;---------------------- ecx - x3 shl 16 + y3 ----------- -;---------------------- edx - pointer to b. texture----- -;---------------------- esi - pointer to e. texture----- -;---------------------- edi - pointer to screen buffer-- -;---------------------- stack : b. tex coordinates------ -;---------------------- e. tex coordinates------ -;---------------------- Z position coordinates-- -;---------------------- pointer io Z buffer----- -;-- Z-buffer - filled with coordinates as dword -------- -;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- -.b_x1 equ ebp+4 ; procedure don't save registers !!! -.b_y1 equ ebp+6 ; each coordinate as word -.b_x2 equ ebp+8 -.b_y2 equ ebp+10 ; b - first texture -.b_x3 equ ebp+12 -.b_y3 equ ebp+14 ; e - second texture -.e_x1 equ ebp+16 -.e_y1 equ ebp+18 -.e_x2 equ ebp+20 -.e_y2 equ ebp+22 -.e_x3 equ ebp+24 -.e_y3 equ ebp+26 -.z1 equ word[ebp+28] -.z2 equ word[ebp+30] -.z3 equ word[ebp+32] -.z_buff equ dword[ebp+34] ; pointer to Z-buffer - - -.t_bmap equ dword[ebp-4] ; pointer to b. texture -.t_emap equ dword[ebp-8] ; pointer to e. 
texture -.x1 equ word[ebp-10] -.y1 equ word[ebp-12] -.x2 equ word[ebp-14] -.y2 equ word[ebp-16] -.x3 equ word[ebp-18] -.y3 equ word[ebp-20] - -.dx12 equ dword[ebp-24] -.dbx12 equ dword[ebp-28] -.dby12 equ dword[ebp-32] -.dby12q equ [ebp-32] -.dex12 equ dword[ebp-36] -.dey12 equ dword[ebp-40] -.dey12q equ [ebp-40] -.dz12 equ dword[ebp-44] - -.dx13 equ dword[ebp-48] -.dbx13 equ dword[ebp-52] -.dby13 equ dword[ebp-56] -.dby13q equ [ebp-56] -.dex13 equ dword[ebp-60] -.dey13 equ dword[ebp-64] -.dey13q equ [ebp-64] -.dz13 equ dword[ebp-68] - -.dx23 equ dword[ebp-72] -.dbx23 equ dword[ebp-76] -.dby23 equ dword[ebp-80] -.dby23q equ [ebp-80] -.dex23 equ dword[ebp-84] -.dey23 equ dword[ebp-88] -.dey23q equ [ebp-88] -.dz23 equ dword[ebp-92] - -.cx1 equ dword[ebp-96] ; current variables -.cx2 equ dword[ebp-100] -.cbx1 equ dword[ebp-104] -.cby1 equ [ebp-108] -.cex1 equ dword[ebp-112] -.cey1 equ [ebp-116] -.cbx2 equ dword[ebp-120] -.cby2 equ [ebp-124] -.cex2 equ dword[ebp-128] -.cey2 equ [ebp-132] - -.cz1 equ dword[ebp-136] -.cz2 equ dword[ebp-140] - - if Ext >= MMX - emms - else - cld - end if - mov ebp,esp - push edx esi ; store bump map -; push esi ; store e. map - ; sub esp,120 - .sort3: ; sort triangle coordinates... 
- cmp ax,bx - jle .sort1 - xchg eax,ebx - mov edx,dword[.b_x1] - xchg edx,dword[.b_x2] - mov dword[.b_x1],edx - mov edx,dword[.e_x1] - xchg edx,dword[.e_x2] - mov dword[.e_x1],edx - mov dx,.z1 - xchg dx,.z2 - mov .z1,dx - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - mov edx,dword[.b_x2] - xchg edx,dword[.b_x3] - mov dword[.b_x2],edx - mov edx,dword[.e_x2] - xchg edx,dword[.e_x3] - mov dword[.e_x2],edx - mov dx,.z2 - xchg dx,.z3 - mov .z2,dx - jmp .sort3 - .sort2: - push eax ebx ecx ; store triangle coords in variables -; push ebx -; push ecx - - mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that - and edx,ebx ; if *all* of them are negative a sign flag is raised - and edx,ecx - and edx,eax - test edx,80008000h ; Check both X&Y at once - jne .loop23_done - ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that - ; or edx,ebx ; if any *one* of them is negative a sign flag is raised - ; or edx,ecx - ; test edx,80000000h ; Check only X - ; jne .loop23_done - - ; cmp .x1,SIZE_X ; { - ; jg .loop23_done - ; cmp .x2,SIZE_X ; This can be optimized with effort - ; jg .loop23_done - ; cmp .x3,SIZE_X - ; jg .loop23_done ; { - - - mov bx,.y2 ; calc delta 12 - sub bx,.y1 - jnz .bt_dx12_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx12_done - .bt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx -; mov .dx12,eax - push eax - -if Ext=SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - ; mov eax,255 - cvtsi2ss xmm4,[i255d] ;eax - divss xmm3,xmm4 - rcpss xmm3,xmm3 - ; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - - movd mm0,[.b_x1] - movd mm1,[.b_x2] - movd mm2,[.e_x1] - movd mm3,[.e_x2] - ; psubsw mm3,mm2 - ; psubsw mm1,mm0 - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - ; pslld mm0,ROUND - ; pslld mm1,ROUND - ; pslld mm2,ROUND - ; pslld mm3,ROUND - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - 
movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - ; pxor mm4,mm4 - ; movq mm5,mm1 - ; movq mm6,mm1 - ; pcmpeqb mm5,mm4 -; psubd mm1,mm0 -; psubd mm3,mm2 - - ; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 - ; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 - ; pxor - ; punpcklhd mm0,mm1 ; lwd ; - ; psubw mm1,mm0 ; mm1, mm0 - ; pxor mm2,mm2 - ; pmovmaskb eax,mm1 - ; and eax,10101010b - ; pcmpgtw mm2,mm1 - ; punpcklwd mm1,mm2 - ; psllw mm0,ROUND - ; psllw mm1,ROUND - ; movq mm2,mm0 - ; psrlq mm0,32 - -; cvtpi2ps xmm0,mm1 -; movlhps xmm0,xmm0 -; cvtpi2ps xmm0,mm3 - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey12q,mm0 - movq .dby12q,mm1 - -; movd .dex12,mm0 -; psrlq mm0,32 -; movd .dey12,mm0 -; movhlps xmm1,xmm1 -; cvtps2pi mm0,xmm1 -; movd .dbx12,mm0 -; psrlq mm0,32 -; movd .dby12,mm0 - -else - mov ax,word[.b_x2] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx12,eax - push eax - - mov ax,word[.b_y2] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby12,eax - push eax - - ; mov eax,.dbx12 - ; mov ebx,.dby12 - ; int3 - - mov ax,word[.e_x2] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex12,eax - push eax - - mov ax,word[.e_y2] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey12,eax - push eax - -end if - mov ax,.z2 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - push eax - .bt_dx12_done: - - mov bx,.y3 ; calc delta13 - sub bx,.y1 - jnz .bt_dx13_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx13_done - .bt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx13,eax - push eax - -if Ext=SSE - - cvtsi2ss xmm3,ebx - ; mov eax,255 - cvtsi2ss xmm4,[i255d] - divss xmm3,xmm4 - rcpss xmm3,xmm3 -; mulss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 - - movd mm0,[.b_x1] - movd mm1,[.b_x3] - 
movd mm2,[.e_x1] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - ; divps xmm1,xmm3 - mulps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey13q,mm0 - movq .dby13q,mm1 - -else - - mov ax,word[.b_x3] - sub ax,word[.b_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx13,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby13,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex13,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y1] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey13,eax - push eax - -end if - - mov ax,.z3 - sub ax,.z1 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz13,eax - push eax - .bt_dx13_done: - - mov bx,.y3 ; calc delta23 - sub bx,.y2 - jnz .bt_dx23_make - mov ecx,6 - xor edx,edx - @@: - push edx ;dword 0 - loop @b - jmp .bt_dx23_done - .bt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND - cdq - idiv ebx - ; mov .dx23,eax - push eax - -if Ext=SSE - - cvtsi2ss xmm3,ebx - ; mov eax,255 - cvtsi2ss xmm4,[i255d] ;eax - divss xmm3,xmm4 - shufps xmm3,xmm3,0 - sub esp,16 - - movd mm0,[.b_x2] - movd mm1,[.b_x3] - movd mm2,[.e_x2] - movd mm3,[.e_x3] - - pxor mm4,mm4 - punpcklwd mm0,mm4 - punpcklwd mm1,mm4 - punpcklwd mm2,mm4 - punpcklwd mm3,mm4 - - cvtpi2ps xmm0,mm0 - movlhps xmm0,xmm0 - cvtpi2ps xmm0,mm2 - cvtpi2ps xmm1,mm1 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,mm3 - subps xmm1,xmm0 - - divps xmm1,xmm3 - shufps xmm1,xmm1,10110001b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dey23q,mm0 - movq .dby23q,mm1 - -else - - mov ax,word[.b_x3] - sub 
ax,word[.b_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dbx23,eax - push eax - - mov ax,word[.b_y3] - sub ax,word[.b_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dby23,eax - push eax - - mov ax,word[.e_x3] - sub ax,word[.e_x2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dex23,eax - push eax - - mov ax,word[.e_y3] - sub ax,word[.e_y2] - cwde - shl eax,ROUND - cdq - idiv ebx - ; mov .dey23,eax - push eax -end if - mov ax,.z3 - sub ax,.z2 - cwde - shl eax,CATMULL_SHIFT - cdq - idiv ebx - ; mov .dz23,eax - push eax - ; sub esp,40 - .bt_dx23_done: - movsx eax,.x1 - shl eax,ROUND - ; mov .cx1,eax - ; mov .cx2,eax - push eax eax - ; push eax - - movsx eax,word[.b_x1] - shl eax,ROUND - mov .cbx1,eax - mov .cbx2,eax - ; push eax eax - ; push eax - - movsx eax,word[.b_y1] - shl eax,ROUND - mov .cby1,eax - mov .cby2,eax - ; push eax eax - ; push eax - - movsx eax,word[.e_x1] - shl eax,ROUND - mov .cex1,eax - mov .cex2,eax - ; push eax eax - ;push eax - - movsx eax,word[.e_y1] - shl eax,ROUND - mov .cey1,eax - mov .cey2,eax - sub esp,32 - ; push eax eax - ;push eax - - movsx eax,.z1 - shl eax,CATMULL_SHIFT - ; mov .cz1,eax - ; mov .cz2,eax - push eax eax - ;push eax - - movsx ecx,.y1 - cmp cx,.y2 - jge .loop12_done - .loop12: - call .call_line - - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx12 - add .cx2,ebx -if Ext>= SSE2 - movups xmm0,.cey1 - movups xmm1,.cey2 - movups xmm2,.dey12q - movups xmm3,.dey13q - paddd xmm0,xmm3 - paddd xmm1,xmm2 - movups .cey1,xmm0 - movups .cey2,xmm1 -else if (Ext = MMX) | (Ext=SSE) - movq mm0,.cby2 ; with this optimization object - movq mm1,.cby1 ; looks bit annoying - movq mm2,.cey2 - movq mm3,.cey1 - paddd mm0,.dby12q - paddd mm1,.dby13q - paddd mm2,.dey12q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey1,mm3 - movq .cey2,mm2 -else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx12 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby12 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov 
ebx,.dex12 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey12 - add .cey2,eax - -end if - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz12 - add .cz2,edx - - inc ecx - cmp cx,.y2 - jl .loop12 - .loop12_done: - - movsx ecx,.y2 - cmp cx,.y3 - jge .loop23_done - - movsx eax,.z2 - shl eax,CATMULL_SHIFT - mov .cz2,eax - - movsx eax,.x2 - shl eax,ROUND - mov .cx2,eax - - movzx eax,word[.b_x2] - shl eax,ROUND - mov .cbx2,eax - - movzx eax,word[.b_y2] - shl eax,ROUND - mov .cby2,eax - - movzx eax,word[.e_x2] - shl eax,ROUND - mov .cex2,eax - - movzx eax,word[.e_y2] - shl eax,ROUND - mov .cey2,eax - - .loop23: - call .call_line -;if Ext = NON - mov eax,.dx13 - add .cx1,eax - mov ebx,.dx23 - add .cx2,ebx -if Ext>= SSE2 - movups xmm0,.cey1 - movups xmm1,.cey2 - movups xmm2,.dey23q - movups xmm3,.dey13q - paddd xmm0,xmm3 - paddd xmm1,xmm2 - movups .cey1,xmm0 - movups .cey2,xmm1 -else if (Ext = MMX) | ( Ext = SSE) - movq mm0,.cby2 ; with this mmx optimization object looks bit - movq mm1,.cby1 ; annoying - movq mm2,.cey2 - movq mm3,.cey1 - paddd mm0,.dby23q - paddd mm1,.dby13q - paddd mm2,.dey23q - paddd mm3,.dey13q - movq .cby2,mm0 - movq .cby1,mm1 - movq .cey2,mm2 - movq .cey1,mm3 - -else - mov edx,.dbx13 - add .cbx1,edx - mov eax,.dbx23 - add .cbx2,eax - mov ebx,.dby13 - add .cby1,ebx - mov edx,.dby23 - add .cby2,edx - - mov eax,.dex13 - add .cex1,eax - mov ebx,.dex23 - add .cex2,ebx - mov edx,.dey13 - add .cey1,edx - mov eax,.dey23 - add .cey2,eax -end if - - mov ebx,.dz13 - add .cz1,ebx - mov edx,.dz23 - add .cz2,edx -;else -; movq mm0,.db13q -; movq mm1,.cbx1q - - inc ecx - cmp cx,.y3 - jl .loop23 - .loop23_done: - - mov esp,ebp -ret 34 - -.call_line: - - pushad - - push .cz1 - push .cz2 - push .z_buff - push .t_bmap - push .t_emap - push dword .cey2 - push .cex2 - push dword .cey1 - push .cex1 - push dword .cby2 - push .cbx2 - push dword .cby1 - push .cbx1 - push ecx - - mov eax,.cx1 - sar eax,ROUND - mov ebx,.cx2 - sar ebx,ROUND - - call two_tex_line_z - - 
popad -ret -two_tex_line_z: -;--------------in: eax - x1 -;-------------- ebx - x2 -;-------------- edi - pointer to screen buffer -;stack - another parameters : -.y equ dword [ebp+4] -.bx1 equ [ebp+8] ; --- -.by1 equ [ebp+12] ; | -.bx2 equ [ebp+16] ; | -.by2 equ [ebp+20] ; |> b. texture and e. texture coords -.ex1 equ [ebp+24] ; |> shifted shl ROUND -.ey1 equ [ebp+28] ; | -.ex2 equ [ebp+32] ; | -.ey2 equ [ebp+36] ; --- -.emap equ [ebp+40] ; b texture offset -.bmap equ [ebp+44] ; e texture offset -.z_buff equ dword [ebp+48] -.z2 equ dword [ebp+52] ; -- |> z coords shifted -.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT - -.x1 equ dword [ebp-4] -.x2 equ dword [ebp-8] -.dbx equ [ebp-12] -.dex equ [ebp-16] -.dby equ [ebp-20] -.dey equ [ebp-24] -.dz equ dword [ebp-28] -.cbx equ [ebp-32] -.cex equ [ebp-36] -.cby equ [ebp-40] -.cey equ [ebp-44] -.cz equ dword [ebp-48] -.czbuff equ dword [ebp-52] - - mov ebp,esp - - mov ecx,.y - or ecx,ecx - jl .bl_end - mov dx,word[size_y_var] - dec dx - cmp cx,dx ;word[size_y_var] ;SIZE_Y - jge .bl_end - - cmp eax,ebx - jl @f - je .bl_end - - xchg eax,ebx -if Ext=NON - mov edx,.bx1 - xchg edx,.bx2 - mov .bx1,edx - mov edx,.by1 - xchg edx,.by2 - mov .by1,edx - - mov edx,.ex1 - xchg edx,.ex2 - mov .ex1,edx - mov edx,.ey1 - xchg edx,.ey2 - mov .ey1,edx -else - movq mm0,.bx1 - movq mm1,.ex1 - movq mm2,.bx2 - movq mm3,.ex2 - movq .bx2,mm0 - movq .ex2,mm1 - movq .bx1,mm2 - movq .ex1,mm3 -end if - mov edx,.z1 - xchg edx,.z2 - mov .z1,edx - @@: - push eax ebx ;store x1, x2 - mov ebx,.x1 - movzx edx,word[size_x_var] - dec edx - cmp ebx,edx - ; cmp bx,word[size_x_var] ;SIZE_X - jg .bl_end - cmp .x2,0 - jle .bl_end - - mov ebx,.x2 - sub ebx,.x1 - -if Ext >= SSE - - sub esp,16 - cvtsi2ss xmm3,ebx ;rcps - shufps xmm3,xmm3,0 - - ; movq mm0,.bx1q - ; movq mm1,.bx2q - ; movq mm2,.ex1q - ; movq mm3,.ex2q - ; psubd mm1,mm0 - ; psubd mm3,mm2 - ; cvtpi2ps xmm1,mm1 - ; movlhps xmm1,xmm1 - ; cvtpi2ps xmm1,mm3 - - cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 - movlhps 
xmm0,xmm0 - cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 - cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 - movlhps xmm1,xmm1 - cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 - subps xmm1,xmm0 - ; hi lo - divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex - - shufps xmm1,xmm1,11011000b - cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords - movhlps xmm1,xmm1 - cvtps2pi mm1,xmm1 - movq .dex,mm0 ; hi - lo -> dbx, dex - movq .dey,mm1 ; hi - lo -> dby, dey - -else - - mov eax,.bx2 ; calc .dbx - sub eax,.bx1 - cdq - idiv ebx - push eax - - mov eax,.ex2 ; calc .dby - sub eax,.ex1 - cdq - idiv ebx - push eax - - mov eax,.by2 ; calc .dex - sub eax,.by1 - cdq - idiv ebx - push eax - - mov eax,.ey2 ; calc .dey - sub eax,.ey1 - cdq - idiv ebx - push eax - -end if - - mov eax,.z2 ; calc .dz - sub eax,.z1 - cdq - idiv ebx - push eax - - cmp .x1,0 ; set correctly begin variable - jge @f ; CLIPPING ON FUNCTION - ; cutting triangle exceedes screen - mov ebx,.x1 - neg ebx - imul ebx ; eax = .dz * abs(.x1) - add .z1,eax - mov .x1,0 - - mov eax,.dbx - imul ebx - add .bx1,eax - - mov eax,.dby - imul ebx - add .by1,eax - - mov eax,.dex - imul ebx - add .ex1,eax - - mov eax,.dey - imul ebx - add .ey1,eax - @@: - movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] - mov ebx,.x2 - cmp eax,ebx - jg @f - mov .x2,eax - @@: - ; movd mm0,eax - ; movd mm1,.x2 - ; pminsw mm0,mm1 - ; movd .x2,mm0 - ; cmp .x2,SIZE_X ;eax | - ; jl @f |> this dont work idk cause - ; mov .x2,SIZE_X ;eax | - @@: - ; movzx eax,word[size_x_var] ;calc memory begin in buffers - mov ebx,.y - mul ebx - mov ebx,.x1 - add eax,ebx - mov ebx,eax - lea eax,[eax*3] - add edi,eax ; edi - screen - mov esi,.z_buff ; z-buffer filled with dd variables - shl ebx,2 - add esi,ebx ; esi - Z buffer - - mov ecx,.x2 - sub ecx,.x1 - ; init current variables - push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi - push dword .ex1 - push dword .by1 - push dword .ey1 - - push .z1 ; current z shl CATMULL_SHIFT - push esi - -if Ext >= MMX - pxor mm0,mm0 - movq mm3,.cex ; hi - lo -> cbx; cex - movq mm4,.cey ; 
hi - lo -> cby; cey -; movq mm5,mm3 -; movq mm6,mm4 -; psrad mm5,ROUND -; psrad mm6,ROUND -; movq .ceyq,mm5 -; movq .cbyq,mm6 - mov edx,.czbuff -else - cld -end if - .draw: - ; if TEX = SHIFTING ;bump drawing only in shifting mode -if Ext=NON - mov esi,.czbuff ; .czbuff current address in buffer - mov ebx,.cz ; .cz - cur z position - cmp ebx,dword[esi] -else - mov ebx,.cz - cmp ebx,dword[edx] -end if - jge .skip - -if Ext=NON - mov eax,.cby - mov esi,.cbx - sar eax,ROUND - sar esi,ROUND - shl eax,TEX_SHIFT ;- - add esi,eax - lea esi,[esi*3] ;- ; esi - current b. texture addres - add esi,.bmap - - mov ebx,.cex ;.cex - current env map X - mov eax,.cey ;.cey - current env map y - sar ebx,ROUND - sar eax,ROUND - - shl eax,TEX_SHIFT - add ebx,eax - lea ebx,[ebx*3] - add ebx,.emap - - -else - movq mm5,mm4 ;.cey - psrad mm5,ROUND - pslld mm5,TEX_SHIFT - movq mm6,mm3 ;.cex - psrad mm6,ROUND - paddd mm5,mm6 - movq mm6,mm5 - paddd mm5,mm5 - paddd mm5,mm6 - paddd mm5,.emap - movd esi,mm5 - psrlq mm5,32 - movd ebx,mm5 -end if -if Ext>=MMX - movd mm1,[esi] - movd mm2,[ebx] - punpcklbw mm1,mm0 - punpcklbw mm2,mm0 - pmullw mm1,mm2 - psrlw mm1,8 - packuswb mm1,mm0 - movd [edi],mm1 - mov ebx,.cz - mov dword[edx],ebx -else - cld ; esi - tex e. - lodsb ; ebx - tex b. 
- mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - inc ebx - lodsb - mov dl,[ebx] - mul dl - shr ax,8 - stosb - mov ebx,.cz - mov esi,.czbuff - mov dword[esi],ebx - jmp .no_skip -end if - .skip: - add edi,3 - - if Ext = NON - .no_skip: - add .czbuff,4 - mov eax,.dbx - add .cbx,eax - mov eax,.dby - add .cby,eax - mov eax,.dex - add .cex,eax - mov eax,.dey - add .cey,eax - else - add edx,4 - paddd mm3,.dex - paddd mm4,.dey - ; movq mm5,mm3 - ; movq mm6,mm4 - ; psrad mm5,ROUND - ; psrad mm6,ROUND - ; movq .cex,mm3 - ; movq .cey,mm4 - end if - mov eax,.dz - add .cz,eax - if Ext = NON - dec ecx - jnz .draw - else - loop .draw - end if - - .bl_end: - mov esp,ebp -ret 56 - + +;SIZE_X equ 350 +;SIZE_Y equ 350 +;ROUND equ 8 +;TEX_X equ 512 +;TEX_Y equ 512 +;TEXTURE_SIZE EQU (512*512)-1 +;TEX_SHIFT EQU 9 + +;CATMULL_SHIFT equ 8 +;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 +;Ext = SSE +;SSE = 3 +;MMX = 1 +;NON = 0 +;use32 +;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- +;------- DOS 13h mode demos -------------------------------------------- +;------- Procedure draws triangle with two overlapped textures, I use -- +;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- +;--------I calc texture pixel by this way: col1*col2/256 --------------- +two_tex_triangle_z: +;------------------in - eax - x1 shl 16 + y1 ----------- +;---------------------- ebx - x2 shl 16 + y2 ----------- +;---------------------- ecx - x3 shl 16 + y3 ----------- +;---------------------- edx - pointer to b. texture----- +;---------------------- esi - pointer to e. texture----- +;---------------------- edi - pointer to screen buffer-- +;---------------------- stack : b. tex coordinates------ +;---------------------- e. tex coordinates------ +;---------------------- Z position coordinates-- +;---------------------- pointer io Z buffer----- +;-- Z-buffer - filled with coordinates as dword -------- +;-- (Z coor. 
as word) shl CATMULL_SHIFT ---------------- +.b_x1 equ ebp+4 ; procedure don't save registers !!! +.b_y1 equ ebp+6 ; each coordinate as word +.b_x2 equ ebp+8 +.b_y2 equ ebp+10 ; b - first texture +.b_x3 equ ebp+12 +.b_y3 equ ebp+14 ; e - second texture +.e_x1 equ ebp+16 +.e_y1 equ ebp+18 +.e_x2 equ ebp+20 +.e_y2 equ ebp+22 +.e_x3 equ ebp+24 +.e_y3 equ ebp+26 +.z1 equ word[ebp+28] +.z2 equ word[ebp+30] +.z3 equ word[ebp+32] +.z_buff equ dword[ebp+34] ; pointer to Z-buffer + + +.t_bmap equ dword[ebp-4] ; pointer to b. texture +.t_emap equ dword[ebp-8] ; pointer to e. texture +.x1 equ word[ebp-10] +.y1 equ word[ebp-12] +.x2 equ word[ebp-14] +.y2 equ word[ebp-16] +.x3 equ word[ebp-18] +.y3 equ word[ebp-20] + +.dx12 equ dword[ebp-24] +.dbx12 equ dword[ebp-28] +.dby12 equ dword[ebp-32] +.dby12q equ [ebp-32] +.dex12 equ dword[ebp-36] +.dey12 equ dword[ebp-40] +.dey12q equ [ebp-40] +.dz12 equ dword[ebp-44] + +.dx13 equ dword[ebp-48] +.dbx13 equ dword[ebp-52] +.dby13 equ dword[ebp-56] +.dby13q equ [ebp-56] +.dex13 equ dword[ebp-60] +.dey13 equ dword[ebp-64] +.dey13q equ [ebp-64] +.dz13 equ dword[ebp-68] + +.dx23 equ dword[ebp-72] +.dbx23 equ dword[ebp-76] +.dby23 equ dword[ebp-80] +.dby23q equ [ebp-80] +.dex23 equ dword[ebp-84] +.dey23 equ dword[ebp-88] +.dey23q equ [ebp-88] +.dz23 equ dword[ebp-92] + +.cx1 equ dword[ebp-96] ; current variables +.cx2 equ dword[ebp-100] +.cbx1 equ dword[ebp-104] +.cby1 equ [ebp-108] +.cex1 equ dword[ebp-112] +.cey1 equ [ebp-116] +.cbx2 equ dword[ebp-120] +.cby2 equ [ebp-124] +.cex2 equ dword[ebp-128] +.cey2 equ [ebp-132] + +.cz1 equ dword[ebp-136] +.cz2 equ dword[ebp-140] + + if Ext >= MMX + emms + else + cld + end if + mov ebp,esp + push edx esi ; store bump map +; push esi ; store e. map + ; sub esp,120 + .sort3: ; sort triangle coordinates... 
+ cmp ax,bx + jle .sort1 + xchg eax,ebx + mov edx,dword[.b_x1] + xchg edx,dword[.b_x2] + mov dword[.b_x1],edx + mov edx,dword[.e_x1] + xchg edx,dword[.e_x2] + mov dword[.e_x1],edx + mov dx,.z1 + xchg dx,.z2 + mov .z1,dx + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + mov edx,dword[.b_x2] + xchg edx,dword[.b_x3] + mov dword[.b_x2],edx + mov edx,dword[.e_x2] + xchg edx,dword[.e_x3] + mov dword[.e_x2],edx + mov dx,.z2 + xchg dx,.z3 + mov .z2,dx + jmp .sort3 + .sort2: + push eax ebx ecx ; store triangle coords in variables +; push ebx +; push ecx + + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop23_done + ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that + ; or edx,ebx ; if any *one* of them is negative a sign flag is raised + ; or edx,ecx + ; test edx,80000000h ; Check only X + ; jne .loop23_done + + ; cmp .x1,SIZE_X ; { + ; jg .loop23_done + ; cmp .x2,SIZE_X ; This can be optimized with effort + ; jg .loop23_done + ; cmp .x3,SIZE_X + ; jg .loop23_done ; { + + + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .bt_dx12_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx12_done + .bt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx +; mov .dx12,eax + push eax + +if Ext=SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + ; mov eax,255 + cvtsi2ss xmm4,[i255d] ;eax + divss xmm3,xmm4 + rcpss xmm3,xmm3 + ; mulss xmm3,xmm4 + shufps xmm3,xmm3,0 + + movd mm0,[.b_x1] + movd mm1,[.b_x2] + movd mm2,[.e_x1] + movd mm3,[.e_x2] + ; psubsw mm3,mm2 + ; psubsw mm1,mm0 + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + ; pslld mm0,ROUND + ; pslld mm1,ROUND + ; pslld mm2,ROUND + ; pslld mm3,ROUND + cvtpi2ps xmm0,mm0 + movlhps xmm0,xmm0 + cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + 
movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + subps xmm1,xmm0 + + ; pxor mm4,mm4 + ; movq mm5,mm1 + ; movq mm6,mm1 + ; pcmpeqb mm5,mm4 +; psubd mm1,mm0 +; psubd mm3,mm2 + + ; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 + ; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 + ; pxor + ; punpcklhd mm0,mm1 ; lwd ; + ; psubw mm1,mm0 ; mm1, mm0 + ; pxor mm2,mm2 + ; pmovmaskb eax,mm1 + ; and eax,10101010b + ; pcmpgtw mm2,mm1 + ; punpcklwd mm1,mm2 + ; psllw mm0,ROUND + ; psllw mm1,ROUND + ; movq mm2,mm0 + ; psrlq mm0,32 + +; cvtpi2ps xmm0,mm1 +; movlhps xmm0,xmm0 +; cvtpi2ps xmm0,mm3 + ; divps xmm1,xmm3 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey12q,mm0 + movq .dby12q,mm1 + +; movd .dex12,mm0 +; psrlq mm0,32 +; movd .dey12,mm0 +; movhlps xmm1,xmm1 +; cvtps2pi mm0,xmm1 +; movd .dbx12,mm0 +; psrlq mm0,32 +; movd .dby12,mm0 + +else + mov ax,word[.b_x2] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx12,eax + push eax + + mov ax,word[.b_y2] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby12,eax + push eax + + ; mov eax,.dbx12 + ; mov ebx,.dby12 + ; int3 + + mov ax,word[.e_x2] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex12,eax + push eax + + mov ax,word[.e_y2] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey12,eax + push eax + +end if + mov ax,.z2 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + push eax + .bt_dx12_done: + + mov bx,.y3 ; calc delta13 + sub bx,.y1 + jnz .bt_dx13_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx13_done + .bt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx13,eax + push eax + +if Ext=SSE + + cvtsi2ss xmm3,ebx + ; mov eax,255 + cvtsi2ss xmm4,[i255d] + divss xmm3,xmm4 + rcpss xmm3,xmm3 +; mulss xmm3,xmm4 + shufps xmm3,xmm3,0 + sub esp,16 + + movd mm0,[.b_x1] + movd mm1,[.b_x3] + 
movd mm2,[.e_x1] + movd mm3,[.e_x3] + + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + cvtpi2ps xmm0,mm0 + movlhps xmm0,xmm0 + cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + subps xmm1,xmm0 + + ; divps xmm1,xmm3 + mulps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey13q,mm0 + movq .dby13q,mm1 + +else + + mov ax,word[.b_x3] + sub ax,word[.b_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx13,eax + push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby13,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex13,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y1] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey13,eax + push eax + +end if + + mov ax,.z3 + sub ax,.z1 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz13,eax + push eax + .bt_dx13_done: + + mov bx,.y3 ; calc delta23 + sub bx,.y2 + jnz .bt_dx23_make + mov ecx,6 + xor edx,edx + @@: + push edx ;dword 0 + loop @b + jmp .bt_dx23_done + .bt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND + cdq + idiv ebx + ; mov .dx23,eax + push eax + +if Ext=SSE + + cvtsi2ss xmm3,ebx + ; mov eax,255 + cvtsi2ss xmm4,[i255d] ;eax + divss xmm3,xmm4 + shufps xmm3,xmm3,0 + sub esp,16 + + movd mm0,[.b_x2] + movd mm1,[.b_x3] + movd mm2,[.e_x2] + movd mm3,[.e_x3] + + pxor mm4,mm4 + punpcklwd mm0,mm4 + punpcklwd mm1,mm4 + punpcklwd mm2,mm4 + punpcklwd mm3,mm4 + + cvtpi2ps xmm0,mm0 + movlhps xmm0,xmm0 + cvtpi2ps xmm0,mm2 + cvtpi2ps xmm1,mm1 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,mm3 + subps xmm1,xmm0 + + divps xmm1,xmm3 + shufps xmm1,xmm1,10110001b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dey23q,mm0 + movq .dby23q,mm1 + +else + + mov ax,word[.b_x3] + sub 
ax,word[.b_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dbx23,eax + push eax + + mov ax,word[.b_y3] + sub ax,word[.b_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dby23,eax + push eax + + mov ax,word[.e_x3] + sub ax,word[.e_x2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dex23,eax + push eax + + mov ax,word[.e_y3] + sub ax,word[.e_y2] + cwde + shl eax,ROUND + cdq + idiv ebx + ; mov .dey23,eax + push eax +end if + mov ax,.z3 + sub ax,.z2 + cwde + shl eax,CATMULL_SHIFT + cdq + idiv ebx + ; mov .dz23,eax + push eax + ; sub esp,40 + .bt_dx23_done: + movsx eax,.x1 + shl eax,ROUND + ; mov .cx1,eax + ; mov .cx2,eax + push eax eax + ; push eax + + movsx eax,word[.b_x1] + shl eax,ROUND + mov .cbx1,eax + mov .cbx2,eax + ; push eax eax + ; push eax + + movsx eax,word[.b_y1] + shl eax,ROUND + mov .cby1,eax + mov .cby2,eax + ; push eax eax + ; push eax + + movsx eax,word[.e_x1] + shl eax,ROUND + mov .cex1,eax + mov .cex2,eax + ; push eax eax + ;push eax + + movsx eax,word[.e_y1] + shl eax,ROUND + mov .cey1,eax + mov .cey2,eax + sub esp,32 + ; push eax eax + ;push eax + + movsx eax,.z1 + shl eax,CATMULL_SHIFT + ; mov .cz1,eax + ; mov .cz2,eax + push eax eax + ;push eax + + movsx ecx,.y1 + cmp cx,.y2 + jge .loop12_done + .loop12: + call .call_line + + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx12 + add .cx2,ebx +if Ext>= SSE2 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey12q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 +else if (Ext = MMX) | (Ext=SSE) + movq mm0,.cby2 ; with this optimization object + movq mm1,.cby1 ; looks bit annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby12q + paddd mm1,.dby13q + paddd mm2,.dey12q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey1,mm3 + movq .cey2,mm2 +else + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx12 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby12 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov 
ebx,.dex12 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey12 + add .cey2,eax + +end if + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz12 + add .cz2,edx + + inc ecx + cmp cx,.y2 + jl .loop12 + .loop12_done: + + movsx ecx,.y2 + cmp cx,.y3 + jge .loop23_done + + movsx eax,.z2 + shl eax,CATMULL_SHIFT + mov .cz2,eax + + movsx eax,.x2 + shl eax,ROUND + mov .cx2,eax + + movzx eax,word[.b_x2] + shl eax,ROUND + mov .cbx2,eax + + movzx eax,word[.b_y2] + shl eax,ROUND + mov .cby2,eax + + movzx eax,word[.e_x2] + shl eax,ROUND + mov .cex2,eax + + movzx eax,word[.e_y2] + shl eax,ROUND + mov .cey2,eax + + .loop23: + call .call_line +;if Ext = NON + mov eax,.dx13 + add .cx1,eax + mov ebx,.dx23 + add .cx2,ebx +if Ext>= SSE2 + movups xmm0,.cey1 + movups xmm1,.cey2 + movups xmm2,.dey23q + movups xmm3,.dey13q + paddd xmm0,xmm3 + paddd xmm1,xmm2 + movups .cey1,xmm0 + movups .cey2,xmm1 +else if (Ext = MMX) | ( Ext = SSE) + movq mm0,.cby2 ; with this mmx optimization object looks bit + movq mm1,.cby1 ; annoying + movq mm2,.cey2 + movq mm3,.cey1 + paddd mm0,.dby23q + paddd mm1,.dby13q + paddd mm2,.dey23q + paddd mm3,.dey13q + movq .cby2,mm0 + movq .cby1,mm1 + movq .cey2,mm2 + movq .cey1,mm3 + +else + mov edx,.dbx13 + add .cbx1,edx + mov eax,.dbx23 + add .cbx2,eax + mov ebx,.dby13 + add .cby1,ebx + mov edx,.dby23 + add .cby2,edx + + mov eax,.dex13 + add .cex1,eax + mov ebx,.dex23 + add .cex2,ebx + mov edx,.dey13 + add .cey1,edx + mov eax,.dey23 + add .cey2,eax +end if + + mov ebx,.dz13 + add .cz1,ebx + mov edx,.dz23 + add .cz2,edx +;else +; movq mm0,.db13q +; movq mm1,.cbx1q + + inc ecx + cmp cx,.y3 + jl .loop23 + .loop23_done: + + mov esp,ebp +ret 34 + +.call_line: + + pushad + + push .cz1 + push .cz2 + push .z_buff + push .t_bmap + push .t_emap + push dword .cey2 + push .cex2 + push dword .cey1 + push .cex1 + push dword .cby2 + push .cbx2 + push dword .cby1 + push .cbx1 + push ecx + + mov eax,.cx1 + sar eax,ROUND + mov ebx,.cx2 + sar ebx,ROUND + + call two_tex_line_z + + 
popad +ret +two_tex_line_z: +;--------------in: eax - x1 +;-------------- ebx - x2 +;-------------- edi - pointer to screen buffer +;stack - other parameters : +.y equ dword [ebp+4] +.bx1 equ [ebp+8] ; --- +.by1 equ [ebp+12] ; | +.bx2 equ [ebp+16] ; | +.by2 equ [ebp+20] ; |> b. texture and e. texture coords +.ex1 equ [ebp+24] ; |> shifted shl ROUND +.ey1 equ [ebp+28] ; | +.ex2 equ [ebp+32] ; | +.ey2 equ [ebp+36] ; --- +.emap equ [ebp+40] ; e texture offset +.bmap equ [ebp+44] ; b texture offset +.z_buff equ dword [ebp+48] +.z2 equ dword [ebp+52] ; -- |> z coords shifted +.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT + +.x1 equ dword [ebp-4] +.x2 equ dword [ebp-8] +.dbx equ [ebp-12] +.dex equ [ebp-16] +.dby equ [ebp-20] +.dey equ [ebp-24] +.dz equ dword [ebp-28] +.cbx equ [ebp-32] +.cex equ [ebp-36] +.cby equ [ebp-40] +.cey equ [ebp-44] +.cz equ dword [ebp-48] +.czbuff equ dword [ebp-52] + + mov ebp,esp + + mov ecx,.y + or ecx,ecx + jl .bl_end + mov dx,word[size_y_var] + dec dx + cmp cx,dx ;word[size_y_var] ;SIZE_Y + jge .bl_end + + cmp eax,ebx + jl @f + je .bl_end + + xchg eax,ebx +if Ext=NON + mov edx,.bx1 + xchg edx,.bx2 + mov .bx1,edx + mov edx,.by1 + xchg edx,.by2 + mov .by1,edx + + mov edx,.ex1 + xchg edx,.ex2 + mov .ex1,edx + mov edx,.ey1 + xchg edx,.ey2 + mov .ey1,edx +else + movq mm0,.bx1 + movq mm1,.ex1 + movq mm2,.bx2 + movq mm3,.ex2 + movq .bx2,mm0 + movq .ex2,mm1 + movq .bx1,mm2 + movq .ex1,mm3 +end if + mov edx,.z1 + xchg edx,.z2 + mov .z1,edx + @@: + push eax ebx ;store x1, x2 + mov ebx,.x1 + movzx edx,word[size_x_var] + dec edx + cmp ebx,edx + ; cmp bx,word[size_x_var] ;SIZE_X + jg .bl_end + cmp .x2,0 + jle .bl_end + + mov ebx,.x2 + sub ebx,.x1 + +if Ext >= SSE + + sub esp,16 + cvtsi2ss xmm3,ebx ;rcps + shufps xmm3,xmm3,0 + + ; movq mm0,.bx1q + ; movq mm1,.bx2q + ; movq mm2,.ex1q + ; movq mm3,.ex2q + ; psubd mm1,mm0 + ; psubd mm3,mm2 + ; cvtpi2ps xmm1,mm1 + ; movlhps xmm1,xmm1 + ; cvtpi2ps xmm1,mm3 + + cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 + movlhps 
xmm0,xmm0 + cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 + cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 + movlhps xmm1,xmm1 + cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 + subps xmm1,xmm0 + ; hi lo + divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex + + shufps xmm1,xmm1,11011000b + cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords + movhlps xmm1,xmm1 + cvtps2pi mm1,xmm1 + movq .dex,mm0 ; hi - lo -> dbx, dex + movq .dey,mm1 ; hi - lo -> dby, dey + +else + + mov eax,.bx2 ; calc .dbx + sub eax,.bx1 + cdq + idiv ebx + push eax + + mov eax,.ex2 ; calc .dex + sub eax,.ex1 + cdq + idiv ebx + push eax + + mov eax,.by2 ; calc .dby + sub eax,.by1 + cdq + idiv ebx + push eax + + mov eax,.ey2 ; calc .dey + sub eax,.ey1 + cdq + idiv ebx + push eax + +end if + + mov eax,.z2 ; calc .dz + sub eax,.z1 + cdq + idiv ebx + push eax + + cmp .x1,0 ; set correctly begin variable + jge @f ; CLIPPING ON FUNCTION + ; cutting triangle exceeds screen + mov ebx,.x1 + neg ebx + imul ebx ; eax = .dz * abs(.x1) + add .z1,eax + mov .x1,0 + + mov eax,.dbx + imul ebx + add .bx1,eax + + mov eax,.dby + imul ebx + add .by1,eax + + mov eax,.dex + imul ebx + add .ex1,eax + + mov eax,.dey + imul ebx + add .ey1,eax + @@: + movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] + mov ebx,.x2 + cmp eax,ebx + jg @f + mov .x2,eax + @@: + ; movd mm0,eax + ; movd mm1,.x2 + ; pminsw mm0,mm1 + ; movd .x2,mm0 + ; cmp .x2,SIZE_X ;eax | + ; jl @f |> this dont work idk cause + ; mov .x2,SIZE_X ;eax | + @@: + ; movzx eax,word[size_x_var] ;calc memory begin in buffers + mov ebx,.y + mul ebx + mov ebx,.x1 + add eax,ebx + mov ebx,eax + lea eax,[eax*3] + add edi,eax ; edi - screen + mov esi,.z_buff ; z-buffer filled with dd variables + shl ebx,2 + add esi,ebx ; esi - Z buffer + + mov ecx,.x2 + sub ecx,.x1 + ; init current variables + push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi + push dword .ex1 + push dword .by1 + push dword .ey1 + + push .z1 ; current z shl CATMULL_SHIFT + push esi + +if Ext >= MMX + pxor mm0,mm0 + movq mm3,.cex ; hi - lo -> cbx; cex + movq mm4,.cey ; 
hi - lo -> cby; cey +; movq mm5,mm3 +; movq mm6,mm4 +; psrad mm5,ROUND +; psrad mm6,ROUND +; movq .ceyq,mm5 +; movq .cbyq,mm6 + mov edx,.czbuff +else + cld +end if + .draw: + ; if TEX = SHIFTING ;bump drawing only in shifting mode +if Ext=NON + mov esi,.czbuff ; .czbuff current address in buffer + mov ebx,.cz ; .cz - cur z position + cmp ebx,dword[esi] +else + mov ebx,.cz + cmp ebx,dword[edx] +end if + jge .skip + +if Ext=NON + mov eax,.cby + mov esi,.cbx + sar eax,ROUND + sar esi,ROUND + shl eax,TEX_SHIFT ;- + add esi,eax + lea esi,[esi*3] ;- ; esi - current b. texture address + add esi,.bmap + + mov ebx,.cex ;.cex - current env map X + mov eax,.cey ;.cey - current env map y + sar ebx,ROUND + sar eax,ROUND + + shl eax,TEX_SHIFT + add ebx,eax + lea ebx,[ebx*3] + add ebx,.emap + + +else + movq mm5,mm4 ;.cey + psrad mm5,ROUND + pslld mm5,TEX_SHIFT + movq mm6,mm3 ;.cex + psrad mm6,ROUND + paddd mm5,mm6 + movq mm6,mm5 + paddd mm5,mm5 + paddd mm5,mm6 + paddd mm5,.emap + movd esi,mm5 + psrlq mm5,32 + movd ebx,mm5 +end if +if Ext>=MMX + movd mm1,[esi] + movd mm2,[ebx] + punpcklbw mm1,mm0 + punpcklbw mm2,mm0 + pmullw mm1,mm2 + psrlw mm1,8 + packuswb mm1,mm0 + movd [edi],mm1 + mov ebx,.cz + mov dword[edx],ebx +else + cld ; esi - tex b. + lodsb ; ebx - tex e. 
+ mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + inc ebx + lodsb + mov dl,[ebx] + mul dl + shr ax,8 + stosb + mov ebx,.cz + mov esi,.czbuff + mov dword[esi],ebx + jmp .no_skip +end if + .skip: + add edi,3 + + if Ext = NON + .no_skip: + add .czbuff,4 + mov eax,.dbx + add .cbx,eax + mov eax,.dby + add .cby,eax + mov eax,.dex + add .cex,eax + mov eax,.dey + add .cey,eax + else + add edx,4 + paddd mm3,.dex + paddd mm4,.dey + ; movq mm5,mm3 + ; movq mm6,mm4 + ; psrad mm5,ROUND + ; psrad mm6,ROUND + ; movq .cex,mm3 + ; movq .cey,mm4 + end if + mov eax,.dz + add .cz,eax + if Ext = NON + dec ecx + jnz .draw + else + loop .draw + end if + + .bl_end: + mov esp,ebp +ret 56 + diff --git a/programs/demos/view3ds/view3ds.asm b/programs/demos/view3ds/view3ds.asm index 98ae5bc568..9f7050d8e0 100644 --- a/programs/demos/view3ds/view3ds.asm +++ b/programs/demos/view3ds/view3ds.asm @@ -1,5 +1,5 @@ -; application : View3ds ver. 0.071 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.074 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. ; compiler : FASM ; system : KolibriOS @@ -38,6 +38,9 @@ SSE = 2 SSE2 = 3 SSE3 = 4 Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } +; For now correct only SSE2 and SSE3 versions. if you have older CPU +; use older versions of app. Probably ver 005 will be OK but it need +; re-edit to support new Kolibri features. ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) USE_LFN = 1 ; App is Kolibri only now. 
@@ -54,6 +57,8 @@ use32 dd 0x0 ; I_Icon START: ; start of execution + rdtsc + mov [rand_seed],ax cld push dword (SIZE_Y shr 3) * 3 fninit @@ -90,9 +95,7 @@ START: ; start of execution ; set point(0,0,0) in center and calc all coords ; to be in <-1.0,1.0> call normalize_all_light_vectors - if Ext >= SSE3 call copy_lights ; to aligned float - end if call init_triangles_normals2 call init_point_normals call init_envmap2 @@ -100,16 +103,24 @@ START: ; start of execution call generate_texture2 call init_sincos_tab call do_color_buffer ; intit color_map + if Ext >= SSE3 + call init_point_lights + mov [fire_flag],0 ; proteza + end if mov edi,bumpmap call calc_bumpmap call calc_bumpmap_coords ; bump and texture mapping call do_edges_list call draw_window - ;mov [draw_win_at_first],0 - ;mov eax,40 ; set events mask - ;mov ebx,1100000000000000000000000100111b - ;int 0x40 - + if Ext > SSE2 + mov eax,1 + cpuid + bt ecx,0 ; is sse3 on board? + jc @f + mov [max_dr_flg],12 + mov [isSSE3],0 + @@: + end if still: cmp [edit_flag],1 @@ -122,7 +133,16 @@ still: mov ebx,111b .int: int 0x40 + if Ext > SSE2 + cmp [ray_shd_flag],1 + jne @f + cmp [isSSE3],1 + jne @f + mov eax,10 + jmp .intt + end if + @@: mov eax,23 mov ebx,TIMEOUT cmp [speed_flag],0 @@ -134,6 +154,7 @@ still: mov eax,10 @@: + .intt: int 0x40 cmp eax,1 ; redraw event ? @@ -263,8 +284,9 @@ still: jne .next_m5 ; 'grd ' 1 call make_random_lights ; 'env ' 2 call normalize_all_light_vectors ; 'bump' 3 - if Ext >= SSE3 call copy_lights + if Ext >= SSE3 + call init_point_lights ; for ex. 
ray casting end if call do_color_buffer ; intit color_map ; 'tex ' 4 @@ -396,7 +418,7 @@ still: cmp [move_flag],0 jne @f .x_minus: - sub [vect_x],10 + sub word[vect_x],10 jmp .next2 @@: cmp [move_flag],1 @@ -414,7 +436,7 @@ still: cmp [move_flag],0 jne @f .x_plus: - add [vect_x],10 + add word[vect_x],10 jmp .next3 @@: cmp [move_flag],1 @@ -522,8 +544,6 @@ still: .no_sort: cmp [dr_flag],7 ; fill if 2tex and texgrd jge @f - cmp [catmull_flag],0 ;non fill if Catmull = off - je .non_f cmp [dr_flag],6 ; non fill if dots je .non_f @@: @@ -534,44 +554,72 @@ still: call draw_dots jmp .blurrr @@: + if Ext > SSE2 + cmp [ray_shd_flag],1 ;non fill if Catmull = off + jne @f + cmp [isSSE3],1 + jne @f + mov ax,100 + jmp .dr + @@: + end if + + movzx ax,[dr_flag] + .dr: call draw_triangles ; draw all triangles from the list cmp [edit_flag],0 jz .no_edit call clear_vertices_index - call draw_handlers + movzx eax,[dr_flag] + movzx ebx,[ray_shd_flag] + shl ebx,10 + or eax,ebx + call draw_handlers ; call edit - - - .no_edit: .blurrr: - cmp [sinus_flag],0 - je @f - call do_sinus + movzx eax,[dr_flag] + movzx ebx,[ray_shd_flag] + shl ebx,10 + or eax,ebx + cmp [sinus_flag],0 + je .no_sin + movzx eax,[dr_flag] + movzx ebx,[ray_shd_flag] + shl ebx,10 + or eax,ebx + call do_sinus + ; jmp .finito + .no_sin: @@: - cmp [fire_flag],0 - jne @f + movzx ecx,[fire_flag] + cmp [fire_flag],1 + je @f cmp [blur_flag],0 je .no_blur ; no blur, no fire movzx ecx,[blur_flag] + @@: + movzx eax,[dr_flag] + movzx ebx,[ray_shd_flag] + shl ebx,10 + or eax,ebx call blur_screen ; blur and fire - jmp .no_blur - @@: - cmp [emboss_flag],0 - jne .emb ; if emboss=true -> no fire - movzx ecx,[fire_flag] - call blur_screen ; blur and fire + ; jmp .finito + .no_blur: ; no blur, no fire cmp [emboss_flag],0 je @f - .emb: + movzx eax,[dr_flag] + movzx ebx,[ray_shd_flag] + shl ebx,10 + or eax,ebx call do_emboss - - @@: + .finito: + @@: cmp [inc_bright_flag],0 ; increase brightness @@ -706,7 +754,9 @@ end if mov eax,7 ; put 
image mov ebx,[screen_ptr] mov ecx,[size_y_var] - mov edx,[offset_y] + mov edx,[offset_y] + cmp [ray_shd_flag],1 + jge .ff cmp [dr_flag],11 jge .ff int 0x40 @@ -764,6 +814,7 @@ include "3r_phg.inc" include '3stencil.inc' include '3glass.inc' include '3glass_tex.inc' +include '3ray_shd.inc' end if clear_vertices_index: mov edi,[vertices_index_ptr] @@ -1075,7 +1126,7 @@ calc_bumpmap_coords: ; map texture, bump fldpi fadd st,st mov esi,[points_ptr] - mov edi,tex_points + mov edi,[tex_points_ptr] mov ecx,[points_count_var] inc ecx ; cmp [map_tex_flag],1 @@ -1389,7 +1440,8 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading mov esp,ebp pop ebp ret -if Ext >= SSE3 + +if Ext >= SSE2 init_point_normals: .z equ dword [ebp-8] .y equ dword [ebp-12] @@ -1397,7 +1449,6 @@ init_point_normals: .point_number equ dword [ebp-28] .hit_faces equ dword [ebp-32] - fninit push ebp mov ebp,esp sub esp,64 @@ -1438,19 +1489,25 @@ init_point_normals: jne .ipn_check_face cvtsi2ss xmm6,.hit_faces movaps xmm7,.x + rcpss xmm6,xmm6 shufps xmm6,xmm6,11000000b mulps xmm7,xmm6 - movaps xmm6,xmm7 - mulps xmm6,xmm6 - andps xmm6,[zero_hgst_dd] - haddps xmm6,xmm6 - haddps xmm6,xmm6 - rsqrtps xmm6,xmm6 - mulps xmm7,xmm6 movlps [edi],xmm7 movhlps xmm7,xmm7 movss [edi+8],xmm7 + call normalize_vector + ; movaps xmm6,xmm7 + ; mulps xmm6,xmm6 + ; andps xmm6,[zero_hgst_dd] + ; haddps xmm6,xmm6 + ; haddps xmm6,xmm6 + ; rsqrtps xmm6,xmm6 + ; mulps xmm7,xmm6 + ; movlps [edi],xmm7 + ; movhlps xmm7,xmm7 + ; movss [edi+8],xmm7 + add edi,12 inc .point_number mov edx,.point_number @@ -1576,11 +1633,9 @@ init_triangles_normals2: pop ecx sub ecx,1 jnz @b - ; cmp dword[ebp],-1 - ; jne @b ret -if Ext >= SSE3 + copy_lights: ; after normalising ! mov esi,lights mov edi,lights_aligned @@ -1610,7 +1665,7 @@ copy_lights: ; after normalising ! 
pop ecx loop .again ret -end if + clrscr: mov edi,[screen_ptr] @@ -1654,6 +1709,36 @@ ret draw_triangles: +; in: eax - render draw model + .tri_no equ dword[ebp-60] + .point_index3 equ [ebp-8] + .point_index2 equ [ebp-12] + .point_index1 equ [ebp-16] + .yy3 equ [ebp-18] + .xx3 equ [ebp-20] + .yy2 equ [ebp-22] + .xx2 equ [ebp-24] + .yy1 equ [ebp-26] + .xx1 equ [ebp-28] + + .zz3 equ [ebp-30] + .zz2 equ [ebp-32] + .zz1 equ [ebp-34] + .index3x12 equ [ebp-38] + .index2x12 equ [ebp-42] + .index1x12 equ [ebp-46] + .temp1 equ dword[ebp-50] + .temp2 equ dword[ebp-54] + .dr_flag equ word[ebp-56] + + + push ebp + mov ebp,esp + sub esp,60 + + ; movzx ax,[dr_flag] + mov .dr_flag,ax + emms ; update translated list MMX required @@ -1671,7 +1756,8 @@ draw_triangles: movd dword[eax],mm1 @@: if Ext >= SSE3 - cmp [dr_flag],13 + + cmp .dr_flag,13 jnge .no_stencil mov esi,[triangles_ptr] mov ecx,[triangles_count_var] @@ -1734,200 +1820,134 @@ draw_triangles: je .draw_smooth_line mov esi,[triangles_ptr] - mov ecx,[triangles_count_var] + xor ecx,ecx ;mov ecx,[triangles_count_var] .again_dts: + ; push ebp + push esi push ecx - mov ebp,[points_translated_ptr] - if Ext >= SSE2 - mov eax,dword[esi] - mov [point_index1],eax - lea eax,[eax*3] - add eax,eax - push ebp - add ebp,eax - mov eax,[ebp] - ; cmp [vertex_edit_no],0 - ; jne @f - ; - ; @@: - mov dword[xx1],eax - mov eax,[ebp+4] - mov [zz1],ax + mov .tri_no,ecx - pop ebp + mov eax,[esi] + mov ebx,[esi+4] + mov ecx,[esi+8] + + mov .point_index1,eax + mov .point_index2,ebx + mov .point_index3,ecx + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + mov .index1x12,eax + mov .index2x12,ebx + mov .index3x12,ecx + + shr eax,1 + shr ebx,1 + shr ecx,1 + add eax,[points_translated_ptr] + add ebx,[points_translated_ptr] + add ecx,[points_translated_ptr] + push word[eax+4] + push word[ebx+4] + push word[ecx+4] + pop word .zz3 + pop word .zz2 + pop word .zz1 + + mov eax,[eax] + mov ebx,[ebx] + mov ecx,[ecx] + ror eax,16 + ror ebx,16 + ror ecx,16 + 
mov .xx1,eax + mov .xx2,ebx + mov .xx3,ecx - mov eax,dword[esi+4] - mov [point_index2],eax - lea eax,[eax*3] - add eax,eax - push ebp - add ebp,eax - mov eax,[ebp] - mov dword[xx2],eax - mov eax,[ebp+4] - mov [zz2],ax - pop ebp - - mov eax,dword[esi+8] ; xyz3 = [ebp+[esi+4]*6] - mov [point_index3],eax - lea eax,[eax*3] - add eax,eax - ; push ebp - add ebp,eax - mov eax,[ebp] - mov dword[xx3],eax - mov eax,[ebp+4] - mov [zz3],ax - else - movq mm0,[esi] ; don't know MMX - mov qword[point_index1],mm0 - ; shr eax,16 - ; mov [point_index2],ax - mov eax,dword[esi+8] - mov [point_index3],eax - movdqu xmm0,[esi] - paddd xmm0,xmm0 - movdqa xmm1,xmm0 - paddd xmm0,xmm0 - paddd xmm0,xmm1 - movd eax,xmm0 - psrldq xmm0,4 - movd ebx,xmm0 - psrldq xmm0,4 - movd ecx,xmm0 - and eax,0FFFFh - and ebx,0FFFFh - and ecx,0FFFFh - movq mm0,[ebp+eax] - movq mm1,[ebp+ebx] - movq mm2,[ebp+ecx] - movq qword[xx1],mm0 - movq qword[xx2],mm1 - movq qword[xx3],mm2 -; emms - end if ; ********************************* - if 0 - cmp [vertex_edit_no],0 - jne .no_edit - mov ax,[vertex_edit_no] - dec ax - cmp ax,[point_index1] - jne @f - movd mm0,[edit_start_x] - psubw mm0,[edit_end_x] - movd mm1,dword[xx1] - paddw mm1,mm0 - movd dword[xx1],mm1 - jmp .no_edit - @@: - - cmp ax,[point_index2] - jne @f - movd mm0,[edit_start_x] - psubw mm0,[edit_end_x] - movd mm1,dword[xx2] - paddw mm1,mm0 - movd dword[xx2],mm1 - jmp .no_edit - @@: - - cmp ax,[point_index3] - jne @f - movd mm0,[edit_start_x] - psubw mm0,[edit_end_x] - movd mm1,dword[xx3] - paddw mm1,mm0 - movd dword[xx3],mm1 - jmp .no_edit - @@: - - - .no_edit: -end if - - push esi ; + ; push esi fninit ; DO culling AT FIRST cmp [culling_flag],1 ; (if culling_flag = 1) jne .no_culling - mov esi,point_index1 ; ********************************* + lea esi,.point_index1 ; ********************************* mov ecx,3 ; @@: mov eax,dword[esi] lea eax,[eax*3] shl eax,2 add eax,[points_normals_rot_ptr] -; lea eax,[eax+point_normals_rotated] - fld dword[eax+8] ; 
***************************** - ftst ; CHECKING OF Z COOFICIENT OF - fstsw ax ; NORMAL VECTOR - sahf - jb @f - ffree st + mov eax,[eax+8] + bt eax,31 + jc @f + ; ***************************** + ; CHECKING OF Z COOFICIENT OF + ; NORMAL VECTOR + add esi,4 loop @b jmp .end_draw ; non visable @@: - ffree st ;is visable + .no_culling: - cmp [dr_flag],0 ; draw type flag + cmp .dr_flag,0 ; draw type flag je .flat_draw - cmp [dr_flag],2 + cmp .dr_flag,2 je .env_mapping - cmp [dr_flag],3 + cmp .dr_flag,3 je .bump_mapping - cmp [dr_flag],4 + cmp .dr_flag,4 je .tex_mapping - cmp [dr_flag],5 + cmp .dr_flag,5 je .rainbow - cmp [dr_flag],7 + cmp .dr_flag,7 je .grd_tex - cmp [dr_flag],8 + cmp .dr_flag,8 je .two_tex - cmp [dr_flag],9 + cmp .dr_flag,9 je .bump_tex - cmp [dr_flag],10 + cmp .dr_flag,10 je .cubic_env_mapping - cmp [dr_flag],11 + cmp .dr_flag,11 je .draw_smooth_line if Ext >= SSE3 - cmp [dr_flag],12 + cmp .dr_flag,12 je .r_phg - cmp [dr_flag],13 + cmp .dr_flag,13 je .glass - cmp [dr_flag],14 + cmp .dr_flag,14 je .glass_tex - end if ; **************** - mov esi,point_index3 ; do Gouraud shading + cmp .dr_flag,100 + je .ray_shd + + end if + + push ebp ; **************** + lea esi,.index3x12 ; do Gouraud shading + lea edi,.zz3 mov ecx,3 .again_grd_draw: mov eax,dword[esi] - shl eax,2 - lea eax,[eax*3] add eax,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] ; x cooficient of normal vector fimul [correct_tex] fiadd [correct_tex] - fistp [temp1] + fistp .temp1 ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient fimul [correct_tex] fiadd [correct_tex] - fistp [temp2] + fistp .temp2 - mov eax,[temp2] - mov ebx,[temp1] + mov eax,.temp2 + mov ebx,.temp1 and ebx,0xfffffff shl eax,TEX_SHIFT add eax,ebx lea eax,[eax*3+color_map] mov eax,dword[eax] - ; cmp [catmull_flag],1 ; put on stack z coordinate if necessary - ; jne @f - lea edx,[ecx*3] - push word[edx*2+xx1-2] ; zz1 ,2 ,3 - ; @@: + push word[edi] ; zz1 ,2 
,3 + ror eax,16 ; eax -0xxxrrggbb -> 0xggbbxxrr xor ah,ah push ax ;r @@ -1938,98 +1958,55 @@ end if push ax ;b sub esi,4 + sub edi,2 dec cx jnz .again_grd_draw jmp .both_draw - ; movzx edi,[point_index3] ;gouraud shading according to light vector - ; lea edi,[edi*3] - ; lea edi,[4*edi+point_normals_rotated] ; edi - normal - ; mov esi,light_vector - ; call dot_product - ; fabs - ; fimul [orginal_color_r] - ; fistp [temp_col] - ; and [temp_col],0x00ff - ; push [temp_col] - ; push [temp_col] - ; push [temp_col] - - ; movzx edi,[point_index2] - ; lea edi,[edi*3] - ; lea edi,[4*edi+point_normals_rotated] ; edi - normal - ; mov esi,light_vector - ; call dot_product - ; fabs - ; fimul [orginal_color_r] - ; fistp [temp_col] - ; and [temp_col],0x00ff - ; push [temp_col] - ; push [temp_col] - ; push [temp_col] - - ; movzx edi,[point_index1] - ; lea edi,[edi*3] - ; lea edi,[4*edi+point_normals_rotated] ; edi - normal - ; mov esi,light_vector - ; call dot_product - ; fabs - ; fimul [orginal_color_r] - ; fistp [temp_col] - ; and [temp_col],0x00ff - ; push [temp_col] - ; push [temp_col] - ; push [temp_col] .rainbow: - ; cmp [catmull_flag],1 ; put on stack z coordinate if necessary - ; jne @f - push [zz3] - @@: - mov eax,dword[yy3] + push ebp + push word .zz3 + + mov eax, .xx3 + ror eax,16 mov ebx,0x00ff00ff and eax,ebx push eax neg al push ax - push [zz2] + push word .zz2 - mov eax,dword[yy2] + mov eax, .xx2 + ror eax,16 and eax,ebx push eax neg al push ax - push [zz1] + push word .zz1 - mov eax,dword[yy1] + mov eax, .xx1 + ror eax,16 and eax,ebx push eax neg al push ax .both_draw: - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx, .xx2 + mov ecx, .xx3 mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] call gouraud_triangle_z + pop ebp jmp .end_draw .flat_draw: ;************************** fninit ; FLAT DRAWING - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - shl eax,2 - shl ebx,2 - 
shl ecx,2 - lea eax,[eax*3] ;+point_normals_rotated] + mov eax,.index1x12 + mov ebx,.index2x12 + mov ecx,.index3x12 add eax,[points_normals_rot_ptr] - lea ebx,[ebx*3] ;+point_normals_rotated] add ebx,[points_normals_rot_ptr] - lea ecx,[ecx*3] ;+point_normals_rotated] add ecx,[points_normals_rot_ptr] fld dword[eax] ; x cooficient of normal vector fadd dword[ebx] @@ -2037,19 +2014,19 @@ end if fidiv [i3] fimul [correct_tex] fiadd [correct_tex] - fistp [temp1] ;dword[esp-4] ; x temp variables + fistp .temp1 ;dword[esp-4] ; x temp variables fld dword[eax+4] ; y cooficient of normal vector fadd dword[ebx+4] fadd dword[ecx+4] fidiv [i3] fimul [correct_tex] fiadd [correct_tex] - fistp [temp2] ;dword[esp-8] ; y - mov edx,[temp2] ;dword[esp-8] + fistp .temp2 ;dword[esp-8] ; y + mov edx,.temp2 ;dword[esp-8] and edx,0xfffffff - and [temp1],0xfffffff + and .temp1,0xfffffff shl edx,TEX_SHIFT - add edx,[temp1] ;dword[esp-4] + add edx,.temp1 ;dword[esp-4] lea eax,[3*edx] add eax,color_map @@ -2071,34 +2048,32 @@ end if ; shl eax,8 ; mov edx,eax - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] - push word[zz3] - push word[zz2] - push word[zz1] + push ebp + push word .zz3 + push word .zz2 + push word .zz1 call flat_triangle_z + pop ebp jmp .end_draw .env_mapping: - push [zz3] - push [zz2] - push [zz1] + push ebp + push word .zz3 + push word .zz2 + push word .zz1 - mov esi,point_index1 + lea esi, .index1x12 sub esp,12 mov edi,esp mov ecx,3 @@: mov eax,dword[esi] - lea eax,[eax*3] - shl eax,2 add eax,[points_normals_rot_ptr] ;point_normals_rotated ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] @@ -2115,33 +2090,29 @@ end if add esi,4 loop @b - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov 
edi,[screen_ptr] mov esi,envmap mov edx,[Zbuffer_ptr] call tex_triangle_z - + pop ebp jmp .end_draw ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .cubic_env_mapping: - push [zz3] - push [zz2] - push [zz1] + push ebp + push word .zz3 + push word .zz2 + push word .zz1 - mov esi,point_index1 + lea esi,.index1x12 sub esp,12 mov edi,esp mov ecx,3 @@: mov eax,dword[esi] - lea eax,[eax*3] - shl eax,2 add eax,[points_normals_rot_ptr] fld dword[eax] @@ -2171,37 +2142,32 @@ end if add esi,4 loop @b - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx, .xx2 + mov ecx, .xx3 mov edi,[screen_ptr] mov esi,envmap_cub mov edx,[Zbuffer_ptr] call tex_triangle_z - + pop ebp jmp .end_draw ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .bump_mapping: - + push ebp push [Zbuffer_ptr] - push [zz3] - push [zz2] - push [zz1] + push word .zz3 + push word .zz2 + push word .zz1 - mov esi,point_index1 + lea esi,.index1x12 sub esp,12 mov edi,esp mov ecx,3 @@: mov eax,dword[esi] - lea eax,[eax*3] - shl eax,2 add eax,[points_normals_rot_ptr] ;point_normals_rotated ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] @@ -2218,70 +2184,58 @@ end if add esi,4 loop @b - mov esi,[point_index3] ; bump map coords + mov esi, .point_index3 ; bump map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi, .point_index2 shl esi,2 - add esi,tex_points -; lea esi,[esi*3] -; lea esi,[points+2+esi*2] + add esi,[tex_points_ptr] push dword[esi] - ; push dword[xx2] - mov esi,[point_index1] + mov esi, .point_index1 shl esi,2 - add esi,tex_points -; lea esi,[esi*3] -; lea esi,[points+2+esi*2] + add esi,[tex_points_ptr] push dword[esi] - ; push dword[xx1] - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov 
edi,[screen_ptr] mov esi,envmap mov edx,bumpmap ;BUMP_MAPPING call bump_triangle_z - + pop ebp jmp .end_draw .tex_mapping: - - push [zz3] - push [zz2] - push [zz1] + push ebp + push word .zz3 + push word .zz2 + push word .zz1 ; @@: - mov esi,[point_index3] ; tex map coords + mov esi, .point_index3 ; tex map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi, .point_index2 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] + mov esi, .point_index1 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov edi,[screen_ptr] mov esi,texmap - mov edx,[Zbuffer_ptr] + mov edx,[Zbuffer_ptr] call tex_triangle_z - + pop ebp jmp .end_draw ; .ray: ; grd_triangle according to points index @@ -2319,49 +2273,43 @@ end if .grd_tex: ; smooth shading + texture push ebp - mov ebp,esp - sub esp,4 - push ebp - mov esi,[point_index3] ; tex map coords + mov esi, .point_index3 ; tex map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] ; texture coords as first - mov esi,[point_index2] ; group of parameters + mov esi, .point_index2 ; group of parameters shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] + mov esi, .point_index1 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,point_index3 + lea esi, .index3x12 + lea edi, .zz3 mov ecx,3 + .aagain_grd_draw: - .aagain_grd_draw: - - lea edx,[ecx*3] - push word[edx*2+xx1-2] ; zz1 ,2 ,3 + push word[edi] ; zz1 ,2 ,3 fninit mov eax,dword[esi] - shl eax,2 - lea eax,[eax*3] ;+point_normals_rotated] add eax,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] ; x cooficient of normal vector fimul [correct_tex] 
fiadd [correct_tex] - fistp [temp1] ;word[ebp-2] + fistp .temp1 ;word[ebp-2] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient fimul [correct_tex] fiadd [correct_tex] - fistp [temp2] ;word[ebp-4] + fistp .temp2 ;word[ebp-4] - mov eax,[temp2] ;word[ebp-4] - mov ebx,[temp1] ;word[ebp-2] - and ebx,0xfffffff ; some onjects need thid 'and' + mov eax,.temp2 + mov ebx,.temp1 + and ebx,0xfffffff ; some onjects need this 'and' shl eax,TEX_SHIFT add eax,ebx lea eax,[eax*3] @@ -2376,49 +2324,45 @@ end if push ax ;g shr eax,24 push ax ;b - + sub edi,2 sub esi,4 dec cx jnz .aagain_grd_draw - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx, .xx2 + mov ecx, .xx3 mov edi,[screen_ptr] mov edx,texmap mov esi,[Zbuffer_ptr] call tex_plus_grd_triangle - pop ebp - mov esp,ebp pop ebp jmp .end_draw .two_tex: + push ebp push [Zbuffer_ptr] - push word[zz3] - push word[zz2] - push word[zz1] + push word .zz3 + push word .zz2 + push word .zz1 - mov esi,[point_index3] ; tex map coords + mov esi, .point_index3 ; tex map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi, .point_index2 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] + mov esi, .point_index1 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,point_index1 ; env coords + lea esi, .point_index1 ; env coords sub esp,12 mov edi,esp mov ecx,3 @@ -2443,50 +2387,46 @@ end if add esi,4 loop @b - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx, .xx2 + mov ecx, .xx3 mov edi,[screen_ptr] mov esi,texmap mov edx,envmap call two_tex_triangle_z + pop ebp jmp .end_draw .bump_tex: - mov esi,[point_index3] ; tex map coords + push ebp + mov esi, .point_index3 ; tex map coords shl esi,2 - add esi,tex_points + add 
esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi, .point_index2 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] + mov esi, .point_index1 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] push dword texmap push [Zbuffer_ptr] - xor edi,edi - push word[zz3] - push word[zz2] - push word[zz1] + push word .zz3 + push word .zz2 + push word .zz1 - mov esi,point_index1 ; env coords + lea esi, .index1x12 ; env coords sub esp,12 mov edi,esp mov ecx,3 @@: mov eax,dword[esi] - lea eax,[eax*3] - shl eax,2 add eax,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] @@ -2503,40 +2443,28 @@ end if add esi,4 loop @b -; push dword 1 shl 16 + 1 ; emap coords -; push dword 127 shl 16 + 1 -; push dword 127 shl 16 + 127 - - mov esi,[point_index3] ; bump map coords + mov esi, .point_index3 ; bump map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi, .point_index2 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] + push dword[esi] + mov esi, .point_index1 + shl esi,2 + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] - shl esi,2 - add esi,tex_points - push dword[esi] - -; push dword 1 shl 16 + 127 -; push dword 127 shl 16 + 127 -; push dword 1 shl 16 + 1 ; bump coords - - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov edi,[screen_ptr] mov esi,envmap mov edx,bumpmap call bump_tex_triangle_z - + pop ebp jmp .end_draw @@ -2549,12 +2477,9 @@ if Ext >= SSE3 pshufd xmm5,xmm5,01110011b - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_normals_rot_ptr] add ebx,[points_normals_rot_ptr] add 
ecx,[points_normals_rot_ptr] @@ -2566,12 +2491,9 @@ if Ext >= SSE3 andps xmm2,[zero_hgst_dd] xorps xmm3,xmm3 - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_rotated_ptr] add ebx,[points_rotated_ptr] add ecx,[points_rotated_ptr] @@ -2584,12 +2506,9 @@ if Ext >= SSE3 - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] @@ -2603,12 +2522,9 @@ if Ext >= SSE3 pshufd xmm5,xmm5,01110011b - mov eax,[point_index1] - mov ebx ,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_normals_rot_ptr] add ebx,[points_normals_rot_ptr] add ecx,[points_normals_rot_ptr] @@ -2620,12 +2536,9 @@ if Ext >= SSE3 andps xmm2,[zero_hgst_dd] xorps xmm3,xmm3 - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_rotated_ptr] add ebx,[points_rotated_ptr] add ecx,[points_rotated_ptr] @@ -2638,12 +2551,9 @@ if Ext >= SSE3 - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax, .xx1 + mov ebx, .xx2 + mov ecx, .xx3 mov edi,[screen_ptr] mov edx,[Zbuffer_ptr] mov esi,[Zbuffer_ptr] @@ -2657,12 +2567,9 @@ if Ext >= SSE3 punpcklwd xmm5,[the_zero] pshufd xmm5,xmm5,01110011b - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_normals_rot_ptr] add ebx,[points_normals_rot_ptr] add ecx,[points_normals_rot_ptr] @@ -2674,12 
+2581,9 @@ if Ext >= SSE3 andps xmm2,[zero_hgst_dd] xorps xmm3,xmm3 - mov eax,[point_index1] - mov ebx,[point_index2] - mov ecx,[point_index3] - imul eax,[i12] - imul ebx,[i12] - imul ecx,[i12] + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 add eax,[points_rotated_ptr] add ebx,[points_rotated_ptr] add ecx,[points_rotated_ptr] @@ -2690,17 +2594,17 @@ if Ext >= SSE3 add esp,12 andps xmm4,[zero_hgst_dd] - mov esi,[point_index3] ; tex map coords + mov esi,.point_index3 ; tex map coords shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index2] + mov esi,.point_index2 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] - mov esi,[point_index1] + mov esi,.point_index1 shl esi,2 - add esi,tex_points + add esi,[tex_points_ptr] push dword[esi] movups xmm6,[esp] add esp,12 @@ -2714,31 +2618,84 @@ if Ext >= SSE3 por xmm6,xmm7 - mov eax,dword[xx1] - ror eax,16 - mov ebx,dword[xx2] - ror ebx,16 - mov ecx,dword[xx3] - ror ecx,16 + mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 mov edx,texmap mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] call glass_tex_tri + jmp .end_draw + + .ray_shd: + emms + movd xmm5,[size_y_var] + punpcklwd xmm5,[the_zero] + pshufd xmm5,xmm5,01110011b + + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 + add eax,[points_normals_rot_ptr] + add ebx,[points_normals_rot_ptr] + add ecx,[points_normals_rot_ptr] + movups xmm0,[eax] + movups xmm1,[ebx] + movups xmm2,[ecx] + andps xmm0,[zero_hgst_dd] + andps xmm1,[zero_hgst_dd] + andps xmm2,[zero_hgst_dd] + xorps xmm3,xmm3 + + ; mov ebx,.tri_no + ; cmp ebx,0 + ; je @f + ; int3 + ; @@: + mov eax, .index1x12 + mov ebx, .index2x12 + mov ecx, .index3x12 + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add ecx,[points_rotated_ptr] + push dword[ecx+8] + push dword[ebx+8] + push dword[eax+8] + movups xmm4,[esp] + add esp,12 + andps xmm4,[zero_hgst_dd] + + movd mm7,.tri_no + + ; mm7 - intialised + + + 
mov eax,dword .xx1 + mov ebx,dword .xx2 + mov ecx,dword .xx3 + mov edx,texmap + mov edi,[screen_ptr] + mov esi,[Zbuffer_ptr] + + call ray_shad end if .end_draw: - pop esi - add esi,12 - + ; pop ebp pop ecx - dec ecx + pop esi + add esi,12 + inc ecx + cmp ecx,[triangles_count_var] jnz .again_dts -ret + + jmp .eend + .draw_smooth_line: @@ -2789,7 +2746,8 @@ ret sub esp,16 movups [esp],xmm1 add esi,4 - loop .aga_n + dec ecx + jnz .aga_n movups xmm0,[esp] movups xmm1,[esp+16] @@ -2807,11 +2765,17 @@ ret movhps xmm7,[edx] pshufd xmm7,xmm7,11101000b movdqa xmm6,xmm7 + movdqa xmm3,xmm7 + movdqa xmm4,xmm7 movd xmm5,[size_y_var] pshuflw xmm5,xmm5,00010001b + pcmpeqw xmm3,xmm5 + pcmpeqw xmm4,[the_zero] pcmpgtw xmm7,xmm5 pcmpgtw xmm6,[the_zero] pxor xmm7,xmm6 + pxor xmm3,xmm4 + pxor xmm7,xmm3 pmovmskb eax,xmm7 cmp al,-1 jnz .skp @@ -2851,7 +2815,17 @@ ret cmp ecx,[edges_count] jnz .again_s_line - ret + + + + + + .eend: + add esp,60 + pop ebp + +ret + @@ -2859,21 +2833,22 @@ ret draw_handlers: - + ; in eax - render model push ebp mov ebp,esp .counter equ ebp-16 .xres3m18 equ ebp-8 .xres2m12 equ ebp-12 + .dr_model equ dword[ebp-4] ; init counter sub esp,12 push dword 0 - + mov .dr_model,eax movzx eax,word[size_x_var] - cmp [dr_flag],12 + cmp .dr_model,12 jge @f lea ebx,[eax*3] sub ebx,18 @@ -2931,7 +2906,7 @@ draw_handlers: add eax,ebx push eax lea edi,[eax*3] - cmp [dr_flag],12 + cmp .dr_model,12 jl @f add edi,[esp] @@: @@ -2956,7 +2931,7 @@ draw_handlers: mov byte[edi+2],0xff ;al mov word[eax],dx add eax,2 - cmp [dr_flag],12 + cmp .dr_model,12 jl @f add edi,4 loop .do @@ -3226,7 +3201,6 @@ read_from_file: .exit: mov dword[edi],-1 ret - alloc_mem_for_tp: mov eax, 68 cmp [re_alloc_flag],1 @@ -3292,6 +3266,14 @@ alloc_mem_for_tp: int 0x40 mov [points_rotated_ptr], eax + mov eax, 68 + mov ebx, 12 + mov ecx, [points_count_var] + shl ecx,2 + mov edx,[tex_points_ptr] + int 0x40 + mov [tex_points_ptr], eax + mov eax, 68 mov ecx, [points_count_var] inc ecx @@ -3302,7 +3284,6 @@ 
alloc_mem_for_tp: ret - read_from_disk: mov eax, 68 mov ebx, 11 @@ -3348,11 +3329,11 @@ buttons: ; draw some buttons (all but navig mov edi,menu .again: mov eax,8 ; function 8 : define and draw button - mov bx,[size_x_var] + movzx ebx,word[size_x_var] shl ebx,16 add ebx,(10)*65536+62 ; [x start] *65536 + [x size] movzx ecx,byte[edi] ; button id = position+2 - sub cl,2 + sub ecx,2 lea ecx,[ecx*5] lea ecx,[ecx*3] add ecx,25 @@ -3364,10 +3345,10 @@ buttons: ; draw some buttons (all but navig ; BUTTON LABEL mov eax,4 ; function 4 : write text to window movzx ebx,byte[edi] - sub bl,2 ; button id, according to position + sub ebx,2 ; button id, according to position lea ebx,[ebx*3] lea ebx,[ebx*5] - mov cx,[size_x_var] + movzx ecx,word[size_x_var] shl ecx,16 add ebx,ecx add ebx,(12)*65536+28 ; [x start] *65536 + [y start] @@ -3459,6 +3440,9 @@ ret ; ******* WINDOW DEFINITIONS AND DRAW ******** ; ********************************************* draw_window: + movzx eax,[fire_flag] + push eax + ; int3 mov eax,12 ; function 12:tell os about windowdraw mov ebx,1 ; 1, start of draw int 0x40 @@ -3492,6 +3476,7 @@ ret ; add edx,130*65536+60 ; [x start] *65536 + [y start] ; mov esi,0x00ddeeff ; font 1 & color ( 0xF0RRGGBB ) ; int 0x40 + call write_info ; ADD VECTOR LABEL ; add vector buttons - 30 ++ @@ -3644,6 +3629,8 @@ ret mov eax,12 ; function 12:tell os about windowdraw mov ebx,2 ; 2, end of draw int 0x40 + pop eax + mov [fire_flag],al ret