diff --git a/programs/demos/3DS/3GLASS.INC b/programs/demos/3DS/3GLASS.INC new file mode 100644 index 0000000000..0b2efcc358 --- /dev/null +++ b/programs/demos/3DS/3GLASS.INC @@ -0,0 +1,550 @@ +; Glass-like triangle rendering by Maciej Guba. +; http://macgub.hekko.pl, macgub3@wp.pl + +ROUND2 equ 10 +glass_tri: +;----procedure renders a glass-like triangle with z coord +;----interpolation ( Catmull algorithm )----------------- +;----the normal vector is normalized at every pixel ----- +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- edx - ptr to stencil_buff ------- +;---------------------- esi - pointer to Z-buffer filled- +;---------------------- with dd float variables (unused here: the store to .Zbuf is commented out) +;---------------------- edi - pointer to screen buffer--- +;---------------------- xmm0 - 1st normal vector -------- +;---------------------- xmm1 - 2nd normal vector -------- +;---------------------- xmm2 - 3rd normal vector -------- +;---------------------- xmm3 - normalized light vector -- +;---------------------- xmm4 - lo -> hi z1, z2, z3 coords +;---------------------- as dword floats ---------------- +;---------------------- xmm5 - lo -> hi y_min, y_max, --- +;---------------------- x_min, x_max as dword integers - +;---------------------- stack - no parameters ----------- +;----edge x positions are stepped in fixed point (shl ROUND2) +;----------------- procedure doesn't save registers !! 
---- + + + + + push ebp + mov ebp,esp + sub esp,512 + sub ebp,16 + and ebp,0xfffffff0 + + .1_nv equ [ebp-16] + .2_nv equ [ebp-32] + .3_nv equ [ebp-48] + .l_v equ [ebp-64] + .z3 equ [ebp-72] + .z2 equ [ebp-76] + .z1 equ [ebp-80] + .x1 equ [ebp-82] + .y1 equ [ebp-84] + .x2 equ [ebp-86] + .y2 equ [ebp-88] + .x3 equ [ebp-90] + .y3 equ [ebp-92] + .Zbuf equ [ebp-96] + .x_max equ [ebp-100] + .x_min equ [ebp-104] + .y_max equ [ebp-108] + .y_min equ [ebp-112] + .screen equ [ebp-116] + .dx12 equ [ebp-120] + .dx13 equ [ebp-124] + .dx23 equ [ebp-128] + .dn12 equ [ebp-144] + .dn13 equ [ebp-160] + .dn23 equ [ebp-176] + .dz12 equ [ebp-180] + .dz13 equ [ebp-184] + .dz23 equ [ebp-188] + + .cnv1 equ [ebp-208] ; current normals on the two active edges (cnv1 steps by dn13, cnv2 by dn12 or dn23) + .cnv2 equ [ebp-224] + .cz2 equ [ebp-228] + .cz1 equ [ebp-232] + .stencil_buff equ [ebp-236] + + + + + .sort3: ; sort vertices so that y1 <= y2 <= y3, z lanes and normals are swapped along + cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm4,xmm4,11100001b + movaps xmm6,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm6 + + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + shufps xmm4,xmm4,11011000b + movaps xmm6,xmm1 + movaps xmm1,xmm2 + movaps xmm2,xmm6 + + jmp .sort3 + + .sort2: + + movaps .z1,xmm4 + mov .y1,eax + mov .y2,ebx + mov .y3,ecx + mov .stencil_buff, edx + + movdqa .y_min,xmm5 +if 1 ; check if at least a fragment + packssdw xmm5,xmm5 ; of the triangle is in the visible area + pshuflw xmm5,xmm5,11011000b + movdqu xmm7,.y3 + movdqa xmm6,xmm5 + pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min + pshufd xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max + movdqa xmm4,xmm7 + pcmpgtw xmm7,xmm5 + pcmpgtw xmm4,xmm6 + pxor xmm7,xmm4 + pmovmskb eax,xmm7 + and eax,0x00aaaaaa + or eax,eax + jz .rpt_loop2_end +end if + movaps .1_nv,xmm0 + movaps .2_nv,xmm1 + movaps .3_nv,xmm2 + movaps .l_v,xmm3 + ; mov .Zbuf,esi + mov .screen,edi + + + + mov bx,.y2 ; calc per-scanline deltas (x, z, normal) along edge 1-2 + sub bx,.y1 + jnz .rpt_dx12_make + + xorps xmm7,xmm7 + mov dword .dx12,0 + mov dword .dz12,0 + movaps .dn12,xmm7 + jmp .rpt_dx12_done + + 
.rpt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z2 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz12,xmm5 + + movaps xmm0,.2_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn12,xmm0 + + + .rpt_dx12_done: + + mov bx,.y3 ; calc per-scanline deltas (x, z, normal) along edge 1-3 + sub bx,.y1 + jnz .rpt_dx13_make + + xorps xmm7,xmm7 + mov dword .dx13,0 + mov dword .dz13,0 + movaps .dn13,xmm7 + jmp .rpt_dx13_done + + .rpt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz13,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn13,xmm0 + + .rpt_dx13_done: + + mov bx,.y3 ; calc per-scanline deltas (x, z, normal) along edge 2-3 + sub bx,.y2 + jnz .rpt_dx23_make + + xorps xmm7,xmm7 + mov dword .dx23,0 + mov dword .dz23,0 + movaps .dn23,xmm7 + jmp .rpt_dx23_done + + .rpt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z2 + divss xmm5,xmm6 + movss .dz23,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.2_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn23,xmm0 + + .rpt_dx23_done: + + + movsx eax,word .x1 + shl eax,ROUND2 + mov ebx,eax + mov edx,.z1 + mov .cz1,edx + mov .cz2,edx + movaps xmm0,.1_nv + movaps .cnv1,xmm0 + movaps .cnv2,xmm0 + + + movsx ecx,word .y1 + cmp cx,.y2 + + jge .rpt_loop1_end + + .rpt_loop1: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edx,.stencil_buff + mov edi,.screen + ; mov esi,.Zbuf + + call glass_line + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn12 + addss xmm2,.dz13 + addss xmm3,.dz12 + add eax,.dx13 + add ebx,.dx12 + + movaps .cnv1,xmm0 + 
movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y2 + jl .rpt_loop1 + + + + + + .rpt_loop1_end: + movsx ecx,word .y2 + cmp cx,.y3 + jge .rpt_loop2_end + + movsx ebx,word .x2 ; eax - current x on edge 1-3 (kept from the first loop) + shl ebx,ROUND2 ; ebx - current x on edge 2-3, restarted at x2 in fixed point + push dword .z2 + pop dword .cz2 + movaps xmm0,.2_nv + movaps .cnv2,xmm0 + + + .rpt_loop2: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edx,.stencil_buff + mov edi,.screen + ; mov esi,.Zbuf + + call glass_line + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn23 + addss xmm2,.dz13 + addss xmm3,.dz23 + add eax,.dx13 + add ebx,.dx23 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y3 + jl .rpt_loop2 + + .rpt_loop2_end: + + add esp,512 + pop ebp + +ret +align 16 +glass_line: +; in (draws one horizontal span, additively blended into the screen buffer): +; xmm0 - normal vector 1 +; xmm1 - normal vector 2 +; xmm3 - lo -> hi z1, z2 coords as dword floats +; xmm2 - lo -> hi y_min, y_max, x_min, x_max +; as dword integers +; xmm4 - normalized light vector (saved to .lv, shading reads lights_aligned) +; eax - x1 +; ebx - x2 +; ecx - y +; edx - stencil buff ptr +; edi - screen buffer +; esi - z buffer ===> not needed in glass rendering + + push ebp + mov ebp,esp + sub esp,256 + sub ebp,16 + and ebp,0xfffffff0 + + .n1 equ [ebp-16] + .n2 equ [ebp-32] + .lv equ [ebp-48] + .lx1 equ [ebp-52] + .lx2 equ [ebp-56] + .z2 equ [ebp-60] + .z1 equ [ebp-64] + .screen equ [ebp-68] + .zbuff equ [ebp-72] + .x_max equ [ebp-74] + .x_min equ [ebp-76] + .y_max equ [ebp-78] + .y_min equ [ebp-80] + .dn equ [ebp-96] + .dz equ [ebp-100] + .y equ [ebp-104] + .cnv equ [ebp-128] + .col_sum_b equ [ebp-136] + .col_sum_g equ [ebp-140] + .col_sum_r equ [ebp-144] + .cur_col equ [ebp-160] + .stencil_buf equ [ebp-164] + + mov .y,ecx + packssdw xmm2,xmm2 + movq .y_min,xmm2 + cmp cx,.y_min + jl .end_rp_line + cmp cx,.y_max + jge .end_rp_line ; + + cmp 
eax,ebx + je .end_rp_line + jl @f + xchg eax,ebx + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + shufps xmm3,xmm3,11100001b + @@: + + cmp ax,.x_max + jge .end_rp_line + cmp bx,.x_min + jle .end_rp_line + movaps .lv,xmm4 + movaps .n1,xmm0 + movaps .n2,xmm1 + mov .lx1,eax + mov .lx2,ebx + mov .stencil_buf,edx + movlps .z1,xmm3 + + sub ebx,eax + cvtsi2ss xmm7,ebx + shufps xmm7,xmm7,0 + subps xmm1,xmm0 + divps xmm1,xmm7 + movaps .dn,xmm1 + psrldq xmm3,4 + subss xmm3,.z1 + divss xmm3,xmm7 + movss .dz,xmm3 + + + + mov ebx,.lx1 + cmp bx,.x_min ; clip the left end of the span to x_min, advancing z1 and n1 by the skipped pixels + jge @f + movzx eax,word .x_min + sub eax,ebx + cvtsi2ss xmm7,eax + shufps xmm7,xmm7,0 + mulss xmm3,xmm7 + mulps xmm1,xmm7 + addss xmm3,.z1 + addps xmm1,.n1 + movsx eax,word .x_min + movss .z1,xmm3 + movaps .n1,xmm1 + mov dword .lx1,eax + + @@: + movzx eax,word .x_max + cmp .lx2,eax + jl @f + mov .lx2,eax + @@: + movzx eax,word[xres_var] ; NOTE(review): sibling real_phong_line_z reads size_x_var here - confirm xres_var is defined + mul dword .y + + + add eax,.lx1 + shl eax,2 + add edi,eax + mov ebx,eax + add ebx,.stencil_buf + + + mov ecx,.lx2 + sub ecx,.lx1 + + + movaps xmm0,.n1 + movss xmm2,.z1 +align 16 + .ddraw: + movaps xmm7,xmm0 + mulps xmm7,xmm7 ; normalize the interpolated normal + haddps xmm7,xmm7 + haddps xmm7,xmm7 + rsqrtps xmm7,xmm7 + mulps xmm7,xmm0 + maxps xmm7,[the_zero] ; clamp negative components to zero + movups .cnv,xmm7 + + mov edx,lights_aligned ; lights_aligned - global variable + xorps xmm1,xmm1 ; xmm1 = 0, running max color over lights (.lv could be used instead of the global list) + + .again_col: + movups xmm7,.cnv + mulps xmm7,[edx] + haddps xmm7,xmm7 + haddps xmm7,xmm7 + if 0 + cmp [bump_flag],1 ; on/off temporarily + ; depends on bump button + je @f + ; stencil + movss xmm5,xmm2 + movss xmm6,xmm2 + addss xmm5,[aprox] + subss xmm6,[aprox] + ; The stencil buffer does not work as I expected for now, + ; moreover - it does not work at all. 
+ cmpnltss xmm5,dword[ebx] + cmpnltss xmm6,dword[ebx] + xorps xmm5,xmm6 + xorps xmm6,xmm6 + movd eax,xmm5 + cmp eax,-1 + jne .no_reflective + end if + @@: + movaps xmm6,xmm7 + mulps xmm6,xmm6 + mulps xmm6,xmm6 + + mulps xmm6,xmm6 + mulps xmm6,[edx+48] + .no_reflective: + mulps xmm7,[edx+16] + addps xmm7,xmm6 + addps xmm7,[edx+32] + minps xmm7,[mask_255f] ; clamp to 255.0 (mask_255f is global) + + + maxps xmm1,xmm7 ; keep the per-channel maximum over all lights + add edx,64 ; size of one light in aligned list + cmp edx,lights_aligned_end + jl .again_col + cvtps2dq xmm1,xmm1 + movd xmm6,[edi] + packssdw xmm1,xmm1 + packuswb xmm1,xmm1 + paddusb xmm1,xmm6 ; saturating add onto the pixel already in the screen buffer + movd [edi],xmm1 + + + .skip: + add edi,4 + add ebx,4 ; advance the stencil_buff pointer in step with the screen pointer + addps xmm0,.dn + addss xmm2,.dz + sub ecx,1 + jnz .ddraw + + .end_rp_line: + add esp,256 + pop ebp + +ret diff --git a/programs/demos/3DS/3R_PHG.INC b/programs/demos/3DS/3R_PHG.INC new file mode 100644 index 0000000000..708df3794b --- /dev/null +++ b/programs/demos/3DS/3R_PHG.INC @@ -0,0 +1,528 @@ +; Real Phong shading implemented in flat assembler +; by Maciej Guba. 
+; http://macgub.vxm.pl + +ROUND2 equ 10 +real_phong_tri_z: +;----procedure renders a Phong shaded triangle with z coord +;----interpolation ( Catmull algorithm )----------------- +;----the normal vector is normalized at every pixel ----- +;------------------in - eax - x1 shl 16 + y1 ------------ +;---------------------- ebx - x2 shl 16 + y2 ------------ +;---------------------- ecx - x3 shl 16 + y3 ------------ +;---------------------- esi - pointer to Z-buffer filled- +;---------------------- with dd float variables-------- +;---------------------- edi - pointer to screen buffer--- +;---------------------- xmm0 - 1st normal vector -------- +;---------------------- xmm1 - 2nd normal vector -------- +;---------------------- xmm2 - 3rd normal vector -------- +;---------------------- xmm3 - normalized light vector -- +;---------------------- xmm4 - lo -> hi z1, z2, z3 coords +;---------------------- as dword floats ---------------- +;---------------------- xmm5 - lo -> hi y_min, y_max, --- +;---------------------- x_min, x_max as dword integers - +;---------------------- stack - no parameters ----------- +;----edge x positions are stepped in fixed point (shl ROUND2) +;----------------- procedure doesn't save registers !! 
---- + + + + + push ebp + mov ebp,esp + sub esp,512 + sub ebp,16 + and ebp,0xfffffff0 + + .1_nv equ [ebp-16] + .2_nv equ [ebp-32] + .3_nv equ [ebp-48] + .l_v equ [ebp-64] + .z3 equ [ebp-72] + .z2 equ [ebp-76] + .z1 equ [ebp-80] + .x1 equ [ebp-82] + .y1 equ [ebp-84] + .x2 equ [ebp-86] + .y2 equ [ebp-88] + .x3 equ [ebp-90] + .y3 equ [ebp-92] + .Zbuf equ [ebp-96] + .x_max equ [ebp-100] + .x_min equ [ebp-104] + .y_max equ [ebp-108] + .y_min equ [ebp-112] + .screen equ [ebp-116] + .dx12 equ [ebp-120] + .dx13 equ [ebp-124] + .dx23 equ [ebp-128] + .dn12 equ [ebp-144] + .dn13 equ [ebp-160] + .dn23 equ [ebp-176] + .dz12 equ [ebp-180] + .dz13 equ [ebp-184] + .dz23 equ [ebp-188] + + .cnv1 equ [ebp-208] ; current normals on the two active edges (cnv1 steps by dn13, cnv2 by dn12 or dn23) + .cnv2 equ [ebp-224] + .cz2 equ [ebp-228] + .cz1 equ [ebp-232] + + + + + + .sort3: ; sort vertices so that y1 <= y2 <= y3, z lanes and normals are swapped along + cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm4,xmm4,11100001b + movaps xmm6,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm6 + + + .sort1: + cmp bx,cx + jle .sort2 + xchg ebx,ecx + shufps xmm4,xmm4,11011000b + movaps xmm6,xmm1 + movaps xmm1,xmm2 + movaps xmm2,xmm6 + + jmp .sort3 + + .sort2: + + movaps .z1,xmm4 + mov .y1,eax + mov .y2,ebx + mov .y3,ecx + + movdqa .y_min,xmm5 +if 1 ; check if at least a fragment + packssdw xmm5,xmm5 ; of the triangle is in the visible area + pshuflw xmm5,xmm5,11011000b + movdqu xmm7,.y3 + movdqa xmm6,xmm5 + pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min + pshufd xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max + movdqa xmm4,xmm7 + pcmpgtw xmm7,xmm5 + pcmpgtw xmm4,xmm6 + pxor xmm7,xmm4 + pmovmskb eax,xmm7 + and eax,0x00aaaaaa + or eax,eax + jz .rpt_loop2_end +end if + movaps .1_nv,xmm0 + movaps .2_nv,xmm1 + movaps .3_nv,xmm2 + movaps .l_v,xmm3 + mov .Zbuf,esi + mov .screen,edi + + + + mov bx,.y2 ; calc per-scanline deltas (x, z, normal) along edge 1-2 + sub bx,.y1 + jnz .rpt_dx12_make + + xorps xmm7,xmm7 + mov dword .dx12,0 + mov dword .dz12,0 + movaps .dn12,xmm7 + jmp .rpt_dx12_done + + .rpt_dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + 
shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z2 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz12,xmm5 + + movaps xmm0,.2_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn12,xmm0 + + + .rpt_dx12_done: + + mov bx,.y3 ; calc per-scanline deltas (x, z, normal) along edge 1-3 + sub bx,.y1 + jnz .rpt_dx13_make + + xorps xmm7,xmm7 + mov dword .dx13,0 + mov dword .dz13,0 + movaps .dn13,xmm7 + jmp .rpt_dx13_done + + .rpt_dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z1 + divss xmm5,xmm6 + movss .dz13,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.1_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn13,xmm0 + + .rpt_dx13_done: + + mov bx,.y3 ; calc per-scanline deltas (x, z, normal) along edge 2-3 + sub bx,.y2 + jnz .rpt_dx23_make + + xorps xmm7,xmm7 + mov dword .dx23,0 + mov dword .dz23,0 + movaps .dn23,xmm7 + jmp .rpt_dx23_done + + .rpt_dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + cvtsi2ss xmm6,ebx + movss xmm5,.z3 + subss xmm5,.z2 + divss xmm5,xmm6 + movss .dz23,xmm5 + + movaps xmm0,.3_nv + subps xmm0,.2_nv + shufps xmm6,xmm6,0 + divps xmm0,xmm6 + movaps .dn23,xmm0 + + .rpt_dx23_done: + + + movsx eax,word .x1 + shl eax,ROUND2 + mov ebx,eax + mov edx,.z1 + mov .cz1,edx + mov .cz2,edx + movaps xmm0,.1_nv + movaps .cnv1,xmm0 + movaps .cnv2,xmm0 + + + movsx ecx,word .y1 + cmp cx,.y2 + + jge .rpt_loop1_end + + .rpt_loop1: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edi,.screen + mov esi,.Zbuf + + call real_phong_line_z + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn12 + addss xmm2,.dz13 + addss xmm3,.dz12 + add eax,.dx13 + add ebx,.dx12 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y2 + jl 
.rpt_loop1 + + + + + + .rpt_loop1_end: + movsx ecx,word .y2 + cmp cx,.y3 + jge .rpt_loop2_end + + movsx ebx,word .x2 ; eax - current x on edge 1-3 (kept from the first loop) + shl ebx,ROUND2 ; ebx - current x on edge 2-3, restarted at x2 in fixed point + push dword .z2 + pop dword .cz2 + movaps xmm0,.2_nv + movaps .cnv2,xmm0 + + + .rpt_loop2: + pushad + + movaps xmm2,.y_min + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movlps xmm3,.cz1 + movaps xmm4,.l_v + sar ebx,ROUND2 + sar eax,ROUND2 + mov edi,.screen + mov esi,.Zbuf + + call real_phong_line_z + + popad + movaps xmm0,.cnv1 + movaps xmm1,.cnv2 + movss xmm2,.cz1 + movss xmm3,.cz2 + addps xmm0,.dn13 + addps xmm1,.dn23 + addss xmm2,.dz13 + addss xmm3,.dz23 + add eax,.dx13 + add ebx,.dx23 + + movaps .cnv1,xmm0 + movaps .cnv2,xmm1 + movss .cz1,xmm2 + movss .cz2,xmm3 + + add ecx,1 + cmp cx,.y3 + jl .rpt_loop2 + + .rpt_loop2_end: + + add esp,512 + pop ebp + +ret +align 16 +real_phong_line_z: +; in (draws one horizontal span with a per-pixel depth test): +; xmm0 - normal vector 1 +; xmm1 - normal vector 2 +; xmm3 - lo -> hi z1, z2 coords as dword floats +; xmm2 - lo -> hi y_min, y_max, x_min, x_max +; as dword integers +; xmm4 - normalized light vector (saved to .lv, shading reads lights_aligned) +; eax - x1 +; ebx - x2 +; ecx - y +; edi - screen buffer +; esi - z buffer filled with dd floats + + push ebp + mov ebp,esp + sub esp,160 + sub ebp,16 + and ebp,0xfffffff0 + + .n1 equ [ebp-16] + .n2 equ [ebp-32] + .lv equ [ebp-48] + .lx1 equ [ebp-52] + .lx2 equ [ebp-56] + .z2 equ [ebp-60] + .z1 equ [ebp-64] + .screen equ [ebp-68] + .zbuff equ [ebp-72] + .x_max equ [ebp-74] + .x_min equ [ebp-76] + .y_max equ [ebp-78] + .y_min equ [ebp-80] + .dn equ [ebp-96] + .dz equ [ebp-100] + .y equ [ebp-104] + .cnv equ [ebp-128] + + mov .y,ecx + packssdw xmm2,xmm2 + movq .y_min,xmm2 + cmp cx,.y_min + jl .end_rp_line + cmp cx,.y_max + jge .end_rp_line ; + + cmp eax,ebx + je .end_rp_line + jl @f + xchg eax,ebx + movaps xmm7,xmm0 + movaps xmm0,xmm1 + movaps xmm1,xmm7 + shufps xmm3,xmm3,11100001b + @@: + + cmp ax,.x_max + jge .end_rp_line + cmp bx,.x_min + jle .end_rp_line + movaps .lv,xmm4 + movaps .n1,xmm0 + movaps .n2,xmm1 + mov 
.lx1,eax + mov .lx2,ebx + movlps .z1,xmm3 + + sub ebx,eax + cvtsi2ss xmm7,ebx + shufps xmm7,xmm7,0 + subps xmm1,xmm0 + divps xmm1,xmm7 + movaps .dn,xmm1 + psrldq xmm3,4 + subss xmm3,.z1 + divss xmm3,xmm7 + movss .dz,xmm3 + + + + mov ebx,.lx1 + cmp bx,.x_min ; clip the left end of the span to x_min, advancing z1 and n1 by the skipped pixels + jge @f + movzx eax,word .x_min + sub eax,ebx + cvtsi2ss xmm7,eax + shufps xmm7,xmm7,0 + mulss xmm3,xmm7 + mulps xmm1,xmm7 + addss xmm3,.z1 + addps xmm1,.n1 + movsx eax,word .x_min + movss .z1,xmm3 + movaps .n1,xmm1 + mov dword .lx1,eax + + @@: + movzx eax,word .x_max + cmp .lx2,eax + jl @f + mov .lx2,eax + @@: + movzx eax,word[size_x_var] + mul dword .y + ; mov edx,.x1 + add eax,.lx1 + shl eax,2 + add edi,eax + add esi,eax + + mov ecx,.lx2 + sub ecx,.lx1 + movaps xmm0,.n1 + movss xmm2,.z1 +align 16 + .ddraw: + movss xmm7,xmm2 + cmpnltss xmm7,dword[esi] + movd eax,xmm7 + or eax,eax + jnz .skip ; skip the pixel unless z is less than the stored depth + movss [esi],xmm2 ; record the new depth + movaps xmm7,xmm0 + mulps xmm7,xmm7 ; normalize the interpolated normal + haddps xmm7,xmm7 + haddps xmm7,xmm7 + rsqrtps xmm7,xmm7 + mulps xmm7,xmm0 + movaps .cnv,xmm7 + + mov edx,lights_aligned ; lights_aligned - global variable + xorps xmm1,xmm1 ; xmm1 = 0, running max color over lights (.lv could be used instead of the global list) 
+ @@: + movaps xmm6,[edx+16] + movaps xmm5,[edx] + movaps xmm3,[edx+48] + andps xmm5,[zero_hgst_dd] ; zero the highest dword of the light vector (global mask) + + mulps xmm5,.cnv ;.lv ; last dword should be zeroed + haddps xmm5,xmm5 + haddps xmm5,xmm5 + ; mulps xmm5,[env_const2] + ; maxps xmm5,[dot_min] + ; minps xmm5,[dot_max] + movaps xmm7,xmm5 + ; mulps xmm7,[env_const2] + ; mulps xmm7,[env_const2] + ; maxps xmm7,[dot_min] + ; minps xmm7,[dot_max] + + mulps xmm7,xmm7 + mulps xmm7,xmm7 + mulps xmm5,xmm6 + mulps xmm7,xmm7 + mulps xmm7,xmm3 + + addps xmm5,xmm7 + minps xmm5,[mask_255f] ; clamp to 255.0 (mask_255f is global) + maxps xmm1,xmm5 ; keep the per-channel maximum over all lights + ; movq xmm3,[edx+20] ; minimal color + ; punpcklwd xmm3,[minimum0] + ; cvtdq2ps xmm3,xmm3 + ; maxps xmm1,xmm3 + add edx,64 + cmp edx,lights_aligned_end ; global + jnz @b + + cvtps2dq xmm1,xmm1 + packssdw xmm1,xmm1 + packuswb xmm1,xmm1 + movd [edi],xmm1 + .skip: + add edi,4 + add esi,4 + addps xmm0,.dn + addss xmm2,.dz + sub ecx,1 + jnz .ddraw + + .end_rp_line: + add esp,160 + pop ebp + +ret diff --git a/programs/demos/3DS/A_PROCS.INC b/programs/demos/3DS/A_PROCS.INC index 0d0e8152e5..fa69da6bed 100644 --- a/programs/demos/3DS/A_PROCS.INC +++ b/programs/demos/3DS/A_PROCS.INC @@ -76,11 +76,20 @@ end if mul edx ; shl eax,9 add eax,dword .x + lea ebx,[eax*3] + cmp [dr_flag],12 ; 32 bit col cause + jne @f + add ebx,eax + @@: mov eax,[esi] mov [edi+ebx],eax .skip: add esi,3 + cmp [dr_flag],12 + jne @f + inc esi + @@: inc dword .x movzx edx,word[size_x_var] cmp dword .x,edx ;SIZE_X @@ -97,9 +106,12 @@ end if movzx ecx,word[size_x_var] movzx eax,word[size_y_var] imul ecx,eax + cmp [dr_flag],12 + je @f lea ecx,[ecx*3] shr ecx,2 ; mov ecx,SIZE_X*SIZE_Y*3/4 + @@: cld rep movsd @@ -156,23 +168,42 @@ if Ext >= SSE2 sub ecx,ebx mov esi,[screen_ptr] mov edi,[Zbuffer_ptr] + cmp [dr_flag],12 + je @f lea ebx,[ebx*3] + jmp .f +@@: + shl ebx,2 +.f: mov edx,esi add esi,ebx lea ebx,[ebx+esi] pxor xmm0,xmm0 push eax -@@: +.emb: + cmp [dr_flag],12 + je @f movlps xmm1,[esi+3] movhps xmm1,[esi+6] - punpcklbw xmm1,xmm0 movlps 
xmm2,[esi-3] movhps xmm2,[esi] - punpcklbw xmm2,xmm0 movlps xmm3,[ebx] movhps xmm3,[ebx+3] movlps xmm4,[edx] movhps xmm4,[edx+3] + jmp .ff +@@: + movlps xmm1,[esi+4] + movhps xmm1,[esi+8] + movlps xmm2,[esi-4] + movhps xmm2,[esi] + movlps xmm3,[ebx] + movhps xmm3,[ebx+4] + movlps xmm4,[edx] + movhps xmm4,[edx+4] +.ff: + punpcklbw xmm1,xmm0 + punpcklbw xmm2,xmm0 punpcklbw xmm3,xmm0 punpcklbw xmm4,xmm0 psubsw xmm1,xmm2 @@ -199,7 +230,12 @@ end if movd eax,xmm1 movzx eax,al +; cmp [dr_flag],12 +; je @f lea eax,[eax*3+envmap_cub] +; jmp .fff +;@@: + mov eax,[eax] mov [edi],eax ;xmm1 psrldq xmm1,8 @@ -209,23 +245,35 @@ end if mov eax,[eax] mov [edi+4],eax + cmp [dr_flag],12 + jne @f + add esi,2 + add ebx,2 + add edx,2 +@@: add edi,8 add esi,6 add ebx,6 add edx,6 sub ecx,2 - jnc @b + jnc .emb pop ecx ;,eax mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] + cmp [dr_flag],12 + je .e @@: movsd dec edi loop @b +.e: + rep movsd + end if + ret ;align 16 diff --git a/programs/demos/3DS/B_PROCS.INC b/programs/demos/3DS/B_PROCS.INC index 5c5c53a574..e6150107e1 100644 --- a/programs/demos/3DS/B_PROCS.INC +++ b/programs/demos/3DS/B_PROCS.INC @@ -749,10 +749,13 @@ if Ext>=SSE2 push ecx mov edi,[screen_ptr] movzx ecx,word[size_x_var] ;SIZE_X*3/4 + + cmp [dr_flag],12 + je @f lea ecx,[ecx*3+1] shr ecx,2 + @@: - ; mov ecx,SIZE_X*3/4 xor eax,eax rep stosd if 1 @@ -760,9 +763,16 @@ if Ext>=SSE2 movzx ecx,word[size_y_var] sub ecx,3 imul ecx,ebx + cmp [dr_flag],12 ; 32 bit per pix cause + je @f lea ecx,[ecx*3] shr ecx,4 - lea ebx,[ebx*3] + lea ebx,[ebx *3] + jmp .blr + @@: + + shr ecx,2 + shl ebx,2 ; mov ecx,(SIZE_X*(SIZE_Y-3))*3/16 .blr: @@: @@ -771,15 +781,19 @@ if Ext>=SSE2 mov ecx,edi sub ecx,ebx movups xmm1,[ecx] + cmp [dr_flag],12 + je @f movups xmm2,[edi-3] movups xmm3,[edi+3] - + jmp .f + @@: + movups xmm2,[edi-4] + movups xmm3,[edi+4] + .f: pavgb xmm0,xmm1 pavgb xmm2,xmm3 pavgb xmm0,xmm2 - psubusb xmm0,xmm5 ; importand if fire - movups [edi],xmm0 add edi,16 add esi,16 @@ -788,12 +802,16 
@@ if Ext>=SSE2 end if xor eax,eax movzx ecx,word[size_x_var] + cmp [dr_flag],12 + je @f lea ecx,[ecx*3] shr ecx,2 + @@: ; mov ecx,SIZE_X*3/4 rep stosd pop ecx - loop .again_blur + dec ecx + jnz .again_blur mov esp,ebp pop ebp end if diff --git a/programs/demos/3DS/DATA.INC b/programs/demos/3DS/DATA.INC index ef997c3ca1..05e00a36c0 100644 --- a/programs/demos/3DS/DATA.INC +++ b/programs/demos/3DS/DATA.INC @@ -1,214 +1,220 @@ ; DATA AREA ************************************ - i3 dw 3 - i256 dw 256 - i255d dd 255 - dot_max dd 1.0 ; dot product max and min - dot_min dd 0.0 - env_const dd 1.05 - correct_tex dw 255 - tex_x_div2 dw TEX_X / 2 - tex_y_div2 dw TEX_Y / 2 - xobs dw 0 ;SIZE_X / 2 ;200 ;observer = camera - yobs dw 0 ;SIZE_Y / 2 ;200 ;coordinates - zobs dw -1000 + i3 dw 3 + i12 dd 12 + i256 dw 256 + i255d dd 255 + dot_max dd 1.0 ; dot product max and min + dot_min dd 0.0 + env_const dd 1.05 + correct_tex dw 255 + tex_x_div2 dw TEX_X / 2 + tex_y_div2 dw TEX_Y / 2 + xobs dw 0 ;SIZE_X / 2 ;200 ;observer = camera + yobs dw 0 ;SIZE_Y / 2 ;200 ;coordinates + zobs dw -1000 ; size_x dw SIZE_X ; size_y dw SIZE_Y - re_alloc_flag db 0 - angle_counter dw 0 - piD180 dd 0.017453292519943295769236907684886 - piD128 dd 0.024544 - const6 dw 6,6,6,6 - x_offset dw SIZE_X / 2 - y_offset dw SIZE_Y / 2 - z_offset dw 0 - rsscale dd 175.0 ; next real scale - vect_x dw SIZE_X / 2 - vect_y dw SIZE_Y / 2 - vect_z dw 0 - angle_x dw 0 - angle_y dw 0 - angle_z dw 0 - sin_amplitude dd 50 - sin_frq dd 0.7 - sin_delta dd 0.07 ; wave frequency granularity - convert_muler: - dd 1, 10, 100, 1000, 10000 - XYZpartices: - db 'X','Y','Z' - i10 dw 10 - offset_y: - i25 dw 25 ; screen buff offset - offset_x: - i5 dw 5 - triangles_ptr dd 0 - triangles_w_z_ptr dd 0 - triangles_normals_ptr dd 0 - points_normals_ptr dd 0 - points_normals_rot_ptr dd 0 - points_ptr dd 0 - points_rotated_ptr dd 0 - points_translated_ptr dd 0 - screen_ptr dd 0 - Zbuffer_ptr dd 0 - vertices_index_ptr dd 0 + re_alloc_flag db 0 + 
angle_counter dw 0 + piD180 dd 0.017453292519943295769236907684886 + piD128 dd 0.024544 + const6 dw 6,6,6,6 + x_offset dw SIZE_X / 2 + y_offset dw SIZE_Y / 2 + z_offset dw 0 + rsscale dd 175.0 ; next real scale + vect_x dw SIZE_X / 2 + vect_y dw SIZE_Y / 2 + vect_z dw 0 + angle_x dw 0 + angle_y dw 0 + angle_z dw 0 + sin_amplitude dd 50 + sin_frq dd 0.7 + sin_delta dd 0.07 ; wave frequency granularity + convert_muler: + dd 1, 10, 100, 1000, 10000 + XYZpartices: + db 'X','Y','Z' + i10 dw 10 + offset_y: + i25 dw 25 ; screen buff offset + offset_x: + i5 dw 5 + triangles_ptr dd 0 + triangles_w_z_ptr dd 0 + triangles_normals_ptr dd 0 + points_normals_ptr dd 0 + points_normals_rot_ptr dd 0 + points_ptr dd 0 + points_rotated_ptr dd 0 + points_translated_ptr dd 0 + screen_ptr dd 0 + Zbuffer_ptr dd 0 + vertices_index_ptr dd 0 ; draw_win_at_first db 1 - vertex_edit_no dw 0 - edit_start_x: - dw 0 - edit_start_y dw 0 - edit_end_x: - dw 0 - edit_end_y dw 0 - mouse_state dd 0 - menu: - db 2 ; button number = index - db 'rotary ' ; label - db 3 ; max flag + 1 , if = 255, no flag - r_flag db 1 ; flag - dd axl_f ; offset to flags description + vertex_edit_no dw 0 + edit_start_x: + dw 0 + edit_start_y dw 0 + edit_end_x: + dw 0 + edit_end_y dw 0 + mouse_state dd 0 - db 3 - db 'shd. model' - db 12 + menu: + db 2 ; button number = index + db 'rotary ' ; label + db 3 ; max flag + 1 , if = 255, no flag + r_flag db 1 ; flag + dd axl_f ; offset to flags description + + db 3 + db 'shd. model' + if Ext >= SSE3 + db 13 + else + db 12 + end if dr_flag db 0 ; 6 - dots - dd shd_f + dd shd_f - db 4 - db 'speed ' - db 2 + db 4 + db 'speed ' + db 2 speed_flag db 0 - dd spd_f + dd spd_f - db 5 - db 'zoom out ' - db 255 - db ? - dd ? + db 5 + db 'zoom out ' + db 255 + db ? + dd ? - db 6 - db 'zoom in ' - db 255 - db ? - dd ? + db 6 + db 'zoom in ' + db 255 + db ? + dd ? 
- db 7 - db 'catmull ' - db 2 + db 7 + db 'catmull ' + db 2 catmull_flag db 1 - dd onoff_f + dd onoff_f - db 8 - db 'culling ' - db 2 + db 8 + db 'culling ' + db 2 culling_flag db 0 - dd onoff_f + dd onoff_f - db 9 - db 'rand.light' - db 255 - db ? - dd ? + db 9 + db 'rand.light' + db 255 + db ? + dd ? - db 10 - db 'blur ' - db 6 + db 10 + db 'blur ' + db 6 blur_flag db 0 - dd blur_f + dd blur_f - db 11 - db 'mirror x ' - db 2 + db 11 + db 'mirror x ' + db 2 mirr_x_flag db 0 - dd onoff_f + dd onoff_f - db 12 - db 'mirror y ' - db 2 + db 12 + db 'mirror y ' + db 2 mirr_y_flag db 0 - dd onoff_f + dd onoff_f - db 13 - db 'mirror z ' - db 2 + db 13 + db 'mirror z ' + db 2 mirr_z_flag db 0 - dd onoff_f + dd onoff_f - db 14 - db 'xchg ' - db 4 + db 14 + db 'xchg ' + db 4 xchg_flag db 0 - dd xchg_f + dd xchg_f - db 15 - db 'emboss ' - db 2 + db 15 + db 'emboss ' + db 2 emboss_flag db 0 - dd onoff_f + dd onoff_f - db 16 - db 'fire ' - db 3 + db 16 + db 'fire ' + db 3 fire_flag db 0 - dd blur_f + dd blur_f - db 17 - db 'move ' - db 3 + db 17 + db 'move ' + db 3 move_flag db 0 - dd move_f + dd move_f - db 18 - db 'generate ' - db 6 -generator_flag db 0 - dd blur_f + db 18 + db 'generate ' + db 6 +generator_flag db 0 + dd blur_f - db 19 - db 'bumps ' - db 2 -bumps_flag db 0 - dd bumps_f + db 19 + db 'bumps ' + db 2 +bumps_flag db 0 + dd bumps_f - db 20 - db 'bumps deep' - db 4 + db 20 + db 'bumps deep' + db 4 bumps_deep_flag db 3 - dd bumps_d_f + dd bumps_d_f - db 21 - db 're-map tex' - db 255 -map_tex_flag db ? ;1 - dd ? ;bumps_d_f + db 21 + db 're-map tex' + db 255 +map_tex_flag db ? ;1 + dd ? 
;bumps_d_f - db 22 - db 'bright + ' - db 6 + db 22 + db 'bright + ' + db 6 inc_bright_flag db 0 ;1 - dd blur_f + dd blur_f - db 23 - db 'bright - ' - db 6 + db 23 + db 'bright - ' + db 6 dec_bright_flag db 0 ;1 - dd blur_f + dd blur_f - db 24 - db 'wav effect' - db 2 -sinus_flag db 0 - dd onoff_f + db 24 + db 'wav effect' + db 2 +sinus_flag db 0 + dd onoff_f - db 25 - db 'editor ' - db 2 - edit_flag db 0 - dd onoff_f + db 25 + db 'editor ' + db 2 + edit_flag db 0 + dd onoff_f ; db 24 ; db 'max ' ; db 2 @@ -242,10 +248,13 @@ sinus_flag db 0 ;; dd color_component_f - db -1 ; end mark -flags: ; flags description + + db -1 ; end mark + + +flags: ; flags description shd_f: db 'flat' db 'grd ' @@ -259,6 +268,7 @@ flags: ; flags description db 'btex' db 'cenv' db 'grdl' + db 'rphg' spd_f: db 'idle' db 'full' @@ -279,13 +289,13 @@ flags: ; flags description ;; db ' g ' ;; db ' b ' - blur_f: ; blur, fire - db 'off ' + blur_f: ; blur, fire + db 'off ' bumps_d_f: db ' 1 ' - db ' 2 ' - db ' 3 ' - db ' 4 ' - db ' 5 ' + db ' 2 ' + db ' 3 ' + db ' 4 ' + db ' 5 ' xchg_f: db 'no ' @@ -303,25 +313,25 @@ bumps_d_f: db ' 1 ' ; db 'cscl' base_vector: labelvector: - db 'add vector' + db 'add vector' labelvectorend: labelyminus: - db 'y -' + db 'y -' labelyminusend: labelzplus: - db 'z +' + db 'z +' labelzplusend: labelxminus: - db 'x -' + db 'x -' labelxminusend: labelxplus: - db 'x +' + db 'x +' labelxplusend: labelzminus: - db 'z -' + db 'z -' labelzminusend: labelyplus: - db 'y +' + db 'y +' labelyplusend: ;navigation_size = $ - labelvector @@ -334,43 +344,46 @@ base_vector: ; db 'r +' labelt: - db 'DEUS CARITAS EST' + db 'DEUS CARITAS EST' if Ext=MMX - db ' (MMX)' + db ' (MMX)' end if if Ext=SSE - db ' (SSE)' + db ' (SSE)' end if if Ext=SSE2 - db ' (SSE2)' + db ' (SSE2)' end if - db ' 0.069b',0 + if Ext=SSE3 + db ' (SSE3)' + end if + db ' 0.070',0 labellen: - STRdata db '-1 ' + STRdata db '-1 ' all_lights_size dw lightsend-lights if USE_LFN - file_info: - dd 0 - dd 0 - dd 0 - fsize 
dd 0 ;180000 ; sizeof(workarea) - fptr dd 0 ;workarea - file_name: - db '/rd/1/3d/house.3ds',0 + file_info: + dd 0 + dd 0 + dd 0 + fsize dd 0 ;180000 ; sizeof(workarea) + fptr dd 0 ;workarea + file_name: + db '/rd/1/3d/house.3ds',0 else - file_info: - dd 0 - dd 0 - fsize dd 1 - dd workarea - dd hash_table - file_name: - db '/rd/1/teapot.3ds',0 + file_info: + dd 0 + dd 0 + fsize dd 1 + dd workarea + dd hash_table + file_name: + db '/rd/1/teapot.3ds',0 end if ;I_END: @@ -379,50 +392,60 @@ end if ;============================================= lights: - .light_vector dd 0.0,0.0,-1.0 ; x,y,z Z cooficient of vector must be negative - .orginal_color_r db 1 ; +12 - .orginal_color_g db 255 ; - .orginal_color_b db 1 ; +14 - .min_color_r db 1 ; - .min_color_g db 1 ; +16 - .min_color_b db 1 ; - .max_color_r db 255 ; - .max_color_g db 255 ; - .max_color_b db 255 ; - .shine db 24 ; +21 + .light_vector dd 0.0,0.0,-1.0 ; x,y,z Z cooficient of vector must be negative + .orginal_color_r db 1 ; +12 + .orginal_color_g db 255 ; + .orginal_color_b db 1 ; +14 + .min_color_r db 1 ; + .min_color_g db 1 ; +16 + .min_color_b db 1 ; + .max_color_r db 255 ; + .max_color_g db 255 ; + .max_color_b db 255 ; + .shine db 24 ; +21 ; LIGHT_SIZE equ ($-lights) - dd -0.5,-0.5,-1.0 ; x,y,z ; .light_vector - db 5 ; .orginal_color_r - db 1 ; .orginal_color_g - db 135 ; .orginal_color_b - db 19 ; .min_color_r - db 19 ; .min_color_g - db 19 ; .min_color_b - db 255 ; .max_color_r - db 255 ; .max_color_g - db 255 ; .max_color_b - db 16 ; .shine + dd -0.5,-0.5,-1.0 ; x,y,z ; .light_vector + db 5 ; .orginal_color_r + db 1 ; .orginal_color_g + db 135 ; .orginal_color_b + db 19 ; .min_color_r + db 19 ; .min_color_g + db 19 ; .min_color_b + db 255 ; .max_color_r + db 255 ; .max_color_g + db 255 ; .max_color_b + db 16 ; .shine - dd 0.5,0.5,-1.0 ; x,y,z ; .light_vector - db 135 ; .orginal_color_r - db 1 ; .orginal_color_g - db 1 ; .orginal_color_b - db 19 ; .min_color_r - db 19 ; .min_color_g - db 19 ; .min_color_b 
- db 255 ; .max_color_r - db 255 ; .max_color_g - db 20 ; .max_color_b - db 16 ; .shine + dd 0.5,0.5,-1.0 ; x,y,z ; .light_vector + db 135 ; .orginal_color_r + db 1 ; .orginal_color_g + db 1 ; .orginal_color_b + db 19 ; .min_color_r + db 19 ; .min_color_g + db 19 ; .min_color_b + db 255 ; .max_color_r + db 255 ; .max_color_g + db 20 ; .max_color_b + db 16 ; .shine ; ALL_LIGHTS_SIZE equ ($ - lights) ;#all_lights_size dw ($ - lights) ;ALL_LIGHTS_SIZE ;=============================================== lightsend: + + + + align 16 emboss_bias: - dw 128, 128, 128, 128, 128, 128, 128, 128 + dw 128, 128, 128, 128, 128, 128, 128, 128 + zero_hgst_dd: + dd -1, -1, -1, 0 + mask_255f: + times 4 dd 255.0 + the_zero: + times 4 dd 0.0 I_END: if USE_LFN = 0 @@ -435,30 +458,30 @@ workarea rb 180 end if EndFile dd ? align 8 - sinbeta dd ?;+32 - cosbeta dd ? + sinbeta dd ?;+32 + cosbeta dd ? - xsub dw ? - zsub dw ?;+40 - ysub dw ? + xsub dw ? + zsub dw ?;+40 + ysub dw ? - xx1 dw ? - yy1 dw ? - zz1 dw ?;+48 xx1 + 4 - xx2 dw ? - yy2 dw ? - zz2 dw ? ; xx1 + 10 - xx3 dw ?;+56 - yy3 dw ? - zz3 dw ? ; xx1 + 16 - col1 dd ? - col2 dd ? - col3 dd ? - scale dd ? ; help scale variable - edges_counter dd ? + xx1 dw ? + yy1 dw ? + zz1 dw ?;+48 xx1 + 4 + xx2 dw ? + yy2 dw ? + zz2 dw ? ; xx1 + 10 + xx3 dw ?;+56 + yy3 dw ? + zz3 dw ? ; xx1 + 16 + col1 dd ? + col2 dd ? + col3 dd ? + scale dd ? ; help scale variable + edges_counter dd ? ;== - triangles_count_var dd ? - points_count_var dd ? + triangles_count_var dd ? + points_count_var dd ? ; triangles_ptr dd ? ; triangles_w_z_ptr dd ? ; triangles_normals_ptr dd ? @@ -471,96 +494,101 @@ align 8 ; Zbuffer_ptr dd ? ; vertices_index_ptr dd ? ; edit_start_x: - dw ? ; don't change order + dw ? ; don't change order ; edit_start_y dw ? ; edges_ptr dd ? - size_y_var: - dw ? - size_x_var: - dw ? - x_start: - dw ? - y_start: - dw ? + size_y_var: + dw ? + size_x_var: + dw ? + x_start: + dw ? + y_start: + dw ? ;=== - point_index1 dd ? ;-\ - point_index2 dd ? 
; } don't change order - point_index3 dd ? ;-/ - temp_col dw ? - high dd ? - rand_seed dw ? + point_index1 dd ? ;-\ + point_index2 dd ? ; } don't change order + point_index3 dd ? ;-/ + temp_col dw ? + high dd ? + rand_seed dw ? align 8 - buffer dq ? - errr dd ? - drr dd ? - xx dd ? - yy dd ? - xst dd ? - yst dd ? + buffer dq ? + errr dd ? + drr dd ? + xx dd ? + yy dd ? + xst dd ? + yst dd ? ; screen_ptr dd ? ; Zbuffer_ptr dd ? - matrix rb 36 - cos_tab rd 360 - sin_tab rd 360 + matrix rb 36 + cos_tab rd 360 + sin_tab rd 360 align 16 if USE_LFN = 0 points: - rw (EndFile-SourceFile)/12*3 - points_count = ($-points)/6 + rw (EndFile-SourceFile)/12*3 + points_count = ($-points)/6 triangles: - rw (EndFile-SourceFile)/12*3 - triangles_count = ($-triangles)/6 + rw (EndFile-SourceFile)/12*3 + triangles_count = ($-triangles)/6 align 16 - real_points rd points_count*3 + 1 + real_points rd points_count*3 + 1 align 16 - rotated_points_r rd points_count*3 + 1 + rotated_points_r rd points_count*3 + 1 align 16 - points_rotated rw points_count*3 + 2 ;means translated + points_rotated rw points_count*3 + 2 ;means translated align 16 - triangles_normals rb triangles_count * 12 ; + triangles_normals rb triangles_count * 12 ; align 16 - point_normals rb points_count * 12 ;one 3dvector - triple float dword x,y,z + point_normals rb points_count * 12 ;one 3dvector - triple float dword x,y,z align 16 - point_normals_rotated rb points_count * 12 + point_normals_rotated rb points_count * 12 align 16 - triangles_normals_rotated rb triangles_count * 12 + triangles_normals_rotated rb triangles_count * 12 else points_count = 180000/6*3 triangles_count = 180000 / 6 ;($-triangles)/6 end if align 16 - label trizdd dword - label trizdq qword - triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position + label trizdd dword + label trizdq qword + triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position align 16 - vectors rb 24 + vectors rb 24 ;align 16 ; 
points_color rb 6*points_count ; each color as word ; sorted_triangles rw triangles_count*3 + 2 align 16 - bumpmap rb TEXTURE_SIZE + 1 + bumpmap rb TEXTURE_SIZE + 1 align 16 - bumpmap2 rb TEXTURE_SIZE + 1 + bumpmap2 rb TEXTURE_SIZE + 1 align 16 - envmap rb (TEXTURE_SIZE +1) * 3 + envmap rb (TEXTURE_SIZE +1) * 3 align 16 - envmap_cub rb TEX_X * 3 + envmap_cub rb TEX_X * 3 align 16 - texmap rb (TEXTURE_SIZE +1) * 3 + texmap rb (TEXTURE_SIZE +1) * 3 align 16 - color_map rb (TEXTURE_SIZE +1) * 3 + color_map rb (TEXTURE_SIZE +1) * 3 align 16 - tex_points rb points_count * 4 ; bump_map and texture coords - ; each point word x, word y + tex_points rb points_count * 4 ; bump_map and texture coords + ; each point word x, word y align 16 + lights_aligned: + lights_aligned_end = $ + 16 * 12 + rb 16 * 12 + + if Ext >= SSE2 - sse_repository rb 1024 + sse_repository rb 1024 end if ; SourceFile: ; source file temporally in screen area ; workarea dd ? @@ -568,9 +596,10 @@ end if ; screen rb SIZE_X * SIZE_Y * 3 ; screen buffer ;align 16 ; Z_buffer rb SIZE_X * SIZE_Y * 4 - procinfo: - rb 1024 ; process info - I_Param rb 256 - memStack rb 4000 ;memory area for stack + procinfo: + rb 1024 ; process info + I_Param rb 256 + memStack: + rb 2000 align 16 screen: diff --git a/programs/demos/3DS/History.txt b/programs/demos/3DS/History.txt index 01a42f49f3..e662d28904 100644 --- a/programs/demos/3DS/History.txt +++ b/programs/demos/3DS/History.txt @@ -1,3 +1,12 @@ +View3ds 0.069 - May 2020 +1. KPacked files support by Leency. +2. 32bit vertices indexes and ability to load whole RAM limited objects. + (Above 65535 vertices and triangles), (by me). +3. I switch off painter's algorithm mode (depth sorting). In app implementation it has + limited vertices count and produces lower quality image than Z buffer Catmull algo. + In addition this switch off reduces app size, (by me). + +----------------------------------------------------------------------------------- View3ds 0.068 - XI 2016 1. 
Editing option - new 'editor' button. @@ -108,7 +117,7 @@ View3ds 0.03 - March 2007. 1. Two shading models: smooth + texture, spherical environment mapping + texture. Version only with z coordinate interpolation. 2. Bit changed rotary. I took attempt in SSE instuctions. ( matrix multiplication ) -3. Color position depend drawing model instead spot light ( I hope,spot light come +3. Color position depend drawing model instead spot light ( I hope,spot light come back in future ). ----------------------------------------------------------------------------------- diff --git a/programs/demos/3DS/README.TXT b/programs/demos/3DS/README.TXT index 10134cb0fd..1a51874f12 100644 --- a/programs/demos/3DS/README.TXT +++ b/programs/demos/3DS/README.TXT @@ -1,14 +1,11 @@ -View3ds 0.069 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.070 - tiny viewer to .3ds and .asc files with several graphics effects implementation. What's new? -1. KPacked files support by Leency. -1. 32bit vertices indexes and ability to load whole RAM limited objects. - (Above 65535 vertices and triangles), (by me). -2. I switch off painters algotithm mode (depth sorting). In app impelementetion it has - limited vertices count and produce less quality image than Z buffer Catmull algo. - In addition this switch off reduces app size, (by me). +1. Support for some keys by Leency. +2. New display model - real Phong: real (not fake) normal vector interpolation, normalising it and calculating + dot product (one for each light). It requires SSE3. (by me) Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -17,7 +14,7 @@ Buttons description: pos (position shading depend), dots (app draws only points - nodes of object), txgrd (texture mapping + smooth shading), 2tex (texture mapping + spherical environment mapping), bmap (bump + texture mapping), cenv (cubic environment - mapping), grdl (Gouraud lines - edges only). + mapping), grdl (Gouraud lines - edges only), rphg (real Phong). 
3. speed: idle, full. 4,5. zoom in, out: no comment. 6. catmull: disabled @@ -42,4 +39,4 @@ Buttons description: is released apply current position. You may also decrease whole handlers count by enable culling (using appropriate button) - some back handlers become hidden. - Maciej Guba V 2020 + Maciej Guba VII 2020 diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM index 9560f95025..99316390bc 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/3DS/VIEW3DS.ASM @@ -1,11 +1,11 @@ -; application : View3ds ver. 0.069 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.070 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. ; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba ; email : macgub3@wp.pl -; web : www.macgub.hekko.pl +; web : http://macgub.vxm.pl ; Fell free to use this intro in your own distribution of KolibriOS. ; Special greetings to KolibriOS team . ; I hope because my demos Christian Belive will be near to each of You. @@ -36,6 +36,7 @@ NON = 0 ; -/ \- MMX = 1 SSE = 2 SSE2 = 3 +SSE3 = 4 Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) @@ -54,9 +55,6 @@ use32 START: ; start of execution cld - ; mov eax,14 ; window size according to cur res ... - ; int 0x40 - ; sub eax,150 shl 16 + 150 mov eax, 500 shl 16 + 600 ; ... 
or set manually mov [size_y_var],ax shr ax,1 @@ -94,15 +92,10 @@ START: ; start of execution je .gen jmp .malloc .gen: - if USE_LFN - mov [triangles_count_var],1000 - mov [points_count_var],1000 - call alloc_mem_for_tp - end if call generate_object jmp .opt .asc: - mov [triangles_count_var],10000 + mov [triangles_count_var],10000 ; to do: read asc header mov [points_count_var],10000 call alloc_mem_for_tp call read_asc @@ -115,13 +108,15 @@ START: ; start of execution .opt: - ; call alloc_buffer_mem ; alloc memfor screnn and z buffer call optimize_object1 ; proc in file b_procs.asm ; set point(0,0,0) in center and calc all coords ; to be in <-1.0,1.0> call normalize_all_light_vectors - call init_triangles_normals2 + if Ext >= SSE3 + call copy_lights ; to aligned float + end if + call init_triangles_normals2 call init_point_normals call init_envmap2 call init_envmap_cub @@ -233,7 +228,7 @@ still: je add_vec_buttons.y_minus cmp ah, 177 ;down je add_vec_buttons.y_plus - + jmp noclose button: ; button @@ -268,7 +263,7 @@ still: ; ah = 5 -> scale- cmp ah,5 jne @f - .zoom_out: + .zoom_out: mov dword[scale],0.7 fninit fld [rsscale] @@ -278,7 +273,7 @@ still: @@: cmp ah,6 ; ah = 6 -> scale+ jne @f - .zoom_in: + .zoom_in: mov dword[scale],1.3 fninit fld [rsscale] @@ -290,6 +285,9 @@ still: jne .next_m5 ; 'grd ' 1 call make_random_lights ; 'env ' 2 call normalize_all_light_vectors ; 'bump' 3 + if Ext >= SSE3 + call copy_lights + end if call do_color_buffer ; intit color_map ; 'tex ' 4 ; cmp [emboss_flag],1 ; 'pos ' 5 ; je @f ; 'dots' 6 @@ -332,13 +330,13 @@ still: .next_m: cmp ah,18 jne .next_m2 - if USE_LFN + mov [re_alloc_flag],1 ; reallocate memory mov [triangles_count_var],1000 mov [points_count_var],1000 call alloc_mem_for_tp mov [re_alloc_flag],0 - end if + mov bl,[generator_flag] ; or bl,bl ; jz .next_m2 @@ -472,7 +470,7 @@ still: cmp [move_flag],0 jne @f ; call add_vector - .y_plus: + .y_plus: add [vect_y],10 jmp .next5 @@: @@ -536,21 +534,12 @@ still: mov 
ecx,[points_count_var] call rotary -; RDTSC -; pop ebx -; sub eax,ebx -; sub eax,41 -; push eax mov esi,[points_rotated_ptr] mov edi,[points_translated_ptr] mov ecx,[points_count_var] call translate_points -; cmp [dr_flag],5 -; jne @f -; call calc_attenuation_light -; @@: cmp [fire_flag],0 jne @f call clrscr ; clear the screen @@ -568,8 +557,6 @@ still: @@: call fill_Z_buffer ; make background .non_f: -; RDTSC -; push eax cmp [dr_flag],6 jne @f call draw_dots @@ -742,13 +729,22 @@ end if loop .dc pop eax + mov eax,7 ; put image mov ebx,[screen_ptr] mov ecx,[size_y_var] ; mov ecx,SIZE_X shl 16 + SIZE_Y mov edx,[offset_y] ;5 shl 16 + 25 + cmp [dr_flag],12 + je .ff int 0x40 - + jmp .f + .ff: + mov eax,65 + mov esi,32 + xor ebp,ebp + int 0x40 + .f: mov eax,13 mov bx,[size_x_var] add ebx,18 @@ -773,8 +769,6 @@ end if int 40h - ; addsubps xmm0,xmm0 - jmp still @@ -797,6 +791,9 @@ include "BUMP_TEX.INC" include "GRD_TEX.INC" include "TWO_TEX.INC" include "ASC.INC" +if Ext >= SSE3 +include "3r_phg.inc" +end if clear_vertices_index: mov edi,[vertices_index_ptr] movzx eax,word[size_x_var] @@ -825,9 +822,15 @@ edit: ; mmx required, edit mesh by vertex movzx edx,word[size_x_var] imul edx,ecx add ebx,edx - + push ebx lea ecx,[ebx*2] lea ebx,[ebx*3] + + cmp [dr_flag],12 + jne @f + add ebx,[esp] + @@: + add esp,4 add ebx,[screen_ptr] mov ebx,[ebx] and ebx,0x00ffffff @@ -868,18 +871,6 @@ edit: ; mmx required, edit mesh by vertex ; left button pressed - ; macro check_bar - ; { - ; movzx ebx,word[.x_coord] - ; movzx ecx,word[.y_coord] - ; imul ebx,ecx - ; lea ecx,[ebx*2] - ; lea ebx,[ebx*3] - ; add ebx,[screen_ptr] - ; mov ebx,[ebx] - ; and ebx,0x00ffffff - ; cmp ebx,0x00ff0000 ; is handle bar ? 
- ; } check_bar jne .no_edit @@ -985,7 +976,7 @@ alloc_buffer_mem: mul ecx mov [.temp],eax - lea ecx,[eax*3] + lea ecx,[eax*4] ; more mem for r_phg cause add ecx,256 mov eax,68 mov ebx,20 @@ -1020,28 +1011,6 @@ alloc_buffer_mem: -if 0 -;old Menuet style alloc - movzx ecx,word[size_x_var] - movzx eax,word[size_y_var] - add eax,200 - mul ecx - lea ecx,[eax*3] - add ecx,16 - and ecx,0xfffffff0 - push ecx - shl eax,2 - add ecx,eax - add ecx,MEM_END - mov ebx,1 - mov eax,64 ; allocate mem - resize app mem - int 0x40 - mov [screen_ptr],MEM_END - mov [Zbuffer_ptr],MEM_END - pop ecx - add [Zbuffer_ptr],ecx -end if -ret update_flags: ; updates flags and writing flag description @@ -1063,6 +1032,7 @@ update_flags: add edi,17 cmp byte[edi],-1 jne .ch_another + jmp .no_write .write: ; clreol {pascal never dies} ; * eax = 13 - function number @@ -1447,39 +1417,6 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading mov esp,ebp pop ebp ret -if 0 -init_triangles_normals: - mov ebx,triangles_normals - mov ebp,triangles - @@: - push ebx - mov ebx,vectors - mov esi,dword[ebp] ; first point index - lea esi,[esi*3] - lea esi,[points+esi*2] ; esi - pointer to 1st 3d point - movzx edi,dword[ebp+4] ; second point index - lea edi,[edi*3] - lea edi,[points+edi*2] ; edi - pointer to 2nd 3d point - call make_vector - add ebx,12 - mov esi,edi - movzx edi,dword[ebp+8] ; third point index - lea edi,[edi*3] - lea edi,[points+edi*2] - call make_vector - mov edi,ebx ; edi - pointer to 2nd vector - mov esi,ebx - sub esi,12 ; esi - pointer to 1st vector - pop ebx - call cross_product - mov edi,ebx - call normalize_vector - add ebp,12 - add ebx,12 - cmp dword[ebp],-1 - jne @b -ret -end if init_point_normals: .x equ dword [ebp-4] @@ -1599,326 +1536,46 @@ init_triangles_normals2: ; cmp dword[ebp],-1 ; jne @b ret -if 0 ; ind 64 but -;================================================================= -sort_triangles: - mov esi,[triangles_ptr] - mov edi,triangles_with_z - mov 
ebp,[points_translated_ptr] - - make_triangle_with_z: ;makes list with triangles and z position - movzx eax,word[esi] - lea eax,[eax*3] - movzx ecx,word[ebp+eax*2+4] - - movzx eax,word[esi+2] - lea eax,[eax*3] - add cx,word[ebp+eax*2+4] - - movzx eax,word[esi+4] - lea eax,[eax*3] - add cx,word[ebp+eax*2+4] - - mov ax,cx - ; cwd - ; idiv word[i3] - movsd ; store vertex coordinates - movsw - stosw ; middle vertex coordinate 'z' in triangles_with_z list - cmp dword[esi],-1 - jne make_triangle_with_z - movsd ; copy end mark - mov eax,4 - lea edx,[edi-8-trizdd] - ; lea edx, [edi-8] - ; sub edx,[triangles_w_z_ptr] - mov [high],edx - call quicksort - mov eax,4 - mov edx,[high] - call insertsort - jmp end_sort - - quicksort: - mov ecx,edx - sub ecx,eax - cmp ecx,32 - jc .exit - lea ecx,[eax+edx] - shr ecx,4 - lea ecx,[ecx*8-4]; -; mov edi,[triangles_w_z_ptr] -; mov ebx,[edi+eax] -; mov esi,[edi+ecx] -; mov edi,[edi+edx] - mov ebx,[trizdd+eax]; trizdd[l] - mov esi,[trizdd+ecx]; trizdd[i] - mov edi,[trizdd+edx]; trizdd[h] - cmp ebx,esi - jg @f ; direction NB! you need to negate these to invert the order - if Ext=NON - mov [trizdd+eax],esi - mov [trizdd+ecx],ebx - mov ebx,[trizdd+eax-4] - mov esi,[trizdd+ecx-4] - mov [trizdd+eax-4],esi - mov [trizdd+ecx-4],ebx - mov ebx,[trizdd+eax] - mov esi,[trizdd+ecx] - else -; push ebx -; mov ebx,[triangles_w_z_ptr] -; movq mm0,[ebx+eax-4] -; movq mm1,[ebx+ecx-4] -; movq [ebx+ecx-4],mm0 -; movq [ebx+eax-4],mm1 -; pop ebx - movq mm0,[trizdq+eax-4] - movq mm1,[trizdq+ecx-4] - movq [trizdq+ecx-4],mm0 - movq [trizdq+eax-4],mm1 - xchg ebx,esi - end if +if Ext >= SSE3 +copy_lights: ; after normalising ! 
+ mov esi,lights + mov edi,lights_aligned + mov ecx,3 + .again: + push ecx + mov ecx,3 + cld + rep movsd + xor eax,eax + stosd + mov ecx,3 + .b: + push ecx + mov ecx,3 @@: - cmp ebx,edi - jg @f ; direction - if Ext=NON - mov [trizdd+eax],edi - mov [trizdd+edx],ebx - mov ebx,[trizdd+eax-4] - mov edi,[trizdd+edx-4] - mov [trizdd+eax-4],edi - mov [trizdd+edx-4],ebx - mov ebx,[trizdd+eax] - mov edi,[trizdd+edx] - else -; push ebx -; mov ebx,[triangles_w_z_ptr] -; movq mm0,[ebx+eax-4] -; movq mm1,[ebx+edx-4] -; movq [ebx+edx-4],mm0 -; movq [ebx+eax-4],mm1 - movq mm0,[trizdq+eax-4] - movq mm1,[trizdq+edx-4] - movq [trizdq+edx-4],mm0 - movq [trizdq+eax-4],mm1 -; pop ebx - xchg ebx,edi - end if - @@: - cmp esi,edi - jg @f ; direction - if Ext=NON - mov [trizdd+ecx],edi - mov [trizdd+edx],esi - mov esi,[trizdd+ecx-4] - mov edi,[trizdd+edx-4] - mov [trizdd+ecx-4],edi - mov [trizdd+edx-4],esi - else -; push ebx -; mov ebx,[triangles_w_z_ptr] -; movq mm0,[ebx+ecx-4] -; movq mm1,[ebx+edx-4] -; movq [ebx+edx-4],mm0 -; movq [ebx+ecx-4],mm1 -; pop ebx - - movq mm0,[trizdq+ecx-4] - movq mm1,[trizdq+edx-4] - movq [trizdq+edx-4],mm0 - movq [trizdq+ecx-4],mm1 - xchg ebx,esi - end if - @@: - mov ebp,eax ; direction - add ebp,8 ; j - if Ext=NON - mov esi,[trizdd+ebp] - mov edi,[trizdd+ecx] - mov [trizdd+ebp],edi - mov [trizdd+ecx],esi - mov esi,[trizdd+ebp-4] - mov edi,[trizdd+ecx-4] - mov [trizdd+ecx-4],esi - mov [trizdd+ebp-4],edi - else -; push ebx -; mov ebx,[triangles_w_z_ptr] -; movq mm0,[ebx+ebp-4] -; movq mm1,[ebx+ecx-4] -; movq [ebx+ecx-4],mm0 -; movq [ebx+ebp-4],mm1 -; pop ebx - - movq mm0,[trizdq+ebp-4] - movq mm1,[trizdq+ecx-4] - movq [trizdq+ecx-4],mm0 - movq [trizdq+ebp-4],mm1 - end if - mov ecx,edx ; i; direction - mov ebx,[trizdd+ebp]; trizdd[j] -; mov ebx, [triangles_w_z_ptr] -; add ebx, ebp - - ; push eax - ; mov eax, [triangles_w_z_ptr] - .loop: - sub ecx,8 ; direction - cmp [trizdd+ecx],ebx -; cmp [eax+ecx],ebx - jl .loop ; direction - @@: - add ebp,8 ; direction - 
cmp [trizdd+ebp],ebx -; cmp [eax+ebp],ebx - jg @b ; direction - cmp ebp,ecx - jge @f ; direction - if Ext=NON - mov esi,[trizdd+ecx] - mov edi,[trizdd+ebp] - mov [trizdd+ebp],esi - mov [trizdd+ecx],edi - mov edi,[trizdd+ecx-4] - mov esi,[trizdd+ebp-4] - mov [trizdd+ebp-4],edi - mov [trizdd+ecx-4],esi - else -; movq mm0,[eax+ecx-4] -; movq mm1,[eax+ebp-4] -; movq [eax+ebp-4],mm0 -; movq [eax+ecx-4],mm1 - movq mm0,[trizdq+ecx-4] - movq mm1,[trizdq+ebp-4] - movq [trizdq+ebp-4],mm0 - movq [trizdq+ecx-4],mm1 - end if - jmp .loop -; pop eax - @@: - if Ext=NON - mov esi,[trizdd+ecx] - mov edi,[trizdd+eax+8] - mov [trizdd+eax+8],esi - mov [trizdd+ecx],edi - mov edi,[trizdd+ecx-4] - mov esi,[trizdd+eax+4] - mov [trizdd+eax+4],edi - mov [trizdd+ecx-4],esi - else -; push edx -; mov edx,[triangles_w_z_ptr] -; movq mm0,[edx+ecx-4] -; movq mm1,[edx+eax+4]; dir -; movq [edx+eax+4],mm0; dir -; movq [edx+ecx-4],mm1 -; pop edx - - movq mm0,[trizdq+ecx-4] - movq mm1,[trizdq+eax+4]; dir - movq [trizdq+eax+4],mm0; dir - movq [trizdq+ecx-4],mm1 - end if - add ecx,8 - push ecx edx - mov edx,ebp - call quicksort - pop edx eax - call quicksort - .exit: - ret - insertsort: - mov esi,eax - .start: - add esi,8 - cmp esi,edx - ja .exit - mov ebx,[trizdd+esi] -; mov ebx,[triangles_w_z_ptr] -; add ebx,esi - if Ext=NON - mov ecx,[trizdd+esi-4] - else -; push ebx -; mov ebx,[triangles_w_z_ptr] -; movq mm1,[ebx+esi-4] - movq mm1,[trizdq+esi-4] -; pop ebx - end if - mov edi,esi - @@: - cmp edi,eax - jna @f -; push eax -; mov eax,[triangles_w_z_ptr] -; cmp [eax+edi-8],ebx -; pop eax - cmp [trizdd+edi-8],ebx - jg @f ; direction - if Ext=NON - mov ebp,[trizdd+edi-8] - mov [trizdd+edi],ebp - mov ebp,[trizdd+edi-12] - mov [trizdd+edi-4],ebp - else -; push eax -; mov eax,[triangles_w_z_ptr] -; movq mm0,[eax+edi-12] -; movq [eax+edi-4],mm0 - movq mm0,[trizdq+edi-12] - movq [trizdq+edi-4],mm0 -; pop eax - end if - sub edi,8 - jmp @b - @@: - if Ext=NON - mov [trizdd+edi],ebx - mov [trizdd+edi-4],ecx - else 
-; push eax -; mov eax,[triangles_w_z_ptr] -; movq [eax+edi-4],mm1 - movq [trizdq+edi-4],mm1 -; pop eax - end if - jmp .start - .exit: - ret - end_sort: - ; translate triangles_with_z to sorted_triangles - mov esi,triangles_with_z -; mov esi,[triangles_w_z_ptr] - ; mov edi,sorted_triangles - mov edi,[triangles_ptr] - again_copy: - if Ext=NON - movsd - movsw - add esi,2 - else - movq mm0,[esi] - movq [edi],mm0 - add esi,8 - add edi,6 - end if - cmp dword[esi],-1 - jne again_copy -; if Ext=MMX -; emms -; end if - movsd ; copy end mark too + movzx ebx,byte[esi] + cvtsi2ss xmm0,ebx + movss [edi],xmm0 + inc esi + add edi,4 + loop @b + stosd + pop ecx + loop .b + inc esi ; skip shiness + pop ecx + loop .again ret -end if ; 64 ind +end if + clrscr: mov edi,[screen_ptr] movzx ecx,word[size_x_var] movzx eax,word[size_y_var] imul ecx,eax - lea ecx,[ecx*3] - shr ecx,2 + + xor eax,eax if Ext=NON rep stosd @@ -2130,7 +1787,10 @@ end if je .cubic_env_mapping cmp [dr_flag],11 je .draw_smooth_line - ; **************** + if Ext >= SSE3 + cmp [dr_flag],12 + je .r_phg + end if ; **************** mov esi,point_index3 ; do Gouraud shading mov ecx,3 .again_grd_draw: @@ -2953,7 +2613,72 @@ end if push [xx2] call smooth_line + jmp .end_draw @@: + if Ext >= SSE3 + .r_phg: + + + movd xmm5,[size_y_var] + punpcklwd xmm5,[the_zero] + pshufd xmm5,xmm5,01110011b + + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_normals_rot_ptr] + add ebx,[points_normals_rot_ptr] + add ecx,[points_normals_rot_ptr] + movups xmm0,[eax] + movups xmm1,[ebx] + movups xmm2,[ecx] + andps xmm0,[zero_hgst_dd] + andps xmm1,[zero_hgst_dd] + andps xmm2,[zero_hgst_dd] + xorps xmm3,xmm3 + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add ecx,[points_rotated_ptr] + push dword[ecx+8] + push 
dword[ebx+8] + push dword[eax+8] + movups xmm4,[esp] + add esp,12 + andps xmm4,[zero_hgst_dd] + + + + mov eax,dword[xx1] + ror eax,16 + mov ebx,dword[xx2] + ror ebx,16 + mov ecx,dword[xx3] + ror ecx,16 + mov edi,[screen_ptr] + mov esi,[Zbuffer_ptr] + + call real_phong_tri_z + + jmp .end_draw + end if + + + + + + + .end_draw: pop esi @@ -2990,12 +2715,25 @@ draw_handlers: push dword 0 movzx eax,word[size_x_var] + cmp [dr_flag],12 + je @f lea ebx,[eax*3] sub ebx,18 add eax,eax sub eax,12 mov [.xres3m18],ebx mov [.xres2m12],eax + jmp .f + @@: + lea ebx,[eax*4] + sub ebx,4*6 + add eax,eax + sub eax,3*4 + mov [.xres3m18],ebx + mov [.xres2m12],eax + .f: + + mov esi,[points_translated_ptr] .loop: @@ -3033,7 +2771,13 @@ draw_handlers: ; sub eax,3 imul eax,edx add eax,ebx + push eax lea edi,[eax*3] + cmp [dr_flag],12 + jne @f + add edi,[esp] + @@: + add esp,4 lea eax,[eax*2] ; draw bar 6x6 add edi,[screen_ptr] @@ -3049,13 +2793,20 @@ draw_handlers: push ecx mov ecx,6 - @@: + .do: mov word[edi],0x0000 ;ax mov byte[edi+2],0xff ;al mov word[eax],dx add eax,2 + cmp [dr_flag],12 + jne @f + add edi,4 + loop .do + jmp .ad + @@: add edi,3 - loop @b + loop .do + .ad: add edi,[.xres3m18] add eax,[.xres2m12] pop ecx @@ -3077,6 +2828,10 @@ ret fill_Z_buffer: mov eax,0x70000000 + cmp [dr_flag],12 + jne @f + mov eax,60000.1 + @@: mov edi,[Zbuffer_ptr] movzx ecx,word[size_x_var] movzx ebx,word[size_y_var]