diff --git a/data/Tupfile.lua b/data/Tupfile.lua index 57ebac914c..20dba98ff7 100644 --- a/data/Tupfile.lua +++ b/data/Tupfile.lua @@ -419,7 +419,7 @@ tup.append_table(img_files, { {"3D/FREE3D04", PROGS .. "/demos/free3d04/trunk/free3d04"}, {"3D/GEARS", PROGS .. "/develop/libraries/TinyGL/asm_fork/examples/gears"}, {"3D/RAY", PROGS .. "/demos/ray/ray"}, - {"3D/VIEW3DS", PROGS .. "/demos/3DS/VIEW3DS"}, + {"3D/VIEW3DS", PROGS .. "/demos/view3ds/view3ds"}, {"DEMOS/BCDCLK", PROGS .. "/demos/bcdclk/trunk/bcdclk"}, {"DEMOS/CIRCLE", PROGS .. "/develop/examples/circle/trunk/circle"}, {"DEMOS/COLORREF", PROGS .. "/demos/colorref/trunk/colorref"}, diff --git a/programs/demos/3DS/3GLASS.INC b/programs/demos/3DS/3GLASS.INC deleted file mode 100644 index 0b2efcc358..0000000000 --- a/programs/demos/3DS/3GLASS.INC +++ /dev/null @@ -1,550 +0,0 @@ -; Glass like rendering triangle by Maciej Guba. -; http://macgub.hekko.pl, macgub3@wp.pl - -ROUND2 equ 10 -glass_tri: -;----procedure render glass like triangle with z coord -- -;----interpolation ( Catmull alghoritm )----------------- -;----I normalize normal vector in every pixel ----------- -;------------------in - eax - x1 shl 16 + y1 ------------ -;---------------------- ebx - x2 shl 16 + y2 ------------ -;---------------------- ecx - x3 shl 16 + y3 ------------ -;---------------------- edx - ptr to stencil_buff ------- -;---------------------- esi - pointer to Z-buffer filled- -;---------------------- with dd float variables-------- -;---------------------- edi - pointer to screen buffer--- -;---------------------- xmm0 - 1st normal vector -------- -;---------------------- xmm1 - 2cond normal vector ------ -;---------------------- xmm2 - 3rd normal vector -------- -;---------------------- xmm3 - normalized light vector -- -;---------------------- xmm4 - lo -> hi z1, z2, z3 coords -;---------------------- as dwords floats --------------- -;---------------------- xmm5 - lo -> hi y_min, y_max, --- -;---------------------- x_min, x_max 
as dword integers - -;---------------------- stack - no parameters ----------- -;-------------------------------------------------------- -;----------------- procedure don't save registers !! ---- - - - - - push ebp - mov ebp,esp - sub esp,512 - sub ebp,16 - and ebp,0xfffffff0 - - .1_nv equ [ebp-16] - .2_nv equ [ebp-32] - .3_nv equ [ebp-48] - .l_v equ [ebp-64] - .z3 equ [ebp-72] - .z2 equ [ebp-76] - .z1 equ [ebp-80] - .x1 equ [ebp-82] - .y1 equ [ebp-84] - .x2 equ [ebp-86] - .y2 equ [ebp-88] - .x3 equ [ebp-90] - .y3 equ [ebp-92] - .Zbuf equ [ebp-96] - .x_max equ [ebp-100] - .x_min equ [ebp-104] - .y_max equ [ebp-108] - .y_min equ [ebp-112] - .screen equ [ebp-116] - .dx12 equ [ebp-120] - .dx13 equ [ebp-124] - .dx23 equ [ebp-128] - .dn12 equ [ebp-144] - .dn13 equ [ebp-160] - .dn23 equ [ebp-176] - .dz12 equ [ebp-180] - .dz13 equ [ebp-184] - .dz23 equ [ebp-188] - - .cnv1 equ [ebp-208] ; cur normal vectors - .cnv2 equ [ebp-224] - .cz2 equ [ebp-228] - .cz1 equ [ebp-232] - .stencil_buff equ [ebp-236] - - - - - .sort3: ; sort triangle coordinates... 
- cmp ax,bx - jle .sort1 - xchg eax,ebx - shufps xmm4,xmm4,11100001b - movaps xmm6,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm6 - - - .sort1: - cmp bx,cx - jle .sort2 - xchg ebx,ecx - shufps xmm4,xmm4,11011000b - movaps xmm6,xmm1 - movaps xmm1,xmm2 - movaps xmm2,xmm6 - - jmp .sort3 - - .sort2: - - movaps .z1,xmm4 - mov .y1,eax - mov .y2,ebx - mov .y3,ecx - mov .stencil_buff, edx - - movdqa .y_min,xmm5 -if 1 ; check if at last only fragment - packssdw xmm5,xmm5 ; of triangle is in visable area - pshuflw xmm5,xmm5,11011000b - movdqu xmm7,.y3 - movdqa xmm6,xmm5 - pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min - pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max - movdqa xmm4,xmm7 - pcmpgtw xmm7,xmm5 - pcmpgtw xmm4,xmm6 - pxor xmm7,xmm4 - pmovmskb eax,xmm7 - and eax,0x00aaaaaa - or eax,eax - jz .rpt_loop2_end -end if - movaps .1_nv,xmm0 - movaps .2_nv,xmm1 - movaps .3_nv,xmm2 - movaps .l_v,xmm3 - ; mov .Zbuf,esi - mov .screen,edi - - - - mov bx,.y2 ; calc deltas - sub bx,.y1 - jnz .rpt_dx12_make - - xorps xmm7,xmm7 - mov dword .dx12,0 - mov dword .dz12,0 - movaps .dn12,xmm7 - jmp .rpt_dx12_done - - .rpt_dx12_make: - mov ax,.x2 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx12,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z2 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz12,xmm5 - - movaps xmm0,.2_nv - subps xmm0,.1_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn12,xmm0 - - - .rpt_dx12_done: - - mov bx,.y3 ; calc deltas - sub bx,.y1 - jnz .rpt_dx13_make - - xorps xmm7,xmm7 - mov dword .dx13,0 - mov dword .dz13,0 - movaps .dn13,xmm7 - jmp .rpt_dx13_done - - .rpt_dx13_make: - mov ax,.x3 - sub ax,.x1 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx13,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z3 - subss xmm5,.z1 - divss xmm5,xmm6 - movss .dz13,xmm5 - - movaps xmm0,.3_nv - subps xmm0,.1_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn13,xmm0 - - .rpt_dx13_done: - - mov bx,.y3 ; calc deltas - sub bx,.y2 - jnz 
.rpt_dx23_make - - xorps xmm7,xmm7 - mov dword .dx23,0 - mov dword .dz23,0 - movaps .dn23,xmm7 - jmp .rpt_dx23_done - - .rpt_dx23_make: - mov ax,.x3 - sub ax,.x2 - cwde - movsx ebx,bx - shl eax,ROUND2 - cdq - idiv ebx - mov .dx23,eax - - cvtsi2ss xmm6,ebx - movss xmm5,.z3 - subss xmm5,.z2 - divss xmm5,xmm6 - movss .dz23,xmm5 - - movaps xmm0,.3_nv - subps xmm0,.2_nv - shufps xmm6,xmm6,0 - divps xmm0,xmm6 - movaps .dn23,xmm0 - - .rpt_dx23_done: - - - movsx eax,word .x1 - shl eax,ROUND2 - mov ebx,eax - mov edx,.z1 - mov .cz1,edx - mov .cz2,edx - movaps xmm0,.1_nv - movaps .cnv1,xmm0 - movaps .cnv2,xmm0 - - - movsx ecx,word .y1 - cmp cx,.y2 - - jge .rpt_loop1_end - - .rpt_loop1: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movlps xmm3,.cz1 - movaps xmm4,.l_v - sar ebx,ROUND2 - sar eax,ROUND2 - mov edx,.stencil_buff - mov edi,.screen - ; mov esi,.Zbuf - - call glass_line - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movss xmm2,.cz1 - movss xmm3,.cz2 - addps xmm0,.dn13 - addps xmm1,.dn12 - addss xmm2,.dz13 - addss xmm3,.dz12 - add eax,.dx13 - add ebx,.dx12 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - movss .cz1,xmm2 - movss .cz2,xmm3 - - add ecx,1 - cmp cx,.y2 - jl .rpt_loop1 - - - - - - .rpt_loop1_end: - movsx ecx,word .y2 - cmp cx,.y3 - jge .rpt_loop2_end - - movsx ebx,word .x2 ; eax - cur x1 - shl ebx,ROUND2 ; ebx - cur x2 - push dword .z2 - pop dword .cz2 - movaps xmm0,.2_nv - movaps .cnv2,xmm0 - - - .rpt_loop2: - pushad - - movaps xmm2,.y_min - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movlps xmm3,.cz1 - movaps xmm4,.l_v - sar ebx,ROUND2 - sar eax,ROUND2 - mov edx,.stencil_buff - mov edi,.screen - ; mov esi,.Zbuf - - call glass_line - - popad - movaps xmm0,.cnv1 - movaps xmm1,.cnv2 - movss xmm2,.cz1 - movss xmm3,.cz2 - addps xmm0,.dn13 - addps xmm1,.dn23 - addss xmm2,.dz13 - addss xmm3,.dz23 - add eax,.dx13 - add ebx,.dx23 - - movaps .cnv1,xmm0 - movaps .cnv2,xmm1 - movss .cz1,xmm2 - movss .cz2,xmm3 - - add ecx,1 - cmp cx,.y3 - jl 
.rpt_loop2 - - .rpt_loop2_end: - - add esp,512 - pop ebp - -ret -align 16 -glass_line: -; in: -; xmm0 - normal vector 1 -; xmm1 - normal vect 2 -; xmm3 - lo -> hi z1, z2 coords as dwords floats -; xmm2 - lo -> hi y_min, y_max, x_min, x_max -; as dword integers -; xmm4 - normalized light vector -; eax - x1 -; ebx - x2 -; ecx - y -; edx - stencil buff ptr -; edi - screen buffer -; esi - z buffer ===> not needed in glass rendering - - push ebp - mov ebp,esp - sub esp,256 - sub ebp,16 - and ebp,0xfffffff0 - - .n1 equ [ebp-16] - .n2 equ [ebp-32] - .lv equ [ebp-48] - .lx1 equ [ebp-52] - .lx2 equ [ebp-56] - .z2 equ [ebp-60] - .z1 equ [ebp-64] - .screen equ [ebp-68] - .zbuff equ [ebp-72] - .x_max equ [ebp-74] - .x_min equ [ebp-76] - .y_max equ [ebp-78] - .y_min equ [ebp-80] - .dn equ [ebp-96] - .dz equ [ebp-100] - .y equ [ebp-104] - .cnv equ [ebp-128] - .col_sum_b equ [ebp-136] - .col_sum_g equ [ebp-140] - .col_sum_r equ [ebp-144] - .cur_col equ [ebp-160] - .stencil_buf equ [ebp-164] - - mov .y,ecx - packssdw xmm2,xmm2 - movq .y_min,xmm2 - cmp cx,.y_min - jl .end_rp_line - cmp cx,.y_max - jge .end_rp_line ; - - cmp eax,ebx - je .end_rp_line - jl @f - xchg eax,ebx - movaps xmm7,xmm0 - movaps xmm0,xmm1 - movaps xmm1,xmm7 - shufps xmm3,xmm3,11100001b - @@: - - cmp ax,.x_max - jge .end_rp_line - cmp bx,.x_min - jle .end_rp_line - movaps .lv,xmm4 - movaps .n1,xmm0 - movaps .n2,xmm1 - mov .lx1,eax - mov .lx2,ebx - mov .stencil_buf,edx - movlps .z1,xmm3 - - sub ebx,eax - cvtsi2ss xmm7,ebx - shufps xmm7,xmm7,0 - subps xmm1,xmm0 - divps xmm1,xmm7 - movaps .dn,xmm1 - psrldq xmm3,4 - subss xmm3,.z1 - divss xmm3,xmm7 - movss .dz,xmm3 - - - - mov ebx,.lx1 - cmp bx,.x_min ; clipping on function4 - jge @f - movzx eax,word .x_min - sub eax,ebx - cvtsi2ss xmm7,eax - shufps xmm7,xmm7,0 - mulss xmm3,xmm7 - mulps xmm1,xmm7 - addss xmm3,.z1 - addps xmm1,.n1 - movsx eax,word .x_min - movss .z1,xmm3 - movaps .n1,xmm1 - mov dword .lx1,eax - - @@: - movzx eax,word .x_max - cmp .lx2,eax - jl @f - 
mov .lx2,eax - @@: - movzx eax,word[xres_var] - mul dword .y - - - add eax,.lx1 - shl eax,2 - add edi,eax - mov ebx,eax - add ebx,.stencil_buf - - - mov ecx,.lx2 - sub ecx,.lx1 - - - movaps xmm0,.n1 - movss xmm2,.z1 -align 16 - .ddraw: - movaps xmm7,xmm0 - mulps xmm7,xmm7 ; normalize - haddps xmm7,xmm7 - haddps xmm7,xmm7 - rsqrtps xmm7,xmm7 - mulps xmm7,xmm0 - maxps xmm7,[the_zero] - movups .cnv,xmm7 - - mov edx,lights_aligned ; lights_aligned - global variable - xorps xmm1,xmm1 ; instead global can be used .lv - light vect. - - .again_col: - movups xmm7,.cnv - mulps xmm7,[edx] - haddps xmm7,xmm7 - haddps xmm7,xmm7 - if 0 - cmp [bump_flag],1 ; on/off temporaly - ; depend on bump button - je @f - ; stencil - movss xmm5,xmm2 - movss xmm6,xmm2 - addss xmm5,[aprox] - subss xmm6,[aprox] - ; Stencil buffer for now not work as I expected, - ; moreover - it not work at all. - cmpnltss xmm5,dword[ebx] - cmpnltss xmm6,dword[ebx] - xorps xmm5,xmm6 - xorps xmm6,xmm6 - movd eax,xmm5 - cmp eax,-1 - jne .no_reflective - end if - @@: - movaps xmm6,xmm7 - mulps xmm6,xmm6 - mulps xmm6,xmm6 - - mulps xmm6,xmm6 - mulps xmm6,[edx+48] - .no_reflective: - mulps xmm7,[edx+16] - addps xmm7,xmm6 - addps xmm7,[edx+32] - minps xmm7,[mask_255f] ; global - - - maxps xmm1,xmm7 - add edx,64 ; size of one light in aligned list - cmp edx,lights_aligned_end - jl .again_col - cvtps2dq xmm1,xmm1 - movd xmm6,[edi] - packssdw xmm1,xmm1 - packuswb xmm1,xmm1 - paddusb xmm1,xmm6 - movd [edi],xmm1 - - - .skip: - add edi,4 - add ebx,4 ; stencil_buff - addps xmm0,.dn - addss xmm2,.dz - sub ecx,1 - jnz .ddraw - - .end_rp_line: - add esp,256 - pop ebp - -ret diff --git a/programs/demos/3DS/GRD_LINE.INC b/programs/demos/3DS/GRD_LINE.INC deleted file mode 100644 index 8335e0b448..0000000000 --- a/programs/demos/3DS/GRD_LINE.INC +++ /dev/null @@ -1,643 +0,0 @@ -;-procedure draws smooth shaded lines (I mean interpolation 24 bit-- -;-color), with z coord interpolation-------------------------------- -;-author: 
Maciej Guba (www.macgub.hekko.pl)------------------------- -;-in : ------------------------------------------------------------- -;----- edi - pointer to screen buffer ------------------------------ -;----- esi - pointer to Z buffer ----------------------------------- -;------ constans : SIZE_X, SIZE_Y - screen width and height--------- -;----------------- ROUND - fixed point shift------------------------ -;------ other parameters via stack---------------------------------- -smooth_line: -.x1 equ ebp+4 -.y1 equ ebp+6 -.z1 equ ebp+8 -.r1 equ ebp+10 -.g1 equ ebp+12 -.b1 equ ebp+14 -.x2 equ ebp+16 -.y2 equ ebp+18 -.z2 equ ebp+20 -.r2 equ ebp+22 -.g2 equ ebp+24 -.b2 equ ebp+26 - - -.line_lenght equ ebp-2 -.delta equ ebp-6 -.delta_x equ ebp-10 -.delta_y equ ebp-14 -.dr equ ebp-18 -.dg equ ebp-22 -.db equ ebp-26 -.dz equ ebp-30 -.cr equ ebp-34 -.cg equ ebp-38 -.cb equ ebp-42 -.cz equ ebp-46 - -;.line_lenght equ ebp-48 -.screen equ ebp-52 -.zbuffer equ ebp-56 -.ccoord equ ebp-60 ;current coordinate -.czbuf equ ebp-64 -.cscr equ ebp-68 -.xres equ ebp-72 -.yres equ ebp-76 -.xresm1 equ ebp-80 -.yresm1 equ ebp-84 -.xresp1 equ ebp-88 -.yresp1 equ ebp-92 -.xres3 equ ebp-96 -.xres4 equ ebp-100 - -macro .update_cur_var -{ -if Ext=NON - mov ebx,[.dz] - add [.cz],ebx - mov ebx,[.dr] - add [.cr],ebx - mov ebx,[.dg] - add [.cg],ebx - mov ebx,[.db] - add [.cb],ebx -elseif Ext=MMX - movq mm0,[.cz] - movq mm1,[.cg] - paddd mm0,mm2 ;[.dz] - paddd mm1,mm3 ;[.dg] - movq [.cz],mm0 - movq [.cg],mm1 -elseif Ext >= SSE2 -; movups xmm1,[.cz] - paddd xmm1,xmm0 -; movups [.cz],xmm1 -end if -} -macro .draw_pixel -{ - mov [esi],ebx ; actualize Z buffer -if Ext>=SSE2 - movaps xmm7,xmm1 ;[.cb] ;;xmm1 - shufps xmm7,xmm7,00111001b - psrld xmm7,ROUND - packssdw xmm7,xmm7 - packuswb xmm7,xmm7 - pand xmm7,xmm6 ;[.mask] - movd [edi],xmm7 -else - - mov eax,[.cb] - sar eax,ROUND - mov [edi],al -; and eax,0x000000ff ; clean unused bits - mov ebx,[.cg] - sar ebx,ROUND - mov [edi+1],bl -; mov ah,bl - mov 
edx,[.cr] - sar edx,ROUND - mov [edi+2],dl -end if -; shl ebx,16 -; or eax,ebx -; mov [edi],eax -} -macro .sort -{ - -if Ext >= MMX - movq mm0,[.x1] - movq mm1,[.x2] - movq [.x1],mm1 - movq [.x2],mm0 -else - mov edx,[.x1] - xchg edx,[.x2] - mov [.x1],edx - mov edx,[.z1] - xchg edx,[.z2] - mov [.z1],edx -end if - mov edx,[.g1] - xchg edx,[.g2] - mov [.g1],edx -} - - - - emms - mov ebp,esp - sub esp,128 - mov eax,[.x1] ; check if parameters exceedes screen area - mov ebx,[.x2] - or eax,ebx - test eax,80008000h - jne .end_line - movzx edx,word [size_x_var] - mov [.xres],edx - dec edx - movzx ecx,word [size_y_var] - mov [.yres],ecx - dec ecx - cmp word[.x1],dx ;SIZE_X - jg .end_line - cmp word[.x2],dx ;SIZE_X - jg .end_line - cmp word[.y1],cx ;SIZE_Y - jg .end_line - cmp word[.y2],cx ;SIZE_Y - jg .end_line - - mov edx,[.xres] - shl edx,2 - mov [.xres4],edx - shr edx,2 - lea edx,[edx*3] - mov [.xres3],edx - mov edx,[.xres] - mov ecx,[.yres] - dec edx - dec ecx - mov [.xresm1],edx - mov [.yresm1],ecx - add edx,2 - add ecx,2 - mov [.xresp1],edx - mov [.yresp1],ecx - - mov [.screen],edi - mov cx,[.x1] - cmp cx,[.x2] - je .vertical_l - mov cx,[.y1] - cmp cx,[.y2] - je .horizontal_l - mov ax,[.x1] - sub ax,[.x2] - cmp ax,0 - jg @f - neg ax ; calc absolute value - @@: - mov [.delta_x],ax - mov bx,[.y1] - sub bx,[.y2] - cmp bx,0 - jg @f - neg bx - @@: - mov [.delta_y],bx - cmp ax,bx - je .deg45_l - jl .more_vertical_l - jg .more_horizon_l - jmp .end_line - ; -.horizontal_l: - mov ax,[.x1] - mov bx,[.x2] - cmp bx,ax - jge @f - - .sort -@@: - - mov bx,[.x2] - sub bx,[.x1] - movsx ebx,bx - cmp ebx,0 ;line lenght equql 0 - je .end_line - mov [.delta_x],ebx - - call .calc_delta - - mov eax,[.xres] ;SIZE_X - movsx ebx,word[.y1] - mul ebx - add esi,eax - lea eax,[eax*3] - add esi,eax - add edi,eax - movsx eax,word[.x1] - add esi,eax - lea eax,[eax*3] - add edi,eax - add esi,eax - - mov ecx,[.delta_x] - - movsx ebx,word[.r1] - shl ebx,ROUND - mov [.cr],ebx - movsx ebx,word[.g1] - shl 
ebx,ROUND - mov [.cg],ebx - movsx ebx,word[.b1] - shl ebx,ROUND - mov [.cb],ebx - movsx ebx,word[.z1] - shl ebx,ROUND - mov [.cz],ebx -if Ext = SSE2 - movups xmm1,[.cz] -end if -.hdraw: -if Ext = SSE2 - movd ebx,xmm1 -else - mov ebx,[.cz] -end if - cmp [esi],ebx - jle .skip - - .draw_pixel - -.skip: - add edi,3 - add esi,4 - - .update_cur_var - - loop .hdraw - jmp .end_line - -.vertical_l: - mov ax,[.y1] - cmp [.y2],ax - jge @f - - .sort -@@: - mov bx,[.y2] - sub bx,[.y1] - movsx ebx,bx - cmp ebx,0 - je .end_line - mov [.delta_y],ebx - - call .calc_delta - - mov eax,[.xres] ;SIZE_X - movsx ebx,word[.y1] - mul ebx - add esi,eax - lea eax,[eax*3] - add edi,eax - add esi,eax - movsx eax,word[.x1] - add esi,eax - lea eax,[eax*3] - add esi,eax - add edi,eax - - mov ecx,[.delta_y] - - movsx ebx,word[.r1] - shl ebx,ROUND - mov [.cr],ebx - movsx ebx,word[.g1] - shl ebx,ROUND - mov [.cg],ebx - movsx ebx,word[.b1] - shl ebx,ROUND - mov [.cb],ebx - movsx ebx,word[.z1] - shl ebx,ROUND - mov [.cz],ebx -if Ext = SSE2 - movups xmm1,[.cz] -end if - -.v_draw: -if Ext = SSE2 - movd ebx,xmm1 -else - mov ebx,[.cz] -end if - cmp [esi],ebx - jle @f - - .draw_pixel - -@@: - add edi,[.xres3] - add esi,[.xres4] - - .update_cur_var - - loop .v_draw - jmp .end_line -.deg45_l: - mov word[.line_lenght],ax - mov ax,[.x1] - cmp [.x2],ax - jge @f - - .sort -@@: - mov bx,[.y2] - sub bx,[.y1] - movsx ebx,bx - cmp ebx,0 - je .end_line - mov [.delta_y],ebx - mov bx,[.x2] - sub bx,[.x1] - movsx ebx,bx - mov [.delta_x],ebx - - call .calc_delta - - mov eax,[.xres] - movsx ebx,word[.y1] ;calc begin values in screen and Z buffers - mul ebx - lea ebx,[3*eax] - add edi,ebx - shl eax,2 - add esi,eax - movsx eax,word[.x1] - lea ebx,[eax*3] - add edi,ebx - shl eax,2 - add esi,eax - - movzx ecx,word[.line_lenght] - - movsx ebx,word[.r1] - shl ebx,ROUND - mov [.cr],ebx - movsx ebx,word[.g1] - shl ebx,ROUND - mov [.cg],ebx - movsx ebx,word[.b1] - shl ebx,ROUND - mov [.cb],ebx - movsx ebx,word[.z1] - shl ebx,ROUND 
- mov [.cz],ebx -.d45_draw: -if Ext = SSE2 - movd ebx,xmm1 -else - mov ebx,[.cz] -end if - cmp [esi],ebx - jle @f - - .draw_pixel - -@@: - cmp dword[.delta_y],0 - jl @f - add edi,[.xres3] ;SIZE_X*3+3 - add edi,3 - add esi,[.xres4] ;SIZE_X*4+4 - add esi,4 - jmp .d45_1 -@@: - sub edi,[.xres3] ;(SIZE_X*3)-3 - sub edi,3 - sub esi,[.xres4] ;(SIZE_X*4)-4 - sub esi,4 -.d45_1: - .update_cur_var - - loop .d45_draw - jmp .end_line - -.more_vertical_l: - mov word[.line_lenght],bx - mov ax,[.y1] - cmp [.y2],ax - jge @f - .sort -@@: - mov bx,[.y2] - sub bx,[.y1] - movsx ebx,bx - cmp ebx,0 - je .end_line ;======================= - mov [.delta_y],ebx - - mov ax,[.x2] - sub ax,[.x1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.delta],eax - - call .calc_delta - - mov eax,[.xres] ;SIZE_X - movsx ebx,word[.y1] ;calc begin values in screen and Z buffers - mul ebx - lea ebx,[3*eax] - add esi,ebx - add esi,eax - add edi,ebx - mov [.cscr],edi - mov [.czbuf],esi - - movzx ecx,word[.line_lenght] - - movsx ebx,word[.r1] - shl ebx,ROUND - mov [.cr],ebx - movsx ebx,word[.g1] - shl ebx,ROUND - mov [.cg],ebx - movsx ebx,word[.b1] - shl ebx,ROUND - mov [.cb],ebx - movsx ebx,word[.z1] - shl ebx,ROUND - mov [.cz],ebx -if Ext = SSE2 - movups xmm1,[.cz] -end if - movsx ebx,word[.x1] - shl ebx,ROUND - mov [.ccoord],ebx ; .ccoord -> x coordinate -.draw_m_v: - mov edi,[.cscr] - mov esi,[.czbuf] - mov eax,[.ccoord] - sar eax,ROUND - lea ebx,[eax*3] - add edi,ebx - add esi,ebx - add esi,eax -if Ext = SSE2 - movd ebx,xmm1 -else - mov ebx,[.cz] -end if - cmp [esi],ebx - jle @f - - .draw_pixel - -@@: - mov eax,[.delta] - mov ebx,[.xres3] - add [.ccoord],eax - mov eax,[.xres4] - add dword[.cscr],ebx ;SIZE_X*3 ; - add dword[.czbuf],eax ;SIZE_X*4 -.d_m_v1: - - .update_cur_var - - dec ecx - jnz .draw_m_v - jmp .end_line - -.more_horizon_l: - mov word[.line_lenght],ax - mov ax,[.x1] - cmp [.x2],ax - jge @f - - .sort -@@: - mov bx,[.x2] - sub bx,[.x1] - movsx ebx,bx - cmp ebx,0;======================= - je 
.end_line - mov [.delta_x],ebx - - mov ax,[.y2] - sub ax,[.y1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.delta],eax - - call .calc_delta - - ;calc begin values in screen and Z buffers - movsx ebx,word[.x1] - mov eax,ebx - add esi,ebx - lea ebx,[3*ebx] - add esi,ebx - add edi,ebx - mov [.cscr],edi - mov [.czbuf],esi - - movzx ecx,word[.line_lenght] - - movsx ebx,word[.r1] - shl ebx,ROUND - mov [.cr],ebx - movsx ebx,word[.g1] - shl ebx,ROUND - mov [.cg],ebx - movsx ebx,word[.b1] - shl ebx,ROUND - mov [.cb],ebx - movsx ebx,word[.z1] - shl ebx,ROUND - mov [.cz],ebx -if Ext = SSE2 - movups xmm1,[.cz] -end if - movsx ebx,word[.y1] - shl ebx,ROUND - mov [.ccoord],ebx ; .ccoord -> y coordinate - -.draw_m_h: - mov edi,[.cscr] - mov esi,[.czbuf] - mov eax,[.ccoord] ; ccoord - cur y coordinate - sar eax,ROUND - mov ebx,[.xres] ;SIZE_X - mul ebx - add esi,eax - lea eax,[eax*3] - add esi,eax - add edi,eax -if Ext = SSE2 - movd ebx,xmm1 -else - mov ebx,[.cz] -end if - cmp [esi],ebx - jle @f - - .draw_pixel - -@@: - mov eax,[.delta] - add [.ccoord],eax - add dword[.cscr],3 ; - add dword[.czbuf],4 - - .update_cur_var - - dec ecx - jnz .draw_m_h - -.end_line: - mov esp,ebp - ret 24 - -.calc_delta: - mov ax,[.z2] - sub ax,[.z1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.dz],eax - - mov ax,[.r2] - sub ax,[.r1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.dr],eax - - mov ax,[.g2] - sub ax,[.g1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.dg],eax - - mov ax,[.b2] - sub ax,[.b1] - cwde - shl eax,ROUND - cdq - idiv ebx - mov [.db],eax -if Ext=MMX | Ext = SSE - movq mm2,[.dz] - movq mm3,[.dg] -else if Ext >= SSE2 - movups xmm0,[.dz] - movups xmm6,[.mask] -end if -ret -.mask: - dq 0xffffffff00ffffff - dq 0xffffffffffffffff - - - - - - - - - - - - - diff --git a/programs/demos/3DS/Tupfile.lua b/programs/demos/3DS/Tupfile.lua deleted file mode 100644 index 1d71eb4e60..0000000000 --- a/programs/demos/3DS/Tupfile.lua +++ /dev/null @@ -1,2 +0,0 @@ -if tup.getconfig("NO_FASM") 
~= "" then return end -tup.rule("VIEW3DS.ASM", "fasm %f %o " .. tup.getconfig("KPACK_CMD"), "VIEW3DS") diff --git a/programs/demos/3DS/build.bat b/programs/demos/3DS/build.bat deleted file mode 100644 index ce7745d46a..0000000000 --- a/programs/demos/3DS/build.bat +++ /dev/null @@ -1,3 +0,0 @@ -@fasm VIEW3DS.asm VIEW3DS -@kpack VIEW3DS -@pause \ No newline at end of file diff --git a/programs/demos/3DS/3DMATH.INC b/programs/demos/view3ds/3dmath.inc similarity index 91% rename from programs/demos/3DS/3DMATH.INC rename to programs/demos/view3ds/3dmath.inc index 028df61912..43b492b324 100644 --- a/programs/demos/3DS/3DMATH.INC +++ b/programs/demos/view3ds/3dmath.inc @@ -198,7 +198,24 @@ ret ;---------------------------- edi - pointer to vector ----- ;----------------------- out : none normalize_vector: -if Ext >= SSE3 +if Ext = SSE2 | Ext = SSE | Ext = SSE3 + movups xmm0,[edi] + andps xmm0,[zero_hgst_dd] + movups xmm1,xmm0 + mulps xmm0,xmm0 + movhlps xmm2,xmm0 + addps xmm0,xmm2 + movaps xmm2,xmm0 + shufps xmm2,xmm2,11100001b + addps xmm0,xmm2 + shufps xmm0,xmm0,0 + rsqrtps xmm0,xmm0 + mulps xmm0,xmm1 + movlps [edi],xmm0 + movhlps xmm0,xmm0 + movss [edi+8],xmm0 +end if +if 0 ; Ext >= SSE3 movups xmm0,[edi] andps xmm0,[zero_hgst_dd] movups xmm1,xmm0 @@ -210,7 +227,8 @@ if Ext >= SSE3 movlps [edi],xmm0 movhlps xmm0,xmm0 movss [edi+8],xmm0 -else +end if +if Ext < SSE fninit fld dword [edi+vec_x] diff --git a/programs/demos/3DS/3ds_objects/03.3ds b/programs/demos/view3ds/3ds_objects/03.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/03.3ds rename to programs/demos/view3ds/3ds_objects/03.3ds diff --git a/programs/demos/3DS/3ds_objects/House.3ds b/programs/demos/view3ds/3ds_objects/House.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/House.3ds rename to programs/demos/view3ds/3ds_objects/House.3ds diff --git a/programs/demos/3DS/3ds_objects/UV_Cyl.3ds b/programs/demos/view3ds/3ds_objects/UV_Cyl.3ds similarity index 100% rename from 
programs/demos/3DS/3ds_objects/UV_Cyl.3ds rename to programs/demos/view3ds/3ds_objects/UV_Cyl.3ds diff --git a/programs/demos/3DS/3ds_objects/ZS_Mouth.3DS b/programs/demos/view3ds/3ds_objects/ZS_Mouth.3DS similarity index 100% rename from programs/demos/3DS/3ds_objects/ZS_Mouth.3DS rename to programs/demos/view3ds/3ds_objects/ZS_Mouth.3DS diff --git a/programs/demos/3DS/3ds_objects/ZS_Pos3.3DS b/programs/demos/view3ds/3ds_objects/ZS_Pos3.3DS similarity index 100% rename from programs/demos/3DS/3ds_objects/ZS_Pos3.3DS rename to programs/demos/view3ds/3ds_objects/ZS_Pos3.3DS diff --git a/programs/demos/3DS/3ds_objects/ZS_Sword.3DS b/programs/demos/view3ds/3ds_objects/ZS_Sword.3DS similarity index 100% rename from programs/demos/3DS/3ds_objects/ZS_Sword.3DS rename to programs/demos/view3ds/3ds_objects/ZS_Sword.3DS diff --git a/programs/demos/3DS/3ds_objects/cone.3ds b/programs/demos/view3ds/3ds_objects/cone.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/cone.3ds rename to programs/demos/view3ds/3ds_objects/cone.3ds diff --git a/programs/demos/3DS/3ds_objects/cube.3ds b/programs/demos/view3ds/3ds_objects/cube.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/cube.3ds rename to programs/demos/view3ds/3ds_objects/cube.3ds diff --git a/programs/demos/3DS/3ds_objects/face.3DS b/programs/demos/view3ds/3ds_objects/face.3DS similarity index 100% rename from programs/demos/3DS/3ds_objects/face.3DS rename to programs/demos/view3ds/3ds_objects/face.3DS diff --git a/programs/demos/3DS/3ds_objects/fighter.3ds b/programs/demos/view3ds/3ds_objects/fighter.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/fighter.3ds rename to programs/demos/view3ds/3ds_objects/fighter.3ds diff --git a/programs/demos/3DS/3ds_objects/hrt.3ds b/programs/demos/view3ds/3ds_objects/hrt.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/hrt.3ds rename to programs/demos/view3ds/3ds_objects/hrt.3ds diff --git 
a/programs/demos/3DS/3ds_objects/iron.3ds b/programs/demos/view3ds/3ds_objects/iron.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/iron.3ds rename to programs/demos/view3ds/3ds_objects/iron.3ds diff --git a/programs/demos/3DS/3ds_objects/knight.3ds b/programs/demos/view3ds/3ds_objects/knight.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/knight.3ds rename to programs/demos/view3ds/3ds_objects/knight.3ds diff --git a/programs/demos/3DS/3ds_objects/knot.3ds b/programs/demos/view3ds/3ds_objects/knot.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/knot.3ds rename to programs/demos/view3ds/3ds_objects/knot.3ds diff --git a/programs/demos/3DS/3ds_objects/opacity2.3ds b/programs/demos/view3ds/3ds_objects/opacity2.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/opacity2.3ds rename to programs/demos/view3ds/3ds_objects/opacity2.3ds diff --git a/programs/demos/3DS/3ds_objects/second-A.3ds b/programs/demos/view3ds/3ds_objects/second-A.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/second-A.3ds rename to programs/demos/view3ds/3ds_objects/second-A.3ds diff --git a/programs/demos/3DS/3ds_objects/shield1.3ds b/programs/demos/view3ds/3ds_objects/shield1.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/shield1.3ds rename to programs/demos/view3ds/3ds_objects/shield1.3ds diff --git a/programs/demos/3DS/3ds_objects/sink.3ds b/programs/demos/view3ds/3ds_objects/sink.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/sink.3ds rename to programs/demos/view3ds/3ds_objects/sink.3ds diff --git a/programs/demos/3DS/3ds_objects/surf32.3ds b/programs/demos/view3ds/3ds_objects/surf32.3ds similarity index 100% rename from programs/demos/3DS/3ds_objects/surf32.3ds rename to programs/demos/view3ds/3ds_objects/surf32.3ds diff --git a/programs/demos/3DS/3ds_objects/teapot.3ds b/programs/demos/view3ds/3ds_objects/teapot.3ds similarity index 100% rename 
from programs/demos/3DS/3ds_objects/teapot.3ds
rename to programs/demos/view3ds/3ds_objects/teapot.3ds
diff --git a/programs/demos/3DS/3ds_objects/torus0.3ds b/programs/demos/view3ds/3ds_objects/torus0.3ds
similarity index 100%
rename from programs/demos/3DS/3ds_objects/torus0.3ds
rename to programs/demos/view3ds/3ds_objects/torus0.3ds
diff --git a/programs/demos/view3ds/3glass.inc b/programs/demos/view3ds/3glass.inc
new file mode 100644
index 0000000000..e4e0d495cf
--- /dev/null
+++ b/programs/demos/view3ds/3glass.inc
@@ -0,0 +1,543 @@
+; Glass like rendering triangle by Maciej Guba.
+; http://macgub.hekko.pl, macgub3@wp.pl
+
+ROUND2 equ 10
+glass_tri:
+;----procedure render glass like triangle with z coord --
+;----interpolation ( Catmull algorithm )-----------------
+;----I normalize normal vector in every pixel -----------
+;------------------in - eax - x1 shl 16 + y1 ------------
+;---------------------- ebx - x2 shl 16 + y2 ------------
+;---------------------- ecx - x3 shl 16 + y3 ------------
+;---------------------- edx - ptr to stencil_buff -------
+;---------------------- esi - pointer to Z-buffer filled-
+;---------------------- with dd float variables--------
+;---------------------- edi - pointer to screen buffer---
+;---------------------- xmm0 - 1st normal vector --------
+;---------------------- xmm1 - 2nd normal vector --------
+;---------------------- xmm2 - 3rd normal vector --------
+;---------------------- xmm3 - normalized light vector --
+;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
+;---------------------- as dwords floats ---------------
+;---------------------- xmm5 - lo -> hi y_min, y_max, ---
+;---------------------- x_min, x_max as dword integers -
+;---------------------- stack - no parameters -----------
+;--------------------------------------------------------
+;----------------- procedure does not save registers !! -
+
+
+
+
+        push    ebp
+        mov     ebp,esp
+        sub     esp,512                 ; room for the locals below
+        sub     ebp,16
+        and     ebp,0xfffffff0          ; ebp = 16-byte aligned base for movaps/movdqa locals
+
+  .1_nv  equ [ebp-16]
+  .2_nv  equ [ebp-32]
+  .3_nv  equ [ebp-48]
+  .l_v   equ [ebp-64]
+  .z3    equ [ebp-72]
+  .z2    equ [ebp-76]
+  .z1    equ [ebp-80]
+  .x1    equ [ebp-82]
+  .y1    equ [ebp-84]
+  .x2    equ [ebp-86]
+  .y2    equ [ebp-88]
+  .x3    equ [ebp-90]
+  .y3    equ [ebp-92]
+  .Zbuf  equ [ebp-96]
+  .x_max equ [ebp-100]
+  .x_min equ [ebp-104]
+  .y_max equ [ebp-108]
+  .y_min equ [ebp-112]
+  .screen equ [ebp-116]
+  .dx12  equ [ebp-120]
+  .dx13  equ [ebp-124]
+  .dx23  equ [ebp-128]
+  .dn12  equ [ebp-144]
+  .dn13  equ [ebp-160]
+  .dn23  equ [ebp-176]
+  .dz12  equ [ebp-180]
+  .dz13  equ [ebp-184]
+  .dz23  equ [ebp-188]
+
+  .cnv1  equ [ebp-208]  ; cur normal vectors
+  .cnv2  equ [ebp-224]
+  .cz2   equ [ebp-228]
+  .cz1   equ [ebp-232]
+  .stencil_buff equ [ebp-236]
+
+
+
+
+  .sort3:                 ; sort triangle coordinates...
+        cmp     ax,bx                   ; low words hold y: order vertices by ascending y
+        jle     .sort1
+        xchg    eax,ebx
+        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2 together with the vertices
+        movaps  xmm6,xmm0
+        movaps  xmm0,xmm1
+        movaps  xmm1,xmm6
+
+
+  .sort1:
+        cmp     bx,cx
+        jle     .sort2
+        xchg    ebx,ecx
+        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
+        movaps  xmm6,xmm1
+        movaps  xmm1,xmm2
+        movaps  xmm2,xmm6
+
+        jmp     .sort3
+
+  .sort2:
+
+        movaps  .z1,xmm4
+        mov     .y1,eax                 ; one dword store fills .y1 (low word) and .x1 (high word)
+        mov     .y2,ebx
+        mov     .y3,ecx
+        mov     .stencil_buff, edx
+
+        movdqa  .y_min,xmm5
+if 1                              ; check if at least a fragment
+        packssdw xmm5,xmm5              ; of triangle is in visible area
+        pshuflw xmm5,xmm5,11011000b
+        movdqu  xmm7,.y3                ; 16 bytes from .y3: y3,x3,y2,x2,y1,x1 and both halves of .z1
+        movdqa  xmm6,xmm5
+        pshufd  xmm5,xmm5,0             ; xmm5 lo-hi -> broadcasted y_min, x_min
+        pshufd  xmm6,xmm6,01010101b     ;xmm6 -> brd y_max x_max
+        movdqa  xmm4,xmm7
+        pcmpgtw xmm7,xmm5
+        pcmpgtw xmm4,xmm6
+        pxor    xmm7,xmm4               ; word set <=> min < coordinate <= max
+        pmovmskb eax,xmm7
+        and     eax,0x00aaaaaa          ; NOTE(review): bits 13/15 come from the .z1 halves, not a coordinate - confirm mask
+        or      eax,eax
+        jz      .rpt_loop2_end          ; no coordinate inside the clip range: draw nothing
+end if
+        movaps  .1_nv,xmm0
+        movaps  .2_nv,xmm1
+        movaps  .3_nv,xmm2
+        movaps  .l_v,xmm3
+      ;  mov     .Zbuf,esi              ; esi is not stored: this routine never reads .Zbuf
+        mov     .screen,edi
+
+
+
+        mov     bx,.y2                  ; calc deltas
+        sub     bx,.y1
+        jnz     .rpt_dx12_make
+
+        xorps   xmm7,xmm7               ; y1 = y2: edge 1-2 has no rows, zero its deltas
+        mov     dword .dx12,0
+        mov     dword .dz12,0
+        movaps  .dn12,xmm7
+        jmp     .rpt_dx12_done
+
+  .rpt_dx12_make:
+        mov     ax,.x2
+        sub     ax,.x1
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2              ; x step per row, fixed point with ROUND2 fraction bits
+        cdq
+        idiv    ebx
+        mov     .dx12,eax
+
+        cvtsi2ss xmm6,ebx
+        movss   xmm5,.z2
+        subss   xmm5,.z1
+        divss   xmm5,xmm6
+        movss   .dz12,xmm5
+
+        movaps  xmm0,.2_nv
+        subps   xmm0,.1_nv
+        shufps  xmm6,xmm6,0
+        divps   xmm0,xmm6
+        movaps  .dn12,xmm0
+
+
+  .rpt_dx12_done:
+
+        mov     bx,.y3                  ; calc deltas
+        sub     bx,.y1
+        jnz     .rpt_dx13_make
+
+        xorps   xmm7,xmm7
+        mov     dword .dx13,0
+        mov     dword .dz13,0
+        movaps  .dn13,xmm7
+        jmp     .rpt_dx13_done
+
+  .rpt_dx13_make:
+        mov     ax,.x3
+        sub     ax,.x1
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2
+        cdq
+        idiv    ebx
+        mov     .dx13,eax
+
+        cvtsi2ss xmm6,ebx
+        movss   xmm5,.z3
+        subss   xmm5,.z1
+        divss   xmm5,xmm6
+        movss   .dz13,xmm5
+
+        movaps  xmm0,.3_nv
+        subps   xmm0,.1_nv
+        shufps  xmm6,xmm6,0
+        divps   xmm0,xmm6
+        movaps  .dn13,xmm0
+
+  .rpt_dx13_done:
+
+        mov     bx,.y3                  ; calc deltas
+        sub     bx,.y2
+        jnz     .rpt_dx23_make
+
+        xorps   xmm7,xmm7
+        mov     dword .dx23,0
+        mov     dword .dz23,0
+        movaps  .dn23,xmm7
+        jmp     .rpt_dx23_done
+
+  .rpt_dx23_make:
+        mov     ax,.x3
+        sub     ax,.x2
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2
+        cdq
+        idiv    ebx
+        mov     .dx23,eax
+
+        cvtsi2ss xmm6,ebx
+        movss   xmm5,.z3
+        subss   xmm5,.z2
+        divss   xmm5,xmm6
+        movss   .dz23,xmm5
+
+        movaps  xmm0,.3_nv
+        subps   xmm0,.2_nv
+        shufps  xmm6,xmm6,0
+        divps   xmm0,xmm6
+        movaps  .dn23,xmm0
+
+  .rpt_dx23_done:
+
+
+        movsx   eax,word .x1
+        shl     eax,ROUND2
+        mov     ebx,eax                 ; both edge walkers start at x1 (fixed point)
+        mov     edx,.z1
+        mov     .cz1,edx
+        mov     .cz2,edx
+        movaps  xmm0,.1_nv
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm0
+
+
+        movsx   ecx,word .y1
+        cmp     cx,.y2
+
+        jge     .rpt_loop1_end
+
+  .rpt_loop1:
+        pushad                          ; keep eax/ebx (fixed-point x) and ecx (y) across the call
+
+        movaps  xmm2,.y_min
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+        movlps  xmm3,.cz1               ; loads .cz1 and .cz2 (adjacent dwords)
+        movaps  xmm4,.l_v
+        sar     ebx,ROUND2              ; glass_line takes integer x
+        sar     eax,ROUND2
+        mov     edx,.stencil_buff
+        mov     edi,.screen
+     ;   mov     esi,.Zbuf
+
+        call    glass_line
+
+        popad
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+        movss   xmm2,.cz1
+        movss   xmm3,.cz2
+        addps   xmm0,.dn13
+        addps   xmm1,.dn12
+        addss   xmm2,.dz13
+        addss   xmm3,.dz12
+        add     eax,.dx13
+        add     ebx,.dx12
+
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm1
+        movss   .cz1,xmm2
+        movss   .cz2,xmm3
+
+        add     ecx,1
+        cmp     cx,.y2
+        jl      .rpt_loop1
+
+
+
+
+
+  .rpt_loop1_end:
+        movsx   ecx,word .y2
+        cmp     cx,.y3
+        jge     .rpt_loop2_end
+
+        movsx   ebx,word .x2            ; eax - cur x1
+        shl     ebx,ROUND2              ; ebx - cur x2
+        push    dword .z2
+        pop     dword .cz2
+        movaps  xmm0,.2_nv
+        movaps  .cnv2,xmm0
+
+
+  .rpt_loop2:
+        pushad
+
+        movaps  xmm2,.y_min
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+        movlps  xmm3,.cz1
+        movaps  xmm4,.l_v
+        sar     ebx,ROUND2
+        sar     eax,ROUND2
+        mov     edx,.stencil_buff
+        mov     edi,.screen
+     ;   mov     esi,.Zbuf
+
+        call    glass_line
+
+        popad
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+        movss   xmm2,.cz1
+        movss   xmm3,.cz2
+        addps   xmm0,.dn13
+        addps   xmm1,.dn23
+        addss   xmm2,.dz13
+        addss   xmm3,.dz23
+        add     eax,.dx13
+        add     ebx,.dx23
+
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm1
+        movss   .cz1,xmm2
+        movss   .cz2,xmm3
+
+        add     ecx,1
+        cmp     cx,.y3
+        jl      .rpt_loop2
+
+  .rpt_loop2_end:
+
+        add     esp,512
+        pop     ebp
+
+ret
+align 16
+glass_line:
+; in:
+;    xmm0 - normal vector 1
+;    xmm1 - normal vect 2
+;    xmm3 - lo -> hi z1, z2 coords as dwords floats
+;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
+;    as dword integers
+;    xmm4 - normalized light vector
+;    eax - x1
+;    ebx - x2
+;    ecx - y
+;    edx - stencil buff ptr
+;    edi - screen buffer
+;    esi - z buffer  ===> not needed in glass rendering
+
+        push    ebp
+        mov     ebp,esp
+        sub     esp,256
+        sub     ebp,16
+        and     ebp,0xfffffff0          ; 16-byte aligned base for the movaps locals
+
+  .n1    equ [ebp-16]
+  .n2    equ [ebp-32]
+  .lv    equ [ebp-48]
+  .lx1   equ [ebp-52]
+  .lx2   equ [ebp-56]
+  .z2    equ [ebp-60]
+  .z1    equ [ebp-64]
+  .screen equ [ebp-68]
+  .zbuff equ [ebp-72]
+  .x_max equ [ebp-74]
+  .x_min equ [ebp-76]
+  .y_max equ [ebp-78]
+  .y_min equ [ebp-80]
+  .dn    equ [ebp-96]
+  .dz    equ [ebp-100]
+  .y     equ [ebp-104]
+  .cnv   equ [ebp-128]
+  .col_sum_b equ [ebp-136]
+  .col_sum_g equ [ebp-140]
+  .col_sum_r equ [ebp-144]
+  .cur_col   equ [ebp-160]
+  .stencil_buf equ [ebp-164]
+
+        mov     .y,ecx
+        packssdw xmm2,xmm2              ; clip bounds dword -> word
+        movq    .y_min,xmm2             ; fills .y_min,.y_max,.x_min,.x_max
+        cmp     cx,.y_min
+        jl      .end_rp_line
+        cmp     cx,.y_max
+        jge     .end_rp_line  ;
+
+        cmp     eax,ebx
+        je      .end_rp_line
+        jl      @f
+        xchg    eax,ebx                 ; make eax the left end; swap normals and z with it
+        movaps  xmm7,xmm0
+        movaps  xmm0,xmm1
+        movaps  xmm1,xmm7
+        shufps  xmm3,xmm3,11100001b
+      @@:
+
+        cmp     ax,.x_max
+        jge     .end_rp_line
+        cmp     bx,.x_min
+        jle     .end_rp_line
+        movaps  .lv,xmm4
+        movaps  .n1,xmm0
+        movaps  .n2,xmm1
+        mov     .lx1,eax
+        mov     .lx2,ebx
+        mov     .stencil_buf,edx
+        movlps  .z1,xmm3
+
+        sub     ebx,eax
+        cvtsi2ss xmm7,ebx
+        shufps  xmm7,xmm7,0
+        subps   xmm1,xmm0
+        divps   xmm1,xmm7               ; per-pixel normal step
+        movaps  .dn,xmm1
+        psrldq  xmm3,4
+        subss   xmm3,.z1
+        divss   xmm3,xmm7               ; per-pixel z step
+        movss   .dz,xmm3
+
+        mov     ebx,.lx1
+        cmp     bx,.x_min               ; clipping on function4
+        jge     @f
+        movzx   eax,word .x_min
+        sub     eax,ebx                 ; pixels skipped on the left
+        cvtsi2ss xmm7,eax
+        shufps  xmm7,xmm7,0
+        mulss   xmm3,xmm7
+        mulps   xmm1,xmm7
+        addss   xmm3,.z1
+        addps   xmm1,.n1
+        movsx   eax,word .x_min
+        movss   .z1,xmm3
+        movaps  .n1,xmm1
+        mov     dword .lx1,eax
+
+      @@:
+        movzx   eax,word .x_max
+        cmp     .lx2,eax
+        jl      @f
+        mov     .lx2,eax
+      @@:
+        movzx   eax,word[xres_var]
+        mul     dword .y                ; clobbers edx (already saved in .stencil_buf)
+
+        add     eax,.lx1
+        shl     eax,2                   ; byte offset of the first pixel (4 bytes each)
+        add     edi,eax
+        mov     ebx,eax
+        add     ebx,.stencil_buf        ; ebx -> stencil entry of the first pixel
+
+
+        mov     ecx,.lx2
+        sub     ecx,.lx1                ; ecx = pixel count
+
+        movaps  xmm0,.n1
+        movss   xmm2,.z1
+align 16
+     .ddraw:
+        movaps  xmm7,xmm0
+        mulps   xmm7,xmm7               ; normalize
+        andps   xmm7,[zero_hgst_dd]
+        haddps  xmm7,xmm7
+        haddps  xmm7,xmm7
+        rsqrtps xmm7,xmm7
+        mulps   xmm7,xmm0
+      ;  maxps   xmm7,[the_zero]
+        movaps  .cnv,xmm7
+
+        mov     edx,lights_aligned      ; lights_aligned - global variable
+        xorps   xmm1,xmm1               ; instead global can be used .lv - light vect.
+     .again_col:
+        movaps  xmm3,.cnv
+        mulps   xmm3,[edx]
+        haddps  xmm3,xmm3
+        haddps  xmm3,xmm3               ; xmm3 - dot pr
+
+     ;   cmp     [bump_flag],1          ; on/off temporarily
+     ;   depend on bump button
+     ;   je      @f
+     ; stencil
+        movss   xmm5,xmm2
+        movss   xmm6,xmm2
+        addss   xmm5,[aprox]
+        subss   xmm6,[aprox]
+        cmpnltss xmm5,dword[ebx]
+        cmpnltss xmm6,dword[ebx]
+        xorps   xmm5,xmm6               ; low dword nonzero <=> exactly one compare was true
+        movd    eax,xmm5
+        xorps   xmm6,xmm6               ; fix: zero the specular term, else .no_reflective adds the compare mask / stale lanes
+        or      eax,eax
+        jz      .no_reflective
+        movaps  xmm6,xmm3               ;xmm7
+        mulps   xmm6,xmm6
+        mulps   xmm6,xmm6
+        mulps   xmm6,xmm6
+        mulps   xmm6,xmm6
+        mulps   xmm6,xmm6               ; five squarings: dot^32
+        mulps   xmm6,[edx+48]
+     .no_reflective:
+        movaps  xmm7,xmm3
+        mulps   xmm7,[edx+16]
+        addps   xmm7,xmm6
+        addps   xmm7,[edx+32]
+        minps   xmm7,[mask_255f]        ; global
+
+        maxps   xmm1,xmm7               ; keep the per-channel maximum over all lights
+        add     edx,64                  ; size of one light in aligned list
+        cmp     edx,lights_aligned_end
+        jl      .again_col
+        cvtps2dq xmm1,xmm1
+        movd    xmm6,[edi]
+        packssdw xmm1,xmm1
+        packuswb xmm1,xmm1
+        paddusb xmm1,xmm6               ; saturating add onto the pixel already in the buffer
+        movd    [edi],xmm1
+
+
+     .skip:
+        add     edi,4
+        add     ebx,4                   ; stencil_buff
+        addps   xmm0,.dn
+        addss   xmm2,.dz
+        sub     ecx,1
+        jnz     .ddraw
+
+  .end_rp_line:
+        add     esp,256
+        pop     ebp
+
+ret
diff --git a/programs/demos/view3ds/3glass_tex.inc b/programs/demos/view3ds/3glass_tex.inc
new file mode 100644
index 0000000000..549d80cf75
--- /dev/null
+++ b/programs/demos/view3ds/3glass_tex.inc
@@ -0,0 +1,762 @@
+; Bilinear filtering, real Phongs shading and glass like parallel.
+; Thanks to authors of 3dica tutorial.
+; Implemented in FASM by Maciej Guba.
+; http://macgub.j.pl
+
+ROUND2 equ 10
+
+glass_tex_tri:
+;----Procedure render Phongs shaded triangle with z coord
+;----interpolation ( Catmull algorithm ), each pixel is -
+;----covered by texture using bilinear filtering.--------
+;----I normalize normal vector in every pixel -----------
+;------------------in - eax - x1 shl 16 + y1 ------------
+;---------------------- ebx - x2 shl 16 + y2 ------------
+;---------------------- ecx - x3 shl 16 + y3 ------------
+;---------------------- esi - pointer to stencil buffer--
+;---------------------- filled with dd float variables-
+;---------------------- edi - pointer to screen buffer---
+;---------------------- edx - pointer to texture---------
+;---------------------- xmm0 - 1st normal vector --------
+;---------------------- xmm1 - 2nd normal vector --------
+;---------------------- xmm2 - 3rd normal vector --------
+;---------------------- xmm3 - normalized light vector --
+;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
+;---------------------- as dwords floats ---------------
+;---------------------- xmm5 - lo -> hi y_min, y_max, ---
+;---------------------- x_min, x_max as dword integers -
+;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, --
+;---------------------- ty2, tx3, ty3 as word, xres as--
+;---------------------- dword integers------------------
+;---------------------- stack - no parameters -----------
+;--------------------------------------------------------
+;----------------- procedure does not save registers !! -
+
+
+
+
+        push    ebp
+        mov     ebp,esp
+        sub     esp,512                 ; room for the locals below
+        sub     ebp,16
+        and     ebp,0xfffffff0          ; ebp = 16-byte aligned base for movaps/movdqa locals
+
+  .1_nv  equ [ebp-16]
+  .2_nv  equ [ebp-32]
+  .3_nv  equ [ebp-48]
+  .l_v   equ [ebp-64]
+  .z3    equ [ebp-72]
+  .z2    equ [ebp-76]
+  .z1    equ [ebp-80]
+  .x1    equ [ebp-82]
+  .y1    equ [ebp-84]
+  .x2    equ [ebp-86]
+  .y2    equ [ebp-88]
+  .x3    equ [ebp-90]
+  .y3    equ [ebp-92]
+  .Zbuf  equ [ebp-96]
+  .x_max equ [ebp-100]
+  .x_min equ [ebp-104]
+  .y_max equ [ebp-108]
+  .y_min equ [ebp-112]
+  .screen equ [ebp-116]
+  .dx12  equ [ebp-120]
+  .dx13  equ [ebp-124]
+  .dx23  equ [ebp-128]
+  .dn12  equ [ebp-144]
+  .dn13  equ [ebp-160]
+  .dn23  equ [ebp-176]
+
+  .cnv1  equ [ebp-192]  ; cur normal vectors
+  .cnv2  equ [ebp-208]
+  .x_res equ [ebp-212]
+  .ty3   equ [ebp-214]
+  .tx3   equ [ebp-216]
+  .ty2   equ [ebp-218]
+  .tx2   equ [ebp-220]
+  .ty1   equ [ebp-222]
+  .tx1   equ [ebp-224]
+  .dz12  equ [ebp-232]
+  .dty12 equ [ebp-236]
+  .dtx12 equ [ebp-240]
+  .dz13  equ [ebp-248]
+  .dty13 equ [ebp-252]
+  .dtx13 equ [ebp-256]
+  .dz23  equ [ebp-264]
+  .dty23 equ [ebp-268]
+  .dtx23 equ [ebp-272]
+  .cz1   equ [ebp-280]
+  .cty1  equ [ebp-284]
+  .ctx1  equ [ebp-288]
+  .cz2   equ [ebp-296]
+  .cty2  equ [ebp-300]
+  .ctx2  equ [ebp-304]
+  .tx_ptr equ [ebp-308]
+
+
+        emms
+      ;  movd    .x_res,xmm7
+  .sort3:                 ; sort triangle coordinates...
+        cmp     ax,bx                   ; low words hold y: order vertices by ascending y
+        jle     .sort1
+        xchg    eax,ebx
+        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
+        shufps  xmm6,xmm6,11100001b     ; swap (tx1,ty1) <-> (tx2,ty2)
+        movaps  xmm7,xmm0
+        movaps  xmm0,xmm1
+        movaps  xmm1,xmm7
+
+
+  .sort1:
+        cmp     bx,cx
+        jle     .sort2
+        xchg    ebx,ecx
+        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
+        shufps  xmm6,xmm6,11011000b     ; swap (tx2,ty2) <-> (tx3,ty3)
+        movaps  xmm7,xmm1
+        movaps  xmm1,xmm2
+        movaps  xmm2,xmm7
+
+        jmp     .sort3
+
+  .sort2:
+     ;   movq    .tx1,xmm6
+     ;   pshufd  xmm6,xmm6,01001110b
+     ;   movd    .tx3,xmm6
+        movaps  .tx1,xmm6               ; 16-byte store: .tx1 .. .ty3 and .x_res
+        movaps  .z1,xmm4
+        mov     .y1,eax                 ; one dword store fills .y1 (low word) and .x1 (high word)
+        mov     .y2,ebx
+        mov     .y3,ecx
+
+        movdqa  .y_min,xmm5
+if 1                              ; check if at least a fragment
+        packssdw xmm5,xmm5              ; of triangle is in visible area
+        pshuflw xmm5,xmm5,11011000b
+        movdqu  xmm7,.y3                ; 16 bytes from .y3: y3,x3,y2,x2,y1,x1 and both halves of .z1
+        movdqa  xmm6,xmm5
+        pshufd  xmm5,xmm5,0             ; xmm5 lo-hi -> broadcasted y_min, x_min
+        pshufd  xmm6,xmm6,01010101b     ;xmm6 -> brd y_max x_max
+        movdqa  xmm4,xmm7
+        pcmpgtw xmm7,xmm5
+        pcmpgtw xmm4,xmm6
+        pxor    xmm7,xmm4               ; word set <=> min < coordinate <= max
+        pmovmskb eax,xmm7
+        and     eax,0x00aaaaaa          ; NOTE(review): bits 13/15 come from the .z1 halves, not a coordinate - confirm mask
+        or      eax,eax
+        jz      .rpt_loop2_end          ; no coordinate inside the clip range: draw nothing
+end if
+        movaps  .1_nv,xmm0
+        movaps  .2_nv,xmm1
+        movaps  .3_nv,xmm2
+        movaps  .l_v,xmm3
+        mov     .Zbuf,esi
+        mov     .screen,edi
+        mov     .tx_ptr,edx
+
+
+
+        mov     bx,.y2                  ; calc deltas
+        sub     bx,.y1
+        jnz     .rpt_dx12_make
+
+        xorps   xmm7,xmm7               ; y1 = y2: edge 1-2 has no rows, zero its deltas
+        mov     dword .dx12,0
+        movaps  .dtx12,xmm7             ; also clears .dty12 and .dz12
+        movaps  .dn12,xmm7
+        jmp     .rpt_dx12_done
+
+  .rpt_dx12_make:
+        mov     ax,.x2
+        sub     ax,.x1
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2              ; x step per row, fixed point with ROUND2 fraction bits
+        cdq
+        idiv    ebx
+        mov     .dx12,eax
+
+        cvtsi2ss xmm6,ebx
+        shufps  xmm6,xmm6,0
+        movss   xmm5,.z2
+        subss   xmm5,.z1
+        divss   xmm5,xmm6
+        movss   .dz12,xmm5
+
+        movd    xmm0,.tx1
+        movd    xmm2,.tx2
+        pxor    xmm1,xmm1
+        punpcklwd xmm0,xmm1             ; widen word tx,ty to dwords
+        punpcklwd xmm2,xmm1
+        psubd   xmm2,xmm0
+     ;   cvtdq2ps  xmm0,xmm0
+        cvtdq2ps xmm2,xmm2
+;       movlps  .ctx1,xmm0
+;       movlps  .ctx2,xmm2
+     ;   subps   xmm2,xmm0
+        divps   xmm2,xmm6
+        movlps  .dtx12,xmm2
+
+        movaps  xmm0,.2_nv
+        subps   xmm0,.1_nv
+        divps   xmm0,xmm6
+        movaps  .dn12,xmm0
+
+
+  .rpt_dx12_done:
+
+        mov     bx,.y3                  ; calc deltas
+        sub     bx,.y1
+        jnz     .rpt_dx13_make
+
+        xorps   xmm7,xmm7
+        mov     dword .dx13,0
+        movaps  .dtx13,xmm7
+        movaps  .dn13,xmm7
+        jmp     .rpt_dx13_done
+
+  .rpt_dx13_make:
+        mov     ax,.x3
+        sub     ax,.x1
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2
+        cdq
+        idiv    ebx
+        mov     .dx13,eax
+
+
+        cvtsi2ss xmm6,ebx
+        shufps  xmm6,xmm6,0
+
+        movss   xmm5,.z3
+        subss   xmm5,.z1
+        divss   xmm5,xmm6
+        movss   .dz13,xmm5
+
+        movd    xmm0,.tx1
+        movd    xmm2,.tx3
+        pxor    xmm1,xmm1
+        punpcklwd xmm0,xmm1
+        punpcklwd xmm2,xmm1
+        psubd   xmm2,xmm0
+     ;   cvtdq2ps  xmm0,xmm0
+        cvtdq2ps xmm2,xmm2
+     ;   subps   xmm2,xmm0
+        divps   xmm2,xmm6
+        movlps  .dtx13,xmm2
+
+
+
+        movaps  xmm0,.3_nv
+        subps   xmm0,.1_nv
+        divps   xmm0,xmm6
+        movaps  .dn13,xmm0
+
+  .rpt_dx13_done:
+
+        mov     bx,.y3                  ; calc deltas
+        sub     bx,.y2
+        jnz     .rpt_dx23_make
+
+        xorps   xmm7,xmm7
+        mov     dword .dx23,0
+        movaps  .dtx23,xmm7
+        movaps  .dn23,xmm7
+        jmp     .rpt_dx23_done
+
+  .rpt_dx23_make:
+        mov     ax,.x3
+        sub     ax,.x2
+        cwde
+        movsx   ebx,bx
+        shl     eax,ROUND2
+        cdq
+        idiv    ebx
+        mov     .dx23,eax
+
+        cvtsi2ss xmm6,ebx
+        shufps  xmm6,xmm6,0
+        movss   xmm5,.z3
+        subss   xmm5,.z2
+        divss   xmm5,xmm6
+        movss   .dz23,xmm5
+
+        movd    xmm0,.tx2
+        movd    xmm2,.tx3
+        pxor    xmm1,xmm1
+        punpcklwd xmm0,xmm1
+        punpcklwd xmm2,xmm1
+        psubd   xmm2,xmm0
+     ;   cvtdq2ps  xmm0,xmm0
+        cvtdq2ps xmm2,xmm2
+;       movlps  .ctx1,xmm0
+;       movlps  .ctx2,xmm2
+     ;   subps   xmm2,xmm0
+        divps   xmm2,xmm6
+        movlps  .dtx23,xmm2
+
+
+
+
+        movaps  xmm0,.3_nv
+        subps   xmm0,.2_nv
+        divps   xmm0,xmm6
+        movaps  .dn23,xmm0
+
+  .rpt_dx23_done:
+
+        movsx   eax,word .x1
+        shl     eax,ROUND2
+        mov     ebx,eax                 ; both edge walkers start at x1 (fixed point)
+        mov     edx,.z1
+        movd    xmm1,.tx1
+        pxor    xmm2,xmm2
+        punpcklwd xmm1,xmm2
+        cvtdq2ps xmm1,xmm1
+
+        mov     .cz1,edx
+        mov     .cz2,edx
+        movaps  xmm0,.1_nv
+        movlps  .ctx1,xmm1
+        movlps  .ctx2,xmm1
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm0
+
+     ;   mov     edx,.dx13
+     ;   cmp     edx,.dx12
+     ;   jg      .second_cause
+
+        movsx   ecx,word .y1
+        cmp     cx,.y2
+
+        jge     .rpt_loop1_end
+
+  .rpt_loop1:
+        pushad                          ; keep eax/ebx (fixed-point x) and ecx (y) across the call
+
+        movaps  xmm2,.y_min
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+     ;   movlps  xmm3,.cz1       ; cz1, cz2 both
+        movaps  xmm3,.ctx1              ; ctx1, cty1, cz1 in one load
+        movaps  xmm5,.ctx2
+        movaps  xmm4,.l_v
+        movd    xmm6,.x_res
+        sar     ebx,ROUND2              ; glass_tex_line takes integer x
+        sar     eax,ROUND2
+        mov     edx,.tx_ptr
+        mov     edi,.screen
+
+        mov     esi,.Zbuf
+
+        call    glass_tex_line
+
+        popad
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+     ;   movss   xmm2,.cz1
+     ;   movss   xmm3,.cz2
+        movaps  xmm2,.ctx1
+        movaps  xmm3,.ctx2
+        addps   xmm0,.dn13
+        addps   xmm1,.dn12
+        addps   xmm2,.dtx13             ; steps tx, ty and z together
+        addps   xmm3,.dtx12
+        add     eax,.dx13
+        add     ebx,.dx12
+
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm1
+     ;   movss   .cz1,xmm2
+     ;   movss   .cz2,xmm3
+        movaps  .ctx1,xmm2
+        movaps  .ctx2,xmm3
+        add     ecx,1
+        cmp     cx,.y2
+        jl      .rpt_loop1
+
+
+     ;   jmp     .rpt_loop2_end
+
+
+  .rpt_loop1_end:
+        movsx   ecx,word .y2
+        cmp     cx,.y3
+        jge     .rpt_loop2_end
+
+        movsx   ebx,word .x2            ; eax - cur x1
+        shl     ebx,ROUND2              ; ebx - cur x2
+        push    dword .z2
+        pop     dword .cz2
+        movd    xmm1,.tx2
+        pxor    xmm2,xmm2
+        punpcklwd xmm1,xmm2
+        cvtdq2ps xmm1,xmm1
+        movlps  .ctx2,xmm1
+        movaps  xmm0,.2_nv
+        movaps  .cnv2,xmm0
+
+
+  .rpt_loop2:
+        pushad
+
+        movaps  xmm2,.y_min
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+        movaps  xmm3,.ctx1
+        movaps  xmm5,.ctx2
+        movaps  xmm4,.l_v
+        sar     ebx,ROUND2
+        sar     eax,ROUND2
+        mov     edx,.tx_ptr
+        mov     edi,.screen
+        mov     esi,.Zbuf
+        movd    xmm6,.x_res
+        call    glass_tex_line
+
+        popad
+        movaps  xmm0,.cnv1
+        movaps  xmm1,.cnv2
+     ;   movss   xmm2,.cz1
+     ;   movss   xmm3,.cz2
+        movaps  xmm2,.ctx1
+        movaps  xmm3,.ctx2
+        addps   xmm0,.dn13
+        addps   xmm1,.dn23
+     ;   addss   xmm2,.dz13
+     ;   addss   xmm3,.dz23
+        addps   xmm2,.dtx13
+        addps   xmm3,.dtx23
+
+        add     eax,.dx13
+        add     ebx,.dx23
+
+        movaps  .cnv1,xmm0
+        movaps  .cnv2,xmm1
+        movaps  .ctx1,xmm2
+        movaps  .ctx2,xmm3
+
+     ;   movss   .cz1,xmm2
+     ;   movss   .cz2,xmm3
+
+        add     ecx,1
+        cmp     cx,.y3
+        jl      .rpt_loop2
+
+  .second_cause:  ;dx13 > dx12
+
+  .rpt_loop2_end:
+
+        add     esp,512
+        pop     ebp
+
+ret
+align 16
+glass_tex_line:
+; in:
+;    xmm0 - normal vector 1
+;    xmm1 - normal vect 2
+;    xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float
+;    xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float
+;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
+;    as dword integers
+;    xmm4 - normalized light vector
+;    eax - x1
+;    ebx - x2
+;    ecx - y
+;    edi - screen buffer
+;    esi - stencil buffer filled with dd floats
+;    edx - texture pointer (handle)
+;    xmm6 - lowest dword x_res as integer
+
+        push    ebp
+        mov     ebp,esp
+        sub     esp,352                 ; was 350, which left esp off a dword boundary
+        sub     ebp,16
+        and     ebp,0xfffffff0          ; 16-byte aligned base for the movaps locals
+
+  .n1    equ [ebp-16]
+  .n2    equ [ebp-32]
+  .lv    equ [ebp-48]
+  .lx1   equ [ebp-52]
+  .lx2   equ [ebp-56]
+;  .z2    equ [ebp-60]
+;  .z1    equ [ebp-64]
+  .screen equ [ebp-68]
+  .zbuff equ [ebp-72]
+  .x_max equ [ebp-74]
+  .x_min equ [ebp-76]
+  .y_max equ [ebp-78]
+  .y_min equ [ebp-80]
+  .dn    equ [ebp-96]
+  .x_res equ [ebp-100]
+  .y     equ [ebp-104]
+  .cnv   equ [ebp-128]
+  .z1    equ [ebp-136]
+  .ty1   equ [ebp-140]
+  .tx1   equ [ebp-144]
+  .z2    equ [ebp-152]
+  .ty2   equ [ebp-156]
+  .tx2   equ [ebp-160]
+  .cz    equ [ebp-168]
+  .cty   equ [ebp-172]
+  .ctx   equ [ebp-176]
+  .dz    equ [ebp-184]
+  .dty   equ [ebp-188]
+  .dtx   equ [ebp-192]
+  .yd    equ [ebp-196]
+  .xd    equ [ebp-200]
+  .yf    equ [ebp-204]
+  .xf    equ [ebp-208]
+  .w4    equ [ebp-212]
+  .w3    equ [ebp-216]
+  .w2    equ [ebp-220]
+  .w1    equ [ebp-224]
+  .p4    equ [ebp-228]
+  .p3    equ [ebp-232]
+  .p2    equ [ebp-236]
+  .p1    equ [ebp-240]
+
+
+  .tx_ptr equ [ebp-244]
+
+     ;   movaps  xmm7,xmm3
+     ;   movaps  xmm3,xmm5
+     ;   movaps  xmm5,xmm7
+
+
+        mov     .y,ecx
+        packssdw xmm2,xmm2              ; clip bounds dword -> word
+     ;   movaps  xmm7,xmm2
+     ;   movhps  xmm2,[the_zero]
+     ;   pshuflw xmm2,xmm2,11111000b
+     ;   pshufd  xmm2,xmm2,11111100b
+     ;   movlps  xmm7,[the_zero]
+     ;   pshufhw xmm7,xmm7,11111111b
+     ;   movlps  xmm7,[the_zero]
+     ;   psrldq  xmm7,4
+     ;   por     xmm2,xmm7
+        movq    .y_min,xmm2             ; fills .y_min,.y_max,.x_min,.x_max
+        cmp     cx,.y_min
+        jl      .end_line
+        cmp     cx,.y_max
+        jge     .end_line  ;
+
+        cmp     eax,ebx
+        je      .end_line
+        jl      @f
+        xchg    eax,ebx                 ; make eax the left end; swap normals and tex/z with it
+        movaps  xmm7,xmm0
+        movaps  xmm0,xmm1
+        movaps  xmm1,xmm7
+        movaps  xmm7,xmm3
+        movaps  xmm3,xmm5
+        movaps  xmm5,xmm7
+      @@:
+
+        cmp     ax,.x_max
+        jge     .end_line
+        cmp     bx,.x_min
+        jle     .end_line
+        movaps  .lv,xmm4
+        movaps  .n1,xmm0
+        movaps  .n2,xmm1
+        mov     .lx1,eax
+        mov     .lx2,ebx
+        movaps  .tx1,xmm3
+        movaps  .tx2,xmm5
+        movd    .x_res,xmm6
+        mov     .tx_ptr,edx
+        sub     ebx,eax
+        cvtsi2ss xmm7,ebx
+        shufps  xmm7,xmm7,0
+        subps   xmm1,xmm0
+        divps   xmm1,xmm7               ; per-pixel normal step
+        movaps  .dn,xmm1
+        subps   xmm5,xmm3
+        divps   xmm5,xmm7               ; per-pixel tx, ty, z step
+        movaps  .dtx,xmm5
+
+
+
+        mov     ebx,.lx1
+        cmp     bx,.x_min               ; clipping on function4
+        jge     @f
+        movzx   eax,word .x_min
+        sub     eax,ebx                 ; pixels skipped on the left
+        cvtsi2ss xmm7,eax
+        shufps  xmm7,xmm7,0
+        mulps   xmm5,xmm7
+        mulps   xmm1,xmm7
+        addps   xmm5,.tx1
+        addps   xmm1,.n1
+        movsx   eax,word .x_min
+        movaps  .tx1,xmm5
+        movaps  .n1,xmm1
+        mov     dword .lx1,eax
+
+      @@:
+        movzx   eax,word .x_max
+        cmp     .lx2,eax
+        jl      @f
+        mov     .lx2,eax
+      @@:
+        mov     eax,.x_res
+        mul     dword .y                ; clobbers edx (already saved in .tx_ptr)
+        add     eax,.lx1
+        shl     eax,2                   ; byte offset of the first pixel (4 bytes each)
+        add     edi,eax
+        add     esi,eax
+
+        mov     ecx,.lx2
+        sub     ecx,.lx1                ; ecx = pixel count
+     ;   movaps  xmm0,.n1
+        movaps  xmm2,.tx1
+     ;   xorps   xmm1,xmm1
+align 16
+     .ddraw:
+     ;   movhlps xmm7,xmm2
+     ;   cmpnltss xmm7,dword[esi]
+     ;   movd    eax,xmm7
+     ;   or      eax,eax
+     ;   jnz     .skip
+        xorps   xmm5,xmm5               ; xmm5 = running per-channel maximum over the lights
+     ;   movhlps xmm7,xmm2
+     ;   movss   [esi],xmm7
+        movaps  xmm7,.n1                ;xmm0
+        mulps   xmm7,xmm7               ; normalize
+        haddps  xmm7,xmm7
+        haddps  xmm7,xmm7
+        rsqrtps xmm7,xmm7
+        mulps   xmm7,.n1                ;xmm0
+     ;   andps   xmm7,[abs_z_coof]
+        movaps  .cnv,xmm7
+
+        movaps  xmm6,xmm2
+        minps   xmm6,[tex_m2]           ; float TEX_X-2,TEX_Y-2
+        cvttps2dq xmm7,xmm6             ; integer texel coordinates
+        cvtdq2ps xmm4,xmm7
+        subps   xmm6,xmm4               ; fractional parts
+        movlps  .xf,xmm6
+     ;   movaps  xmm5,.lv
+        mov     eax,lights_aligned      ; global
+      align 16
+     .again_col:
+        movaps  xmm0,[eax]              ; calc multiple lights
+        mulps   xmm0,.cnv               ;.lv ; last dword should be zeroed
+        haddps  xmm0,xmm0
+        haddps  xmm0,xmm0
+     ;   andps   xmm0,[abs_val]         ;calc absolute value
+if 1
+     ; stencil
+        movhlps xmm6,xmm2
+        movhlps xmm4,xmm2
+        addss   xmm6,[aprox]
+        subss   xmm4,[aprox]
+        cmpnltss xmm6,dword[esi]
+        cmpnltss xmm4,dword[esi]
+        xorps   xmm6,xmm4               ; low dword all-ones <=> exactly one compare was true
+        xorps   xmm4,xmm4               ; specular term defaults to zero
+        movd    ebx,xmm6
+        cmp     ebx,-1
+        jne     .no_reflective
+end if
+        movaps  xmm4,xmm0
+        mulps   xmm4,xmm4
+        mulps   xmm4,xmm4
+        mulps   xmm4,xmm4
+        mulps   xmm4,xmm4               ; four squarings: dot^16
+        mulps   xmm4,[eax+48]
+
+     .no_reflective:
+        maxps   xmm0,[the_zero]
+     ;   movaps  xmm1,xmm0
+        mulps   xmm0,[eax+16]
+        addps   xmm4,xmm0
+        addps   xmm4,[eax+32]
+        maxps   xmm5,xmm4
+        add     eax,64
+        cmp     eax,lights_aligned_end
+        jnz     .again_col
+        minps   xmm5,[mask_255f]
+
+     ; texture coords work
+        movd    eax,xmm7
+        psrldq  xmm7,4
+        movd    ebx,xmm7
+        shl     ebx,TEX_SHIFT
+        add     eax,ebx
+        lea     eax,[eax*3]             ; 3 bytes per texel
+        add     eax,.tx_ptr
+        mov     ebx,eax
+        add     ebx,TEX_X*3             ; same column, next texture row
+        movd    xmm7,[eax]
+        movd    xmm6,[eax+3]
+        movd    xmm4,[ebx]
+        movd    xmm3,[ebx+3]
+        punpcklbw xmm7,xmm6             ;xmm7 r1 r2 g1 g2 b1 b2
+        punpcklbw xmm4,xmm3             ;xmm4 r3 r4 g3 g4 b3 b4
+        punpcklwd xmm7,xmm4             ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ...
+        movdqa  xmm6,xmm7
+        movdqa  xmm4,xmm7
+        psrldq  xmm6,4
+        psrldq  xmm4,8
+
+        punpcklbw xmm7,[the_zero]       ; broadcasted 0
+        punpcklbw xmm6,[the_zero]
+        punpcklbw xmm4,[the_zero]
+        punpcklwd xmm7,[the_zero]
+        punpcklwd xmm6,[the_zero]
+        punpcklwd xmm4,[the_zero]
+
+
+     ; calc w .........
+        movlps  xmm3,[the_one]          ; broadcasted dword 1.0
+        cvtdq2ps xmm7,xmm7
+        subps   xmm3,.xf
+        cvtdq2ps xmm6,xmm6
+        movhps  xmm3,.xf
+        cvtdq2ps xmm4,xmm4
+        movaps  xmm1,xmm3               ; 1-xf, 1-yf, xf, yf
+        shufps  xmm3,xmm3,10001000b
+        shufps  xmm1,xmm1,11110101b
+        mulps   xmm3,xmm1               ; the four bilinear weights
+
+        mulps   xmm7,xmm3
+        mulps   xmm6,xmm3
+        mulps   xmm4,xmm3
+        haddps  xmm7,xmm7               ; r
+        haddps  xmm6,xmm6               ; g
+        haddps  xmm4,xmm4               ; b
+        haddps  xmm7,xmm7               ; r
+        haddps  xmm6,xmm6               ; g
+        haddps  xmm4,xmm4               ; b
+        movlhps xmm7,xmm6
+        shufps  xmm7,xmm7,11101000b
+        movlhps xmm7,xmm4
+
+        mulps   xmm5,xmm7               ; light (0..255) * texel (0..255)
+        cvtps2dq xmm5,xmm5
+        psrld   xmm5,8                  ; scale the product back to a byte
+        movd    xmm6,[edi]
+        packssdw xmm5,xmm5
+        packuswb xmm5,xmm5
+        paddusb xmm5,xmm6               ; saturating add onto the pixel already in the buffer
+        movd    [edi],xmm5
+     .skip:
+        add     edi,4
+        add     esi,4
+     ;   addps   xmm0,.dn
+        movaps  xmm0,.n1                ; cur normal
+        addps   xmm0,.dn
+        addps   xmm2,.dtx
+        movaps  .n1,xmm0
+        sub     ecx,1
+        jnz     .ddraw
+
+  .end_line:
+        add     esp,352                 ; must match the prologue's sub esp
+        pop     ebp
+
+ret
diff --git a/programs/demos/3DS/3R_PHG.INC b/programs/demos/view3ds/3r_phg.inc
similarity index 100%
rename from programs/demos/3DS/3R_PHG.INC
rename to programs/demos/view3ds/3r_phg.inc
diff --git a/programs/demos/3DS/3STENCIL.INC b/programs/demos/view3ds/3stencil.inc
similarity index 94%
rename from programs/demos/3DS/3STENCIL.INC
rename to programs/demos/view3ds/3stencil.inc
index b9e3ab55a7..bea4b6fd12 100644
--- a/programs/demos/3DS/3STENCIL.INC
+++ b/programs/demos/view3ds/3stencil.inc
@@ -312,11 +312,9 @@ stencil_line: sub ecx,.x1 movss xmm2,.z1 ; cz .ccalc: - movss xmm1,xmm2 - cmpltss xmm1,dword[esi] - movd eax,xmm1 - cmp eax,-1 - jnz @f + ; movss xmm1,xmm2 + comiss xmm2,dword[esi] + jg @f movss dword[esi],xmm2 @@: add esi,4 diff --git a/programs/demos/view3ds/Tupfile.lua b/programs/demos/view3ds/Tupfile.lua new file mode 100644 index 0000000000..34758aa765 --- /dev/null +++ b/programs/demos/view3ds/Tupfile.lua @@ -0,0 +1,2 @@ +if tup.getconfig("NO_FASM") ~= "" then return end +tup.rule("view3ds.asm", "fasm %f %o " .. tup.getconfig("KPACK_CMD"), "view3ds") diff --git a/programs/demos/3DS/A_PROCS.INC b/programs/demos/view3ds/a_procs.inc similarity index 77% rename from programs/demos/3DS/A_PROCS.INC rename to programs/demos/view3ds/a_procs.inc index 466c1c9331..421feebfda 100644 --- a/programs/demos/3DS/A_PROCS.INC +++ b/programs/demos/view3ds/a_procs.inc @@ -19,53 +19,21 @@ do_sinus: cld rep stosd pop edi -; movzx eax,[sinus_flag] -; mov edx,10 -; mul edx -; mov [sin_amplitude],eax -; mov [sin_frq],eax fninit -;if Ext = SSE2 -; movups xmm1,[const0123] ; xmm1 - init values -; mov eax,0x000000ff -; movd xmm2,eax -; shufps xmm2,xmm2,0 ; xmm2 - mask value -; mov eax,4 -; movd xmm3,eax -; shufps xmm3,xmm3,0 .again: -if 0 - fild dword .x - fidiv [sin_frq] - fsin - fimul [sin_amplitude] - fiadd dword .y - fistp dword .new_y -else fild dword .x fmul [sin_frq] fistp dword .temp mov eax, .temp -; mov bx, [angle_x] -; add bx, [angle_y] -; movzx ebx,bx -; shr ebx,1 ; change phase -; add eax,ebx - and eax, 0x000000ff -; cdq - ; mul [sin_frq] -; and eax,0x000000ff -; and ax,0x00ff -; cwde fld dword [sin_tab+eax*4] fimul dword [sin_amplitude] fiadd dword .y fistp dword .new_y -end if + mov eax,.new_y or eax,eax jl .skip @@ -276,109 +244,7 @@ end if ret -;align 16 -; emboss_bias: -; dw 128, 128, 128, 128, 128, 128, 128, 128 -if 0 ; old emb proc - - ; emboss - after drawing all, - ; transfer screen buffer into bump map - ; and draw two bump triangles - ; 
************************************* - mov esi,screen - mov edi,bumpmap2 - mov ecx,TEXTURE_SIZE/3 - cld -if Ext=NON - xor eax,eax - xor bh,bh - xor dh,dh - @@: - lodsb - movzx bx,al - lodsb - movzx dx,al - lodsb - add ax,bx - add ax,dx - ; cwd - ; div [i3] - ;; push ax - ;; pop bx - ;; shr bx,3 - ;; shr ax,2 - ;; add ax,bx - - lea eax,[eax*5] - shr ax,4 - - stosb - loop @b -else - emms - pxor mm1,mm1 - mov ebx,0x0000ffff - @@: - movd mm0,[esi] - punpcklbw mm0,mm1 - movq mm2,mm0 - psrlq mm2,16 - movq mm3,mm0 - psrlq mm3,32 - paddw mm0,mm2 - paddw mm0,mm3 - - - movd eax,mm0 - and eax,ebx - lea eax,[eax*5] - shr ax,4 - stosb - add esi,3 - loop @b - -end if - push ebp - - push dword 0 ; env coords - push word 0 - push word SIZE_X - push word SIZE_Y - push dword 0 - push dword 0 ; bump coords - push word SIZE_X - push word SIZE_Y - push word 0 - mov eax,SIZE_Y - mov ebx,SIZE_X*65536+0 - xor ecx,ecx - mov edx,bumpmap2 - mov esi,envmap - mov edi,screen - call bump_triangle - - push dword SIZE_X shl 16 + SIZE_Y ; env coords - push word 0 - push word SIZE_X - push word SIZE_Y - push word 0 - push dword SIZE_X shl 16 + SIZE_Y ; bump coords - push word 0 - push word SIZE_X - push word SIZE_Y - push word 0 - mov eax,SIZE_Y - mov ebx,SIZE_X * 65536+0 - mov ecx,SIZE_X shl 16 + SIZE_Y - mov edx,bumpmap2 - mov esi,envmap - mov edi,screen - call bump_triangle - - pop ebp -ret -end if ;********************************EMBOSS DONE******************************* diff --git a/programs/demos/3DS/ASC.INC b/programs/demos/view3ds/asc.inc similarity index 100% rename from programs/demos/3DS/ASC.INC rename to programs/demos/view3ds/asc.inc diff --git a/programs/demos/3DS/asc_objects/2TORUS.ASC b/programs/demos/view3ds/asc_objects/2TORUS.ASC similarity index 100% rename from programs/demos/3DS/asc_objects/2TORUS.ASC rename to programs/demos/view3ds/asc_objects/2TORUS.ASC diff --git a/programs/demos/3DS/asc_objects/3TORUS.ASC b/programs/demos/view3ds/asc_objects/3TORUS.ASC similarity index 
100% rename from programs/demos/3DS/asc_objects/3TORUS.ASC rename to programs/demos/view3ds/asc_objects/3TORUS.ASC diff --git a/programs/demos/3DS/asc_objects/AR2.ASC b/programs/demos/view3ds/asc_objects/AR2.ASC similarity index 100% rename from programs/demos/3DS/asc_objects/AR2.ASC rename to programs/demos/view3ds/asc_objects/AR2.ASC diff --git a/programs/demos/3DS/asc_objects/ARCEE.ASC b/programs/demos/view3ds/asc_objects/ARCEE.ASC similarity index 100% rename from programs/demos/3DS/asc_objects/ARCEE.ASC rename to programs/demos/view3ds/asc_objects/ARCEE.ASC diff --git a/programs/demos/3DS/asc_objects/PALLERO.ASC b/programs/demos/view3ds/asc_objects/PALLERO.ASC similarity index 100% rename from programs/demos/3DS/asc_objects/PALLERO.ASC rename to programs/demos/view3ds/asc_objects/PALLERO.ASC diff --git a/programs/demos/3DS/asc_objects/TOORUS.ASC b/programs/demos/view3ds/asc_objects/TOORUS.ASC similarity index 100% rename from programs/demos/3DS/asc_objects/TOORUS.ASC rename to programs/demos/view3ds/asc_objects/TOORUS.ASC diff --git a/programs/demos/3DS/asc_objects/box.asc b/programs/demos/view3ds/asc_objects/box.asc similarity index 100% rename from programs/demos/3DS/asc_objects/box.asc rename to programs/demos/view3ds/asc_objects/box.asc diff --git a/programs/demos/3DS/B_PROCS.INC b/programs/demos/view3ds/b_procs.inc similarity index 92% rename from programs/demos/3DS/B_PROCS.INC rename to programs/demos/view3ds/b_procs.inc index 8ed566eea1..9bd613cae0 100644 --- a/programs/demos/3DS/B_PROCS.INC +++ b/programs/demos/view3ds/b_procs.inc @@ -60,24 +60,25 @@ calc_one_col: ; stack - other parameters ; out - eax - 0x00rrggbb .dot_prd equ dword[ebp+4] ; dot product - cos x - not now -.min_col_r equ word[ebp+8] ; minimum color - ambient +.min_col_r equ [ebp+8] ; minimum color - ambient .min_col_g equ word[ebp+10] .min_col_b equ word[ebp+12] -.max_col_r equ word[ebp+14] ; maximum color - specular +.max_col_r equ [ebp+14] ; maximum color - specular .max_col_g equ 
word[ebp+16] .max_col_b equ word[ebp+18] -.org_col_r equ word[ebp+20] ; orginal color - diffuse +.org_col_r equ [ebp+20] ; orginal color - diffuse .org_col_g equ word[ebp+22] .org_col_b equ word[ebp+24] .n equ word[ebp+26] ; shines - not implemented .temp equ word[ebp-2] -.color_sum_r equ dword[ebp-6] -.color_sum_g equ dword[ebp-10] -.color_sum_b equ dword[ebp-14] +.color_sum_r equ [ebp-6] +.color_sum_g equ [ebp-10] +.color_sum_b equ [ebp-14] ; color = ambient+cos(x)*diffuse+(cos(x)^n)*specular mov ebp,esp sub esp,14 + mov ax,.min_col_r add ax,.max_col_r add ax,.org_col_r @@ -95,8 +96,38 @@ calc_one_col: add ax,.org_col_b cwde mov .color_sum_b,eax +if 0 + movq xmm0,.min_col_r + movq xmm1,.max_col_r + movq xmm2,.org_col_r + packuswb xmm0,[the_zero] + packuswb xmm1,[the_zero] + packuswb xmm2,[the_zero] + punpcklbw xmm0,xmm1 + punpcklbw xmm2,[the_zero] + packusdw xmm2,[the_zero] + cvtdq2ps xmm0,xmm0 + cvtdq2ps xmm1,xmm1 + cvtdq2ps xmm2,xmm2 + haddps xmm0,xmm0 + haddps xmm1,xmm1 + haddps xmm2,xmm2 + haddps xmm0,xmm0 + haddps xmm1,xmm1 + haddps xmm2,xmm2 + + cvtss2si eax,xmm0 + cvtss2si ebx,xmm1 + cvtss2si ecx,xmm2 + mov .color_sum_r,eax + mov .color_sum_g,ebx + mov .color_sum_b,ecx + + +end if + ; fld .dot_prd ; fild .n ; fxch st1 @@ -123,7 +154,7 @@ calc_one_col: faddp ; st0=first piece of col, st1=dot_pr^n.. 
fiadd .min_col_b fimul .max_col_b - fidiv .color_sum_b + fidiv dword .color_sum_b fistp .temp movzx eax,.temp shl eax,16 @@ -135,19 +166,19 @@ calc_one_col: faddp fiadd .min_col_g fimul .max_col_g - fidiv .color_sum_g + fidiv dword .color_sum_g fistp .temp mov ax,.temp mov ah,al shl eax,8 - fimul .max_col_r - fild .org_col_r + fimul word .max_col_r + fild word .org_col_r fmulp st2,st faddp - fiadd .min_col_r - fimul .max_col_r - fidiv .color_sum_r + fiadd word .min_col_r + fimul word .max_col_r + fidiv dword .color_sum_r fistp .temp mov ax,.temp ;eax - 0xbbgg00rr ; mov ah,al diff --git a/programs/demos/3DS/BUMP_CAT.INC b/programs/demos/view3ds/bump_cat.inc similarity index 100% rename from programs/demos/3DS/BUMP_CAT.INC rename to programs/demos/view3ds/bump_cat.inc diff --git a/programs/demos/3DS/BUMP_TEX.INC b/programs/demos/view3ds/bump_tex.inc similarity index 100% rename from programs/demos/3DS/BUMP_TEX.INC rename to programs/demos/view3ds/bump_tex.inc diff --git a/programs/demos/3DS/DATA.INC b/programs/demos/view3ds/data.inc similarity index 89% rename from programs/demos/3DS/DATA.INC rename to programs/demos/view3ds/data.inc index 145c6cb43c..2a93693e76 100644 --- a/programs/demos/3DS/DATA.INC +++ b/programs/demos/view3ds/data.inc @@ -1,4 +1,4 @@ - ; DATA AREA ************************************ + ; DATA AREA ************************************ i3 dw 3 i12 dd 12 @@ -7,7 +7,6 @@ dot_max dd 1.0 ; dot product max and min dot_min dd 0.0 env_const dd 1.05 - correct_tex dw 255 tex_x_div2 dw TEX_X / 2 tex_y_div2 dw TEX_Y / 2 xobs dw 0 ;SIZE_X / 2 ;200 ;observer = camera @@ -26,6 +25,12 @@ vect_x dw SIZE_X / 2 vect_y dw SIZE_Y / 2 vect_z dw 0 + size_y_var: + yres_var dw SIZE_Y + + size_x_var: + xres_var dw SIZE_X + angle_x dw 0 angle_y dw 0 angle_z dw 0 @@ -52,8 +57,6 @@ screen_ptr dd 0 Zbuffer_ptr dd 0 vertices_index_ptr dd 0 - - vertex_edit_no dw 0 edit_start_x: dw 0 @@ -63,6 +66,7 @@ edit_end_y dw 0 mouse_state dd 0 + menu: db 2 ; button number = index db 
'rotary ' ; label @@ -73,7 +77,7 @@ db 3 db 'shd. model' if Ext >= SSE3 - db 14 + max_dr_flg db 15 else db 12 end if @@ -267,6 +271,7 @@ flags: ; flags description db 'grdl' db 'rphg' db 'glas' + db 'ptex' spd_f: db 'idle' db 'full' @@ -338,24 +343,24 @@ base_vector: if Ext=SSE3 db ' (SSE3)' end if - db ' 0.071',0 + db ' 0.072',0 labellen: STRdata db '-1 ' all_lights_size dw lightsend-lights - file_info: - dd 0 - dd 0 - dd 0 - fsize dd 0 ;180000 ; sizeof(workarea) - fptr dd 0 ;workarea - file_name: - db '/rd/1/3d/house.3ds',0 - ; db '/tmp0/1/ant.3ds',0 - - rb 256 +; file_info: +; dd 0 +; dd 0 +; dd 0 +; fsize dd 0 ;180000 ; sizeof(workarea) +; fptr dd 0 ;workarea +; file_name: +; db '/rd/1/3d/house.3ds',0 +; ; db '/tmp0/1/ant.3ds',0 +; +; rb 256 ;============================================= lights: @@ -410,9 +415,29 @@ align 16 zero_hgst_dd: dd -1, -1, -1, 0 mask_255f: + correct_texf: times 4 dd 255.0 the_zero: times 4 dd 0.0 + tex_m2: + times 4 dd 510.0 + the_one: + times 4 dd 1.0 + aprox dd 0.0001 + + + file_info: + dd 0 + dd 0 + dd 0 + fsize dd 0 ;180000 ; sizeof(workarea) + fptr dd 0 ;workarea + file_name: + db '/rd/1/3d/house.3ds',0 + ; db '/tmp0/1/ant.3ds',0 + + rb 256 + I_END: SourceFile: @@ -444,11 +469,11 @@ align 8 triangles_count_var dd ? points_count_var dd ? - size_y_var: - yres_var dw ? +; size_y_var: +; yres_var dw ? - size_x_var: - xres_var dw ? +; size_x_var: +; xres_var dw ? x_start: dw ? y_start: @@ -460,8 +485,8 @@ align 8 point_index2 dd ? ; } don't change order point_index3 dd ? ;-/ temp_col dw ? - temp1 dd ? - temp2 dd ? + temp1 dd ? ; > dont change + temp2 dd ? ; > order high dd ? rand_seed dw ? 
align 8 diff --git a/programs/demos/3DS/FLAT_CAT.INC b/programs/demos/view3ds/flat_cat.inc similarity index 100% rename from programs/demos/3DS/FLAT_CAT.INC rename to programs/demos/view3ds/flat_cat.inc diff --git a/programs/demos/3DS/GRD_CAT.INC b/programs/demos/view3ds/grd_cat.inc similarity index 100% rename from programs/demos/3DS/GRD_CAT.INC rename to programs/demos/view3ds/grd_cat.inc diff --git a/programs/demos/view3ds/grd_line.inc b/programs/demos/view3ds/grd_line.inc new file mode 100644 index 0000000000..d7b26055ed --- /dev/null +++ b/programs/demos/view3ds/grd_line.inc @@ -0,0 +1,643 @@ +;-procedure draws smooth shaded lines (I mean interpolation 24 bit-- +;-color), with z coord interpolation-------------------------------- +;-author: Maciej Guba (www.macgub.hekko.pl)------------------------- +;-in : ------------------------------------------------------------- +;----- edi - pointer to screen buffer ------------------------------ +;----- esi - pointer to Z buffer ----------------------------------- +;------ constans : SIZE_X, SIZE_Y - screen width and height--------- +;----------------- ROUND - fixed point shift------------------------ +;------ other parameters via stack---------------------------------- +smooth_line: +.x1 equ ebp+4 +.y1 equ ebp+6 +.z1 equ ebp+8 +.r1 equ ebp+10 +.g1 equ ebp+12 +.b1 equ ebp+14 +.x2 equ ebp+16 +.y2 equ ebp+18 +.z2 equ ebp+20 +.r2 equ ebp+22 +.g2 equ ebp+24 +.b2 equ ebp+26 + + +.line_lenght equ ebp-2 +.delta equ ebp-6 +.delta_x equ ebp-10 +.delta_y equ ebp-14 +.dr equ ebp-18 +.dg equ ebp-22 +.db equ ebp-26 +.dz equ ebp-30 +.cr equ ebp-34 +.cg equ ebp-38 +.cb equ ebp-42 +.cz equ ebp-46 + +;.line_lenght equ ebp-48 +.screen equ ebp-52 +.zbuffer equ ebp-56 +.ccoord equ ebp-60 ;current coordinate +.czbuf equ ebp-64 +.cscr equ ebp-68 +.xres equ ebp-72 +.yres equ ebp-76 +.xresm1 equ ebp-80 +.yresm1 equ ebp-84 +.xresp1 equ ebp-88 +.yresp1 equ ebp-92 +.xres3 equ ebp-96 +.xres4 equ ebp-100 + +macro .update_cur_var +{ +if Ext=NON + 
mov ebx,[.dz] + add [.cz],ebx + mov ebx,[.dr] + add [.cr],ebx + mov ebx,[.dg] + add [.cg],ebx + mov ebx,[.db] + add [.cb],ebx +elseif Ext=MMX + movq mm0,[.cz] + movq mm1,[.cg] + paddd mm0,mm2 ;[.dz] + paddd mm1,mm3 ;[.dg] + movq [.cz],mm0 + movq [.cg],mm1 +elseif Ext >= SSE2 +; movups xmm1,[.cz] + paddd xmm1,xmm0 +; movups [.cz],xmm1 +end if +} +macro .draw_pixel +{ + mov [esi],ebx ; actualize Z buffer +if Ext>=SSE2 + movaps xmm7,xmm1 ;[.cb] ;;xmm1 + shufps xmm7,xmm7,00111001b + psrld xmm7,ROUND + packssdw xmm7,xmm7 + packuswb xmm7,xmm7 + pand xmm7,xmm6 ;[.mask] + movd [edi],xmm7 +else + + mov eax,[.cb] + sar eax,ROUND + mov [edi],al +; and eax,0x000000ff ; clean unused bits + mov ebx,[.cg] + sar ebx,ROUND + mov [edi+1],bl +; mov ah,bl + mov edx,[.cr] + sar edx,ROUND + mov [edi+2],dl +end if +; shl ebx,16 +; or eax,ebx +; mov [edi],eax +} +macro .sort +{ + +if Ext >= MMX + movq mm0,[.x1] + movq mm1,[.x2] + movq [.x1],mm1 + movq [.x2],mm0 +else + mov edx,[.x1] + xchg edx,[.x2] + mov [.x1],edx + mov edx,[.z1] + xchg edx,[.z2] + mov [.z1],edx +end if + mov edx,[.g1] + xchg edx,[.g2] + mov [.g1],edx +} + + + + emms + mov ebp,esp + sub esp,128 + mov eax,[.x1] ; check if parameters exceedes screen area + mov ebx,[.x2] + or eax,ebx + test eax,80008000h + jne .end_line + movzx edx,word [size_x_var] + mov [.xres],edx + dec edx + movzx ecx,word [size_y_var] + mov [.yres],ecx + dec ecx + cmp word[.x1],dx ;SIZE_X + jg .end_line + cmp word[.x2],dx ;SIZE_X + jg .end_line + cmp word[.y1],cx ;SIZE_Y + jg .end_line + cmp word[.y2],cx ;SIZE_Y + jg .end_line + + mov edx,[.xres] + shl edx,2 + mov [.xres4],edx + shr edx,2 + lea edx,[edx*3] + mov [.xres3],edx + mov edx,[.xres] + mov ecx,[.yres] + dec edx + dec ecx + mov [.xresm1],edx + mov [.yresm1],ecx + add edx,2 + add ecx,2 + mov [.xresp1],edx + mov [.yresp1],ecx + + mov [.screen],edi + mov cx,[.x1] + cmp cx,[.x2] + je .vertical_l + mov cx,[.y1] + cmp cx,[.y2] + je .horizontal_l + mov ax,[.x1] + sub ax,[.x2] + cmp ax,0 + jg @f + neg 
ax ; calc absolute value + @@: + mov [.delta_x],ax + mov bx,[.y1] + sub bx,[.y2] + cmp bx,0 + jg @f + neg bx + @@: + mov [.delta_y],bx + cmp ax,bx + je .deg45_l + jl .more_vertical_l + jg .more_horizon_l + jmp .end_line + ; +.horizontal_l: + mov ax,[.x1] + mov bx,[.x2] + cmp bx,ax + jge @f + + .sort +@@: + + mov bx,[.x2] + sub bx,[.x1] + movsx ebx,bx + cmp ebx,0 ;line lenght equql 0 + je .end_line + mov [.delta_x],ebx + + call .calc_delta + + mov eax,[.xres] ;SIZE_X + movsx ebx,word[.y1] + mul ebx + add esi,eax + lea eax,[eax*3] + add esi,eax + add edi,eax + movsx eax,word[.x1] + add esi,eax + lea eax,[eax*3] + add edi,eax + add esi,eax + + mov ecx,[.delta_x] + + movsx ebx,word[.r1] + shl ebx,ROUND + mov [.cr],ebx + movsx ebx,word[.g1] + shl ebx,ROUND + mov [.cg],ebx + movsx ebx,word[.b1] + shl ebx,ROUND + mov [.cb],ebx + movsx ebx,word[.z1] + shl ebx,ROUND + mov [.cz],ebx +if Ext >= SSE2 + movups xmm1,[.cz] +end if +.hdraw: +if Ext >= SSE2 + movd ebx,xmm1 +else + mov ebx,[.cz] +end if + cmp [esi],ebx + jle .skip + + .draw_pixel + +.skip: + add edi,3 + add esi,4 + + .update_cur_var + + loop .hdraw + jmp .end_line + +.vertical_l: + mov ax,[.y1] + cmp [.y2],ax + jge @f + + .sort +@@: + mov bx,[.y2] + sub bx,[.y1] + movsx ebx,bx + cmp ebx,0 + je .end_line + mov [.delta_y],ebx + + call .calc_delta + + mov eax,[.xres] ;SIZE_X + movsx ebx,word[.y1] + mul ebx + add esi,eax + lea eax,[eax*3] + add edi,eax + add esi,eax + movsx eax,word[.x1] + add esi,eax + lea eax,[eax*3] + add esi,eax + add edi,eax + + mov ecx,[.delta_y] + + movsx ebx,word[.r1] + shl ebx,ROUND + mov [.cr],ebx + movsx ebx,word[.g1] + shl ebx,ROUND + mov [.cg],ebx + movsx ebx,word[.b1] + shl ebx,ROUND + mov [.cb],ebx + movsx ebx,word[.z1] + shl ebx,ROUND + mov [.cz],ebx +if Ext >= SSE2 + movups xmm1,[.cz] +end if + +.v_draw: +if Ext >= SSE2 + movd ebx,xmm1 +else + mov ebx,[.cz] +end if + cmp [esi],ebx + jle @f + + .draw_pixel + +@@: + add edi,[.xres3] + add esi,[.xres4] + + .update_cur_var + + loop .v_draw 
+    jmp    .end_line
+.deg45_l:                       ; |dx| = |dy|: every step moves one column and one row
+    mov    word[.line_lenght],ax
+    mov    ax,[.x1]
+    cmp    [.x2],ax
+    jge    @f
+
+    .sort                       ; swap the endpoints so that x1 <= x2
+@@:
+    mov    bx,[.y2]
+    sub    bx,[.y1]
+    movsx  ebx,bx
+    cmp    ebx,0
+    je     .end_line
+    mov    [.delta_y],ebx       ; signed: negative when y decreases along the line
+    mov    bx,[.x2]
+    sub    bx,[.x1]
+    movsx  ebx,bx
+    mov    [.delta_x],ebx       ; ebx is also the divisor used by .calc_delta
+
+    call   .calc_delta
+
+    mov    eax,[.xres]
+    movsx  ebx,word[.y1]        ;calc begin values in screen and Z buffers
+    mul    ebx
+    lea    ebx,[3*eax]
+    add    edi,ebx
+    shl    eax,2
+    add    esi,eax
+    movsx  eax,word[.x1]
+    lea    ebx,[eax*3]
+    add    edi,ebx
+    shl    eax,2
+    add    esi,eax
+
+    movzx  ecx,word[.line_lenght]
+    movsx  ebx,word[.r1]
+    shl    ebx,ROUND
+    mov    [.cr],ebx
+    movsx  ebx,word[.g1]
+    shl    ebx,ROUND
+    mov    [.cg],ebx
+    movsx  ebx,word[.b1]
+    shl    ebx,ROUND
+    mov    [.cb],ebx
+    movsx  ebx,word[.z1]
+    shl    ebx,ROUND
+    mov    [.cz],ebx
+if Ext >= SSE2
+    movups xmm1,[.cz]           ; xmm1 = cz,cb,cg,cr running values; this load was missing on this path only
+end if
+.d45_draw:
+if Ext >= SSE2
+    movd   ebx,xmm1
+else
+    mov    ebx,[.cz]
+end if
+    cmp    [esi],ebx
+    jle    @f
+    .draw_pixel
+@@:
+    cmp    dword[.delta_y],0
+    jl     @f
+    add    edi,[.xres3]         ;SIZE_X*3+3
+    add    edi,3
+    add    esi,[.xres4]         ;SIZE_X*4+4
+    add    esi,4
+    jmp    .d45_1
+@@:
+    sub    edi,[.xres3]         ; one row back in the screen buffer ...
+    add    edi,3                ; ... while x still advances: net -((SIZE_X*3)-3)
+    sub    esi,[.xres4]         ; same step in the Z buffer:
+    add    esi,4                ; net -((SIZE_X*4)-4)
+.d45_1:
+    .update_cur_var
+
+    loop   .d45_draw
+    jmp    .end_line
+
+.more_vertical_l:
+    mov    word[.line_lenght],bx
+    mov    ax,[.y1]
+    cmp    [.y2],ax
+    jge    @f
+    .sort
+@@:
+    mov    bx,[.y2]
+    sub    bx,[.y1]
+    movsx  ebx,bx
+    cmp    ebx,0
+    je     .end_line            ;=======================
+    mov    [.delta_y],ebx
+
+    mov    ax,[.x2]
+    sub    ax,[.x1]
+    cwde
+    shl    eax,ROUND
+    cdq
+    idiv   ebx
+    mov    [.delta],eax
+
+    call   .calc_delta
+
+    mov    eax,[.xres]          ;SIZE_X
+    movsx  ebx,word[.y1]        ;calc begin values in screen and Z buffers
+    mul    ebx
+    lea    ebx,[3*eax]
+    add    esi,ebx
+    add    esi,eax
+    add    edi,ebx
+    mov    [.cscr],edi
+    mov    [.czbuf],esi
+
+    movzx  ecx,word[.line_lenght]
+
+    movsx  ebx,word[.r1]
+    shl    ebx,ROUND
+    mov    [.cr],ebx
+    movsx  ebx,word[.g1]
+    shl    ebx,ROUND
+    mov    [.cg],ebx
+    movsx  ebx,word[.b1]
+    shl    ebx,ROUND
+    mov    [.cb],ebx
+    movsx  ebx,word[.z1]
+    shl    ebx,ROUND
+    mov    [.cz],ebx
+if Ext >= SSE2
+    movups
xmm1,[.cz] +end if + movsx ebx,word[.x1] + shl ebx,ROUND + mov [.ccoord],ebx ; .ccoord -> x coordinate +.draw_m_v: + mov edi,[.cscr] + mov esi,[.czbuf] + mov eax,[.ccoord] + sar eax,ROUND + lea ebx,[eax*3] + add edi,ebx + add esi,ebx + add esi,eax +if Ext >= SSE2 + movd ebx,xmm1 +else + mov ebx,[.cz] +end if + cmp [esi],ebx + jle @f + + .draw_pixel + +@@: + mov eax,[.delta] + mov ebx,[.xres3] + add [.ccoord],eax + mov eax,[.xres4] + add dword[.cscr],ebx ;SIZE_X*3 ; + add dword[.czbuf],eax ;SIZE_X*4 +.d_m_v1: + + .update_cur_var + + dec ecx + jnz .draw_m_v + jmp .end_line + +.more_horizon_l: + mov word[.line_lenght],ax + mov ax,[.x1] + cmp [.x2],ax + jge @f + + .sort +@@: + mov bx,[.x2] + sub bx,[.x1] + movsx ebx,bx + cmp ebx,0;======================= + je .end_line + mov [.delta_x],ebx + + mov ax,[.y2] + sub ax,[.y1] + cwde + shl eax,ROUND + cdq + idiv ebx + mov [.delta],eax + + call .calc_delta + + ;calc begin values in screen and Z buffers + movsx ebx,word[.x1] + mov eax,ebx + add esi,ebx + lea ebx,[3*ebx] + add esi,ebx + add edi,ebx + mov [.cscr],edi + mov [.czbuf],esi + + movzx ecx,word[.line_lenght] + + movsx ebx,word[.r1] + shl ebx,ROUND + mov [.cr],ebx + movsx ebx,word[.g1] + shl ebx,ROUND + mov [.cg],ebx + movsx ebx,word[.b1] + shl ebx,ROUND + mov [.cb],ebx + movsx ebx,word[.z1] + shl ebx,ROUND + mov [.cz],ebx +if Ext >= SSE2 + movups xmm1,[.cz] +end if + movsx ebx,word[.y1] + shl ebx,ROUND + mov [.ccoord],ebx ; .ccoord -> y coordinate + +.draw_m_h: + mov edi,[.cscr] + mov esi,[.czbuf] + mov eax,[.ccoord] ; ccoord - cur y coordinate + sar eax,ROUND + mov ebx,[.xres] ;SIZE_X + mul ebx + add esi,eax + lea eax,[eax*3] + add esi,eax + add edi,eax +if Ext >= SSE2 + movd ebx,xmm1 +else + mov ebx,[.cz] +end if + cmp [esi],ebx + jle @f + + .draw_pixel + +@@: + mov eax,[.delta] + add [.ccoord],eax + add dword[.cscr],3 ; + add dword[.czbuf],4 + + .update_cur_var + + dec ecx + jnz .draw_m_h + +.end_line: + mov esp,ebp + ret 24 + +.calc_delta: + mov ax,[.z2] + sub 
ax,[.z1] + cwde + shl eax,ROUND + cdq + idiv ebx + mov [.dz],eax + + mov ax,[.r2] + sub ax,[.r1] + cwde + shl eax,ROUND + cdq + idiv ebx + mov [.dr],eax + + mov ax,[.g2] + sub ax,[.g1] + cwde + shl eax,ROUND + cdq + idiv ebx + mov [.dg],eax + + mov ax,[.b2] + sub ax,[.b1] + cwde + shl eax,ROUND + cdq + idiv ebx + mov [.db],eax +if Ext=MMX | Ext = SSE + movq mm2,[.dz] + movq mm3,[.dg] +else if Ext >= SSE2 + movups xmm0,[.dz] + movups xmm6,[.mask] +end if +ret +.mask: + dq 0xffffffff00ffffff + dq 0xffffffffffffffff + + + + + + + + + + + + + diff --git a/programs/demos/3DS/GRD_TEX.INC b/programs/demos/view3ds/grd_tex.inc similarity index 100% rename from programs/demos/3DS/GRD_TEX.INC rename to programs/demos/view3ds/grd_tex.inc diff --git a/programs/demos/3DS/History.txt b/programs/demos/view3ds/history.txt similarity index 92% rename from programs/demos/3DS/History.txt rename to programs/demos/view3ds/history.txt index e3c10375a9..2c84fd9eaa 100644 --- a/programs/demos/3DS/History.txt +++ b/programs/demos/view3ds/history.txt @@ -1,6 +1,12 @@ +View3ds 0.071 - VIII 2020 +1. New displaying model - glass - it's two pass rendering. First pass calculates + Z position of all front pixels, second render image with adding reflective + component of light only for front pixels. Transparent effect by adding with saturation. +2. I removed bug with performing generation object after choosing 'emboss' option. +----------------------------------------------------------------------------------- + View3ds 0.070 - VII 2020 - 1. Some keys support by Leency. 2. New displaying model - real Phong - real not fake normal vector interpolation, normalising it and calculating dot product (one for each light). 
diff --git a/programs/demos/3DS/README.TXT b/programs/demos/view3ds/readme.txt similarity index 74% rename from programs/demos/3DS/README.TXT rename to programs/demos/view3ds/readme.txt index 6dc8273faf..4978561dec 100644 --- a/programs/demos/3DS/README.TXT +++ b/programs/demos/view3ds/readme.txt @@ -1,12 +1,14 @@ -View3ds 0.071 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.072 - tiny viewer to .3ds and .asc files with several graphics effects implementation. - What's new? -1. New displaying model - glass - it's two pass rendering. First pass calculates - Z position of all front pixels, second render image with adding reflective - component of light only for front pixels. Transparent effect by adding with saturation. -2. I removed bug with performing generation object after choosing 'emboss' option. +1. New displaying model - texturing with bilinear filtering and transparency + simultaneously. Note that filtering is done only inside the polygon. To improve + image quality, float texture coordinates would need to be passed + as arguments to the single triangle rendering proc. +2. Optimizations. +3. SSE3 version runs correctly on SSE2 CPUs, but the real Phong, glass and + transparent texturing with filtering rendering models are disabled. Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -15,7 +17,8 @@ Buttons description: pos (position shading depend), dots (app draws only points - nodes of object), txgrd (texture mapping + smooth shading), 2tex (texture mapping + spherical environment mapping), bmap (bump + texture mapping), cenv (cubic environment - mapping), grdl (Gouraud lines - edges only), rphg (real Phong). + mapping), grdl (Gouraud lines - edges only), rphg (real Phong), glas (glass effect), + ptex (real Phong + texturing + transparency). 3. speed: idle, full. 4,5. zoom in, out: no comment. 6. catmull: disabled @@ -40,4 +43,4 @@ Buttons description: is released apply current position. 
You may also decrease whole handlers count by enable culling (using appropriate button) - some back handlers become hidden. - Maciej Guba VIII 2020 + Maciej Guba III 2021 diff --git a/programs/demos/3DS/TEX_CAT.INC b/programs/demos/view3ds/tex_cat.inc similarity index 100% rename from programs/demos/3DS/TEX_CAT.INC rename to programs/demos/view3ds/tex_cat.inc diff --git a/programs/demos/3DS/TWO_TEX.INC b/programs/demos/view3ds/two_tex.inc similarity index 100% rename from programs/demos/3DS/TWO_TEX.INC rename to programs/demos/view3ds/two_tex.inc diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/view3ds/view3ds.asm similarity index 88% rename from programs/demos/3DS/VIEW3DS.ASM rename to programs/demos/view3ds/view3ds.asm index 05fd85e0cd..c426b5e5b2 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/view3ds/view3ds.asm @@ -1,11 +1,11 @@ -; application : View3ds ver. 0.071 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.072 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. ; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba ; email : macgub3@wp.pl -; web : http://macgub.vxm.pl +; web : http://macgub.j.pl, http://macgub.co.pl ; Fell free to use this intro in your own distribution of KolibriOS. ; Special greetings to KolibriOS team . ; I hope because my demos Christian Belive will be near to each of You. @@ -19,8 +19,8 @@ ; 2) Written in manually (at the end of the code) ; now not exist -SIZE_X equ 512 -SIZE_Y equ 512 ; ///// I want definitely +SIZE_X equ 500 +SIZE_Y equ 600 ; ///// I want definitely TIMEOUT equ 10 ; ------ say: ROUND equ 10 ; \ @ @/ keep smiling every TEX_X equ 512 ; texture width ; \ ./ / day. 
@@ -37,7 +37,7 @@ MMX = 1 SSE = 2 SSE2 = 3 SSE3 = 4 -Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } +Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) USE_LFN = 1 ; App is Kolibri only now. @@ -55,28 +55,12 @@ use32 START: ; start of execution cld - mov eax, 500 shl 16 + 600 ; ... or set manually - mov [size_y_var],ax - shr ax,1 - mov [vect_y],ax - - - shr ax,2 - movzx ebx,ax - movzx ebx,ax - lea ebx,[ebx*3] - push ebx + push dword (SIZE_Y shr 3) * 3 fninit fild dword[esp] fstp [rsscale] pop ebx - shr eax,16 - mov [size_x_var],ax - shr ax,1 - mov [vect_x],ax - - ; mov eax, 20 shl 16 + 20 mov [x_start],dword 20 shl 16 + 20 ;eax @@ -117,7 +101,7 @@ START: ; start of execution if Ext >= SSE3 call copy_lights ; to aligned float end if - call init_triangles_normals2 + call init_triangles_normals2 call init_point_normals call init_envmap2 call init_envmap_cub @@ -132,7 +116,14 @@ START: ; start of execution ;mov eax,40 ; set events mask ;mov ebx,1100000000000000000000000100111b ;int 0x40 - + if Ext >= SSE3 + mov eax,1 + cpuid + bt ecx,0 ; is sse3 on board? 
+ jc @f + mov [max_dr_flg],12 + @@: + end if still: cmp [edit_flag],1 @@ -266,22 +257,30 @@ still: ; ah = 5 -> scale- cmp ah,5 jne @f - .zoom_out: + .zoom_out: mov dword[scale],0.7 - fninit - fld [rsscale] - fmul [scale] - fstp [rsscale] + movss xmm0,[rsscale] + mulss xmm0,[scale] + movss [rsscale],xmm0 + ; fninit + ; fld [rsscale] + ; fmul [scale] + ; fstp [rsscale] + @@: cmp ah,6 ; ah = 6 -> scale+ jne @f - .zoom_in: + .zoom_in: mov dword[scale],1.3 - fninit - fld [rsscale] - fmul [scale] - fstp [rsscale] + movss xmm0,[rsscale] + mulss xmm0,[scale] + movss [rsscale],xmm0 + + ; fninit + ; fld [rsscale] + ; fmul [scale] + ; fstp [rsscale] @@: cmp ah,9 ; lights random ; 'flat' 0 @@ -548,10 +547,6 @@ still: jne @f call clrscr ; clear the screen @@: - ; cmp [catmull_flag],1 ;non sort if Catmull = on - ; je .no_sort - ; 64 indexes call sort_triangles - .no_sort: cmp [dr_flag],7 ; fill if 2tex and texgrd jge @f cmp [catmull_flag],0 ;non fill if Catmull = off @@ -573,11 +568,6 @@ still: call draw_handlers ; call edit - - - - - .no_edit: .blurrr: @@ -605,7 +595,6 @@ still: @@: - cmp [inc_bright_flag],0 ; increase brightness je .no_inc_bright movzx ebx,[inc_bright_flag] @@ -780,22 +769,23 @@ end if ;-------------------------------------------------------------------------------- ;-------------------------PROCEDURES--------------------------------------------- ;-------------------------------------------------------------------------------- -include "FLAT_CAT.INC" -include "TEX_CAT.INC" -include "BUMP_CAT.INC" -include "3DMATH.INC" -include "GRD_LINE.INC" -include "B_PROCS.INC" -include "A_PROCS.INC" -include "GRD_CAT.INC" -include "BUMP_TEX.INC" -include "GRD_TEX.INC" -include "TWO_TEX.INC" -include "ASC.INC" +include "flat_cat.inc" +include "tex_cat.inc" +include "bump_cat.inc" +include "3dmath.inc" +include "grd_line.inc" +include "b_procs.inc" +include "a_procs.inc" +include "grd_cat.inc" +include "bump_tex.inc" +include "grd_tex.inc" +include "two_tex.inc" +include 
"asc.inc" if Ext >= SSE3 -include "3R_PHG.INC" -include '3STENCIL.INC' -include '3GLASS.INC' +include "3r_phg.inc" +include '3stencil.inc' +include '3glass.inc' +include '3glass_tex.inc' end if clear_vertices_index: mov edi,[vertices_index_ptr] @@ -1245,6 +1235,13 @@ init_envmap2: ; do env_map using many light sources push ax mov al,byte[esi+12] ; r push ax + ; pxor xmm1,xmm1 + ; movd xmm0,[esi+12] + ; punpckhbw xmm0,xmm1 + ; sub esp,8 + ; movq [esp],xmm0 + + mov al,byte[esi+20] ; b max color push ax mov al,byte[esi+19] ; g @@ -1345,7 +1342,7 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading lea edi,.nx call dot_product pop edi - fcom [dot_min] + fcom [dot_min] fstsw ax sahf ja .env_ok1 ;compare with dot_max @@ -1421,7 +1418,7 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading mov esp,ebp pop ebp ret -if Ext >= SSE3 +if Ext >= SSE2 init_point_normals: .z equ dword [ebp-8] .y equ dword [ebp-12] @@ -1429,8 +1426,7 @@ init_point_normals: .point_number equ dword [ebp-28] .hit_faces equ dword [ebp-32] - fninit - push ebp + push ebp mov ebp,esp sub esp,64 and ebp,-16 @@ -1470,19 +1466,25 @@ init_point_normals: jne .ipn_check_face cvtsi2ss xmm6,.hit_faces movaps xmm7,.x + rcpss xmm6,xmm6 shufps xmm6,xmm6,11000000b mulps xmm7,xmm6 - movaps xmm6,xmm7 - mulps xmm6,xmm6 - andps xmm6,[zero_hgst_dd] - haddps xmm6,xmm6 - haddps xmm6,xmm6 - rsqrtps xmm6,xmm6 - mulps xmm7,xmm6 movlps [edi],xmm7 movhlps xmm7,xmm7 movss [edi+8],xmm7 + call normalize_vector + ; movaps xmm6,xmm7 + ; mulps xmm6,xmm6 + ; andps xmm6,[zero_hgst_dd] + ; haddps xmm6,xmm6 + ; haddps xmm6,xmm6 + ; rsqrtps xmm6,xmm6 + ; mulps xmm7,xmm6 + ; movlps [edi],xmm7 + ; movhlps xmm7,xmm7 + ; movss [edi+8],xmm7 + add edi,12 inc .point_number mov edx,.point_number @@ -1704,7 +1706,7 @@ draw_triangles: @@: if Ext >= SSE3 cmp [dr_flag],13 - jne .no_stencil + jnge .no_stencil mov esi,[triangles_ptr] mov ecx,[triangles_count_var] @@: @@ -1768,7 +1770,7 @@ draw_triangles: .again_dts: push ecx mov 
ebp,[points_translated_ptr] - if Ext >= SSE2 + if Ext = NON mov eax,dword[esi] mov [point_index1],eax lea eax,[eax*3] @@ -1812,7 +1814,7 @@ draw_triangles: mov [zz3],ax else movq mm0,[esi] ; don't know MMX - mov qword[point_index1],mm0 + movq qword[point_index1],mm0 ; shr eax,16 ; mov [point_index2],ax mov eax,dword[esi+8] @@ -1888,17 +1890,19 @@ end if lea eax,[eax*3] shl eax,2 add eax,[points_normals_rot_ptr] + bt dword[eax+8],+31 + jc .no_culling ; lea eax,[eax+point_normals_rotated] - fld dword[eax+8] ; ***************************** - ftst ; CHECKING OF Z COOFICIENT OF - fstsw ax ; NORMAL VECTOR - sahf - jb @f - ffree st + ; fld dword[eax+8] ; ***************************** + ; ftst ; CHECKING OF Z COOFICIENT OF + ; fstsw ax ; NORMAL VECTOR + ; sahf + ; jb @f + ; ffree st loop @b jmp .end_draw ; non visable - @@: - ffree st ;is visable + ; @@: + ; ffree st ;is visable .no_culling: cmp [dr_flag],0 ; draw type flag je .flat_draw @@ -1925,6 +1929,9 @@ end if je .r_phg cmp [dr_flag],13 je .glass + cmp [dr_flag],14 + je .glass_tex + end if ; **************** mov esi,point_index3 ; do Gouraud shading mov ecx,3 @@ -1933,19 +1940,28 @@ end if shl eax,2 lea eax,[eax*3] add eax,[points_normals_rot_ptr] + if Ext < SSE ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] ; x cooficient of normal vector - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp1] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp2] - mov eax,[temp2] mov ebx,[temp1] + else + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + movd eax,xmm0 + psrldq xmm0,4 + movd ebx,xmm0 + end if and ebx,0xfffffff shl eax,TEX_SHIFT add eax,ebx @@ -2050,28 +2066,31 @@ end if mov eax,[point_index1] mov ebx,[point_index2] mov ecx,[point_index3] - shl eax,2 - shl ebx,2 
- shl ecx,2 - lea eax,[eax*3] ;+point_normals_rotated] + ; shl eax,2 + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + ; shl ebx,2 + ; shl ecx,2 + ; lea eax,[eax*3] ;+point_normals_rotated] add eax,[points_normals_rot_ptr] - lea ebx,[ebx*3] ;+point_normals_rotated] + ; lea ebx,[ebx*3] ;+point_normals_rotated] add ebx,[points_normals_rot_ptr] - lea ecx,[ecx*3] ;+point_normals_rotated] + ; lea ecx,[ecx*3] ;+point_normals_rotated] add ecx,[points_normals_rot_ptr] fld dword[eax] ; x cooficient of normal vector fadd dword[ebx] fadd dword[ecx] fidiv [i3] - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp1] ;dword[esp-4] ; x temp variables fld dword[eax+4] ; y cooficient of normal vector fadd dword[ebx+4] fadd dword[ecx+4] fidiv [i3] - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp2] ;dword[esp-8] ; y mov edx,[temp2] ;dword[esp-8] and edx,0xfffffff @@ -2119,29 +2138,24 @@ end if push [zz2] push [zz1] - mov esi,point_index1 - sub esp,12 - mov edi,esp - mov ecx,3 + mov esi,point_index1 + sub esp,12 + mov edi,esp + mov ecx,3 @@: - mov eax,dword[esi] - lea eax,[eax*3] - shl eax,2 - add eax,[points_normals_rot_ptr] ;point_normals_rotated - ; texture x=(rotated point normal -> x * 255)+255 - fld dword[eax] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi] - ; texture y=(rotated point normal -> y * 255)+255 - fld dword[eax+4] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi+2] - - add edi,4 - add esi,4 - loop @b + mov eax,dword[esi] + lea eax,[eax*3] + shl eax,2 + add eax,[points_normals_rot_ptr] ;point_normals_rotated + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + packssdw xmm0,xmm0 + movd [edi],xmm0 + add edi,4 + add esi,4 + loop @b mov eax,dword[xx1] ror eax,16 @@ -2178,8 +2192,8 @@ end if fld1 faddp fmulp - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + 
fadd dword[correct_texf] fistp word[edi] mov word[edi+2],0 ; fistp word[edi+2] @@ -2230,21 +2244,19 @@ end if mov eax,dword[esi] lea eax,[eax*3] shl eax,2 + ; imul eax,[i12] add eax,[points_normals_rot_ptr] ;point_normals_rotated ; texture x=(rotated point normal -> x * 255)+255 - fld dword[eax] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi] - ; texture y=(rotated point normal -> y * 255)+255 - fld dword[eax+4] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi+2] - add edi,4 - add esi,4 - loop @b + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + packssdw xmm0,xmm0 + movd [edi],xmm0 + add edi,4 + add esi,4 + loop @b mov esi,[point_index3] ; bump map coords shl esi,2 @@ -2253,17 +2265,11 @@ end if mov esi,[point_index2] shl esi,2 add esi,tex_points -; lea esi,[esi*3] -; lea esi,[points+2+esi*2] push dword[esi] - ; push dword[xx2] mov esi,[point_index1] shl esi,2 add esi,tex_points -; lea esi,[esi*3] -; lea esi,[points+2+esi*2] push dword[esi] - ; push dword[xx1] mov eax,dword[xx1] ror eax,16 @@ -2371,24 +2377,33 @@ end if lea edx,[ecx*3] push word[edx*2+xx1-2] ; zz1 ,2 ,3 - fninit + ; fninit mov eax,dword[esi] shl eax,2 lea eax,[eax*3] ;+point_normals_rotated] add eax,[points_normals_rot_ptr] + if Ext < SSE ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] ; x cooficient of normal vector - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp1] ;word[ebp-2] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp2] ;word[ebp-4] - - mov eax,[temp2] ;word[ebp-4] - mov ebx,[temp1] ;word[ebp-2] + mov eax,[temp2] + mov ebx,[temp1] + else + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + movd eax,xmm0 + psrldq xmm0,4 + movd ebx,xmm0 + end if and 
ebx,0xfffffff ; some onjects need thid 'and' shl eax,TEX_SHIFT add eax,ebx @@ -2456,16 +2471,22 @@ end if shl eax,2 add eax,[points_normals_rot_ptr] ; texture x=(rotated point normal -> x * 255)+255 - fld dword[eax] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi] + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + packssdw xmm0,xmm0 + movd [edi],xmm0 + ; fld dword[eax] + ; fmul dword[correct_texf] + ; fadd dword[correct_texf] + ; fistp word[edi] ; texture y=(rotated point normal -> y * 255)+255 - fld dword[eax+4] - fimul [correct_tex] - fiadd [correct_tex] - fistp word[edi+2] + ; fld dword[eax+4] + ; fmul dword[correct_texf] + ; fadd dword[correct_texf] + ; fistp word[edi+2] and word[edi+2],0x7fff ; some objects need it add edi,4 add esi,4 @@ -2516,7 +2537,15 @@ end if lea eax,[eax*3] shl eax,2 add eax,[points_normals_rot_ptr] + if Ext >= SSE ; texture x=(rotated point normal -> x * 255)+255 + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + packssdw xmm0,xmm0 + movd [edi],xmm0 + else fld dword[eax] fimul [correct_tex] fiadd [correct_tex] @@ -2526,7 +2555,7 @@ end if fimul [correct_tex] fiadd [correct_tex] fistp word[edi+2] - + end if add edi,4 add esi,4 loop @b @@ -2575,19 +2604,28 @@ end if shl eax,2 lea eax,[eax*3] add eax,[points_normals_rot_ptr] - ; texture ;x=(rotated point normal -> x * 255)+255 + if Ext>=SSE2 + movlps xmm0,[eax] + mulps xmm0,[correct_texf] + addps xmm0,[correct_texf] + cvtps2dq xmm0,xmm0 + movd eax,xmm0 + psrldq xmm0,4 + movd ebx,xmm0 + else if + ; texture ;x=(rotated point normal -> x * 255)+255 fld dword[eax] ; x cooficient of normal vector - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd dword[correct_texf] fistp [temp1] ;word[esp-2] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient - fimul [correct_tex] - fiadd [correct_tex] + fmul dword[correct_texf] + fadd 
dword[correct_texf] fistp [temp2] ;word[esp-4] - mov eax,[temp2] ;word[esp-4] mov ebx,[temp1] ;word[esp-2] + end if and ebx,0xfffffff shl eax,TEX_SHIFT add eax,ebx @@ -2799,9 +2837,85 @@ if Ext >= SSE3 mov ecx,dword[xx3] ror ecx,16 mov edi,[screen_ptr] + mov edx,[Zbuffer_ptr] mov esi,[Zbuffer_ptr] call glass_tri + jmp .end_draw + + .glass_tex: + movd xmm5,[size_y_var] + punpcklwd xmm5,[the_zero] + pshufd xmm5,xmm5,01110011b + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_normals_rot_ptr] + add ebx,[points_normals_rot_ptr] + add ecx,[points_normals_rot_ptr] + movups xmm0,[eax] + movups xmm1,[ebx] + movups xmm2,[ecx] + andps xmm0,[zero_hgst_dd] + andps xmm1,[zero_hgst_dd] + andps xmm2,[zero_hgst_dd] + xorps xmm3,xmm3 + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add ecx,[points_rotated_ptr] + push dword[ecx+8] + push dword[ebx+8] + push dword[eax+8] + movups xmm4,[esp] + add esp,12 + andps xmm4,[zero_hgst_dd] + + mov esi,[point_index3] ; tex map coords + shl esi,2 + add esi,tex_points + push dword[esi] + mov esi,[point_index2] + shl esi,2 + add esi,tex_points + push dword[esi] + mov esi,[point_index1] + shl esi,2 + add esi,tex_points + push dword[esi] + movups xmm6,[esp] + add esp,12 + ; pshuflw xmm6,xmm6,10110001b + ; pshufhw xmm6,xmm6,10110001b + + movzx eax,word[size_x_var] + andps xmm6,[zero_hgst_dd] + movd xmm7,eax + pslldq xmm7,12 + por xmm6,xmm7 + + + mov eax,dword[xx1] + ror eax,16 + mov ebx,dword[xx2] + ror ebx,16 + mov ecx,dword[xx3] + ror ecx,16 + mov edx,texmap + mov edi,[screen_ptr] + mov esi,[Zbuffer_ptr] + + call glass_tex_tri jmp .end_draw end if @@ -3184,14 +3298,17 @@ read_from_file: ;mov edi,triangles @@: movzx eax,word[esi] + add eax,ebp stosd movzx eax,word[esi+2] + add eax,ebp stosd movzx eax,word[esi+4] + 
add eax,ebp stosd - add dword[edi-12],ebp - add dword[edi-8],ebp - add dword[edi-4],ebp + ; add dword[edi-12],ebp + ; add dword[edi-8],ebp + ; add dword[edi-4],ebp add esi,8 dec ecx jnz @b @@ -3271,33 +3388,10 @@ ret read_from_disk: -if USE_LFN -;- mov eax, 68 mov ebx, 11 int 0x40 ; -> create heap - ; mov eax, 70 - ; mov ebx, file_info - ; mov dword[ebx], 5 ; -> subfunction number - ; int 0x40 ; -> read file size - ; mov ebx, [fptr] - ; mov ebx, dword[ebx+32] - ; inc ebx - ; mov [fsize], ebx - - - - ; mov eax, 68 - ; mov ebx, 12 - ; mov ecx, [fsize] - ; int 0x40 ; -> allocate memory for file - ; mov [fptr], eax ; -> eax = pointer to allocated mem - - ; mov eax, 70 - ; mov ebx, file_info - ; mov dword[ebx],0 - ; int 0x40 ; -> read file ;load kpacked files by Leency mov eax,68 mov ebx,27 @@ -3314,33 +3408,9 @@ if USE_LFN .open_opened_well: xor eax,eax @@: -else - mov eax,58 - mov ebx,file_info - int 0x40 - - mov eax,ebx - shr eax,9 - inc eax - mov [fsize],eax - - - -; mov ecx,ebx -; add ecx,MEM_END -; mov ebx,1 -; mov eax,64 ; allocate mem - resize app mem - ; for points and triangles - int 0x40 - - mov eax,58 - mov ebx,file_info - int 0x40 -end if - ; eax = 0 -> ok file loaded -ret ; eax = 0 -> ok file loaded ret + read_param: mov esi,I_Param cmp dword[esi],0 @@ -3596,6 +3666,6 @@ ret ; DATA AREA ************************************ - include 'DATA.INC' + include 'data.inc' align 16 MEM_END: diff --git a/programs/develop/libraries/TinyGL/asm_fork/examples/test_array1.asm b/programs/develop/libraries/TinyGL/asm_fork/examples/test_array1.asm index 46dacdb8e8..0ae58241e9 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/examples/test_array1.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/examples/test_array1.asm @@ -171,7 +171,7 @@ delt_size dd 3.0 align 4 house_3ds: ;внедряем файл внутрь программы (в идеальном случае должен открыватся через окно диалога, но для облегчения примера вшит внутрь) -file '../../../../../demos/3DS/3ds_objects/House.3ds' +file 
'../../../../../demos/view3ds/3ds_objects/House.3ds' align 4 Indices rb 0x1a6*6 ;0x1a6 - число граней, на каждую грань по 3 точки, индекс точки 2 байта