diff --git a/programs/demos/3DS/3DMATH.INC b/programs/demos/3DS/3DMATH.INC index 9b22fa6521..e0bbf948c9 100644 --- a/programs/demos/3DS/3DMATH.INC +++ b/programs/demos/3DS/3DMATH.INC @@ -304,6 +304,7 @@ else ; ecx - number of points(normals) ;align 32 movups xmm4,[ebx] + ; lddqu xmm4,[ebx] ; I tried sse3 :D movups xmm5,[ebx+12] movups xmm6,[ebx+24] ;align 32 diff --git a/programs/demos/3DS/BUMP_CAT.INC b/programs/demos/3DS/BUMP_CAT.INC index 4078192dc3..37757dbbf0 100644 --- a/programs/demos/3DS/BUMP_CAT.INC +++ b/programs/demos/3DS/BUMP_CAT.INC @@ -837,7 +837,7 @@ end if mov ebx,.x2 sub ebx,.x1 -if 0 ;Ext >= SSE +if Ext >= SSE sub esp,16 cvtsi2ss xmm3,ebx ;rcps @@ -954,7 +954,7 @@ if Ext>=MMX movq mm2,.dby movq mm3,.dey end if - +;align 16 .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode mov esi,.czbuff ; .czbuff current address in buffer diff --git a/programs/demos/3DS/BUMP_TEX.INC b/programs/demos/3DS/BUMP_TEX.INC index fb19b1643c..f65dc4efff 100644 --- a/programs/demos/3DS/BUMP_TEX.INC +++ b/programs/demos/3DS/BUMP_TEX.INC @@ -60,6 +60,38 @@ bump_tex_triangle_z: .x3 equ word[ebp-18] .y3 equ word[ebp-20] +if 0 ;Ext <= SSE2 + +.dx12 equ dword[edi-4] +.dz12 equ [edi-8] +.dbx12 equ dword[edi-12] +.dby12 equ [edi-16] +.dex12 equ dword[edi-20] +.dey12 equ [edi-24] +.dtx12 equ dword[edi-28] +.dty12 equ [edi-32] + +.dx13 equ dword[ebp-52-4*1] +.dz13 equ [ebp-52-4*2] +.dbx13 equ dword[ebp-52-4*3] +.dby13 equ [ebp-52-4*4] +.dex13 equ dword[ebp-52-4*5] +.dey13 equ [ebp-52-4*6] +.dtx13 equ dword[ebp-52-4*7] +.dty13 equ [ebp-52-4*8] + + +.dx23 equ dword[ebp-(52+4*9)] +.dz23 equ [ebp-(52+4*10)] +.dbx23 equ dword[ebp-(52+4*11)] +.dby23 equ [ebp-(52+4*12)] +.dex23 equ dword[ebp-(52+4*13)] +.dey23 equ [ebp-(52+4*14)] +.dtx23 equ dword[ebp-(52+4*15)] +.dty23 equ [ebp-(52+4*16)] + +else + .dx12 equ dword[ebp-24] .dz12 equ [ebp-28] .dbx12 equ dword[ebp-32] @@ -88,6 +120,8 @@ bump_tex_triangle_z: .dtx23 equ dword[ebp-(52+4*15)] .dty23 equ [ebp-(52+4*16)] +end if + if 
Ext < SSE .cx1 equ dword[ebp-(52+4*17)] ; current variables @@ -194,11 +228,18 @@ end if mov bx,.y2 ; calc delta 12 sub bx,.y1 jnz .bt_dx12_make +if 0 ;Ext >= SSE2 + pxor xmm0,xmm0 + movups .dty12,xmm0 + movups .dey12,xmm0 + sub esp,16 +else mov ecx,8 xor edx,edx @@: push edx ;dword 0 loop @b +end if jmp .bt_dx12_done .bt_dx12_make: movsx ebx,bx @@ -208,7 +249,12 @@ if Ext>=SSE sub esp,32 ; mov eax,256 cvtsi2ss xmm4,[i255d] - cvtsi2ss xmm3,ebx ;rcps + cvtsi2ss xmm3,ebx ;rcps +if 0 ;Ext >= SSE2 + mov edi,ebp + sub edi,512 + or edi,0x0000000f +end if divss xmm3,xmm4 shufps xmm3,xmm3,0 @@ -797,16 +843,36 @@ end if ; push edx ; push edx -;if Ext >= SSE2 -; movups xmm0,.cby1 -; movups xmm1,.cty1 -; movups xmm2,.cby2 -; movups xmm3,.cty2 -; movups xmm4,.dby13 -; movups xmm5,.dty13 -; movups xmm6,.dby12 -; movups xmm7,.dty12 -;end if +if Ext >= SSE2 + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + movups xmm4,.dby13 + movups xmm5,.dty13 + movups xmm6,.dby12 + movups xmm7,.dty12 + .scby1 equ [edi] + .scty1 equ [edi+16] + .scby2 equ [edi+32] + .scty2 equ [edi+48] + .sdby13 equ [edi+64] + .sdty13 equ [edi+80] + .sdby12 equ [edi+96] + .sdty12 equ [edi+128] + push edi + mov edi,sse_repository + movaps .scby1,xmm0 + movaps .scty1,xmm1 + movaps .scby2,xmm2 + movaps .scty2,xmm3 + movaps .sdby13,xmm4 + movaps .sdty13,xmm5 + movaps .sdby12,xmm6 + movaps .sdty12,xmm7 + pop edi + +end if movsx ecx,.y1 cmp cx,.y2 jge .loop12_done @@ -821,14 +887,21 @@ if Ext >= SSE2 movups xmm1,.cty1 movups xmm2,.cby2 movups xmm3,.cty2 - movups xmm4,.dby13 - movups xmm5,.dty13 - movups xmm6,.dby12 - movups xmm7,.dty12 - paddd xmm0,xmm4 - paddd xmm1,xmm5 - paddd xmm2,xmm6 - paddd xmm3,xmm7 + ; movups xmm4,.dby13 + ; movups xmm5,.dty13 + ; movups xmm6,.dby12 + ; movups xmm7,.dty12 + ; paddd xmm0,xmm4 + ; paddd xmm1,xmm5 + ; paddd xmm2,xmm6 + ; paddd xmm3,xmm7 + push edi + mov edi,sse_repository + paddd xmm0,.sdby13 + paddd xmm1,.sdty13 + paddd xmm2,.sdby12 + paddd 
xmm3,.sdty12 + pop edi movups .cby1,xmm0 movups .cty1,xmm1 movups .cby2,xmm2 @@ -939,7 +1012,65 @@ end if movzx ebx,word[.t_y2] shl ebx,ROUND mov .cty2,ebx +if Ext >= SSE2 + movups xmm2,.cby2 + movups xmm3,.cty2 + ; movups xmm4,.dby13 + ; movups xmm5,.dty13 + movups xmm6,.dby23 + movups xmm7,.dty23 +; .scby1 equ [edi] +; .scty1 equ [edi+16] +; .scby2 equ [edi+32] +; .scty2 equ [edi+48] +; .sdby13 equ [edi+64] +; .sdty13 equ [edi+80] + .sdby23 equ [edi+160] + .sdty23 equ [edi+192] + push edi + mov edi,sse_repository +; movaps .scby1,xmm0 +; movaps .scty1,xmm1 + movaps .scby2,xmm2 + movaps .scty2,xmm3 +; movaps .sdby13,xmm4 +; movaps .sdty13,xmm5 + movaps .sdby23,xmm6 + movaps .sdty23,xmm7 + pop edi + +end if + + .loop23: ;if Ext >= SSE2 +; fxsave [sse_repository] +;end if + call .call_line + +if Ext >= SSE2 + + movups xmm0,.cby1 + movups xmm1,.cty1 + movups xmm2,.cby2 + movups xmm3,.cty2 + + + push edi + mov edi,sse_repository + paddd xmm0,.sdby13 + paddd xmm1,.sdty13 + paddd xmm2,.sdby23 + paddd xmm3,.sdty23 + pop edi + movups .cby1,xmm0 + movups .cty1,xmm1 + movups .cby2,xmm2 + movups .cty2,xmm3 + + + + +; fxrstor [sse_repository] ; movups xmm0,.cby1 ; movups xmm1,.cty1 ; movups xmm2,.cby2 @@ -948,31 +1079,14 @@ end if ; movups xmm5,.dty13 ; movups xmm6,.dby23 ; movups xmm7,.dty23 -;end if - .loop23: -;if Ext >= SSE2 -; fxsave [sse_repository] -;end if - call .call_line - -if Ext >= SSE2 -; fxrstor [sse_repository] - movups xmm0,.cby1 - movups xmm1,.cty1 - movups xmm2,.cby2 - movups xmm3,.cty2 - movups xmm4,.dby13 - movups xmm5,.dty13 - movups xmm6,.dby23 - movups xmm7,.dty23 - paddd xmm0,xmm4 - paddd xmm1,xmm5 - paddd xmm2,xmm6 - paddd xmm3,xmm7 - movups .cby1,xmm0 - movups .cty1,xmm1 - movups .cby2,xmm2 - movups .cty2,xmm3 +; paddd xmm0,xmm4 +; paddd xmm1,xmm5 +; paddd xmm2,xmm6 + ; paddd xmm3,xmm7 + ; movups .cby1,xmm0 + ; movups .cty1,xmm1 + ; movups .cby2,xmm2 + ; movups .cty2,xmm3 ; end if if (Ext = MMX) | (Ext = SSE) @@ -1049,31 +1163,50 @@ ret 50 
.call_line: pushad - push .tex_ptr + ; xmm0= cby1,cbx1,cz1,cx1 + ; xmm1= cty1,ctx1,cey1,cex1 +if Ext >= SSE2 + sub esp,8 + shufps xmm1,xmm1,10110001b + shufps xmm3,xmm3,10110001b + movlps [esp],xmm1 +else push dword .cty1 push .ctx1 +end if push dword .cz1 +if Ext>=SSE2 + sub esp,8 + movlps [esp],xmm3 +else push dword .cty2 push .ctx2 +end if push dword .cz2 - push .z_buff - push .t_emap - push .t_bmap +if Ext>=SSE2 + sub esp,32 + movhps [esp+24],xmm3 + shufps xmm2,xmm2,10110001b + movlps [esp+16],xmm2 + movhps [esp+8],xmm1 + shufps xmm0,xmm0,10110001b + movlps [esp],xmm0 ;================================ + +else push dword .cey2 push .cex2 push dword .cby2 push .cbx2 push dword .cey1 push .cex1 -;if Ext >= SSE2 -; sub esp,8 -; shufps xmm0,xmm0,10110100b -; movhps [esp],xmm0 ;================================ -;else - push dword .cby1 push .cbx1 -;end if +end if + + push .tex_ptr + push .z_buff + push .t_emap + push .t_bmap push ecx @@ -1093,25 +1226,26 @@ bump_tex_line_z: ;-------------- edi - pointer to screen buffer ;stack - another parameters : .y equ dword [ebp+4] +.bmap equ dword [ebp+8] ; bump map pointer +.emap equ dword [ebp+12] ; env map pointer +.z_buff equ dword [ebp+16] ; z buffer +.tex_map equ dword [ebp+20] ; texture pointer + +.bx1 equ [ebp+24] ; --- +.by1 equ [ebp+28] ; | +.ex1 equ [ebp+32] ; | +.ey1 equ [ebp+36] ; | +.bx2 equ [ebp+40] ; | +.by2 equ [ebp+44] ; |> b. map and e. map coords +.ex2 equ [ebp+48] ; |> shifted shl ROUND +.ey2 equ [ebp+52] ; --- +.z2 equ [ebp+56] +.tx2 equ [ebp+60] +.ty2 equ [ebp+64] +.z1 equ [ebp+68] +.tx1 equ [ebp+72] +.ty1 equ [ebp+76] -.bx1 equ [ebp+8] ; --- -.by1 equ [ebp+12] ; | -.ex1 equ [ebp+16] ; | -.ey1 equ [ebp+20] ; | -.bx2 equ [ebp+24] ; | -.by2 equ [ebp+28] ; |> b. map and e. 
map coords -.ex2 equ [ebp+32] ; |> shifted shl ROUND -.ey2 equ [ebp+36] ; --- -.bmap equ [ebp+40] ; bump map offset -.emap equ [ebp+44] ; env map offset -.z_buff equ [ebp+48] -.z2 equ [ebp+52] -.tx2 equ [ebp+56] -.ty2 equ [ebp+60] -.z1 equ [ebp+64] -.tx1 equ [ebp+68] -.ty1 equ [ebp+72] -.tex_map equ dword [ebp+76] ; texture offset ( pointer ) .x1 equ [ebp-4] @@ -1152,7 +1286,7 @@ bump_tex_line_z: jl .bl_ok je .bl_end - xchg eax,ebx + if Ext=NON mov edx,.bx1 xchg edx,.bx2 @@ -1199,11 +1333,51 @@ if Ext>=SSE movq .tx1,mm1 movq .tx2,mm0 end if +;if Ext>=SSE2 +; movaps xmm4,xmm0 +; movaps xmm0,xmm2 +; movaps xmm2,xmm4 +; movaps xmm5,xmm1 +; movaps xmm1,xmm3 +; movaps xmm3,xmm5 +;else + xchg eax,ebx mov edx,.z1 xchg edx,.z2 mov .z1,edx +;end if .bl_ok: +;if Ext >= SSE2 +; shufps xmm0,xmm0,11100001b +; shufps xmm2,xmm2,11100001b +; movlps .bx1,xmm0 +; movlps .bx2,xmm2 + + +; shufps xmm0,xmm0,00011011b +; shufps xmm2,xmm2,00011011b +; movd eax,xmm0 +; movd ebx,xmm2 +; shufps xmm0,xmm0,11000110b +; shufps xmm2,xmm2,11000110b +; movd .z1,xmm0 +; movd .z2,xmm2 +; shufps xmm1,xmm1,10110001b +; shufps xmm3,xmm3,10110001b +; movlps .ex1,xmm1 +; movlps .ex2,xmm2 +; movhps .tx1,xmm1 +; movhps .tx2,xmm2 + +; xchg eax,ebx +; mov edx,.z1 +; xchg edx,.z2 +; mov .z1,edx + + +;end if + push eax push ebx ;store x1, x2 cmp dword .x1,SIZE_X diff --git a/programs/demos/3DS/DATA.INC b/programs/demos/3DS/DATA.INC index b9ef034e14..a68f06611e 100644 --- a/programs/demos/3DS/DATA.INC +++ b/programs/demos/3DS/DATA.INC @@ -41,7 +41,7 @@ db 3 db 'shd. model' - db 11 + db 12 dr_flag db 0 dd shd_f @@ -214,6 +214,7 @@ flags: ; flags description db '2tex' db 'btex' db 'cenv' + db 'grdl' spd_f: db 'idle' db 'full' @@ -298,7 +299,7 @@ base_vector: if Ext=SSE2 db ' (SSE2)' end if - db ' 0.061',0 + db ' 0.062',0 labellen: STRdata db '-1 ' @@ -401,7 +402,11 @@ align 8 xx3 dw ?;+56 yy3 dw ? zz3 dw ? ; xx1 + 16 - scale dd ? ; help scale variable + col1 dd ? + col2 dd ? + col3 dd ? + scale dd ? 
; help scale variable
+	edges_counter dd ?
 	;==
 	triangles_count_var dw ?
 	points_count_var dw ?
@@ -415,6 +420,7 @@ align 8
 	points_translated_ptr dd ?
 	screen_ptr dd ?
 	Zbuffer_ptr dd ?
+	edges_ptr dd ?
 	;===
@@ -492,6 +498,9 @@ align 16
 	tex_points rb points_count * 4 ; bump_map and texture coords
 	; each point word x, word y
 align 16
+if Ext >= SSE2
+	sse_repository rb 1024
+end if
 ; SourceFile: ; source file temporally in screen area
 ; workarea dd ?
diff --git a/programs/demos/3DS/FLAT_CAT.INC b/programs/demos/3DS/FLAT_CAT.INC
index 79dabae351..8f11b96a66 100644
--- a/programs/demos/3DS/FLAT_CAT.INC
+++ b/programs/demos/3DS/FLAT_CAT.INC
@@ -370,6 +370,8 @@ flat_line_z:
 	jecxz .draw_last
 .ddraw:
 	cmp ebx,dword[esi]
+	; cmovl [edi],eax
+	; cmovl [esi],ebx
 	jge @f
 	stosd
 	dec edi
diff --git a/programs/demos/3DS/GRD_LINE.INC b/programs/demos/3DS/GRD_LINE.INC
new file mode 100644
index 0000000000..b7d252da40
--- /dev/null
+++ b/programs/demos/3DS/GRD_LINE.INC
@@ -0,0 +1,509 @@
+;-procedure draws smooth shaded lines (I mean interpolation 24 bit--
+;-color), with z coord interpolation--------------------------------
+;-author: Maciej Guba (www.macgub.hekko.pl)-------------------------
+;-in : -------------------------------------------------------------
+;----- edi - pointer to screen buffer ------------------------------
+;----- esi - pointer to Z buffer -----------------------------------
+;------ constants : SIZE_X, SIZE_Y - screen width and height--------
+;----------------- ROUND - fixed point shift------------------------
+;------ other parameters via stack----------------------------------
+;------ (12 words at [esp+4]..[esp+27] on entry, in this order: ----
+;------  x1,y1,z1,r1,g1,b1,x2,y2,z2,r2,g2,b2; released by ret 24) --
+;------ screen pixel = 3 bytes (b,g,r), Z buffer entry = 4 bytes ---
+;------ a pixel is written only where Z buffer value > current z ---
+;------ the end point of the line itself is not drawn --------------
+;-out : nothing; eax,ebx,ecx,edx,esi,edi,ebp are not preserved, ----
+;------ mm0-mm3 / xmm0,xmm1 are used depending on Ext --------------
+smooth_line:
+.x1 equ ebp+4
+.y1 equ ebp+6
+.z1 equ ebp+8
+.r1 equ ebp+10
+.g1 equ ebp+12
+.b1 equ ebp+14
+.x2 equ ebp+16
+.y2 equ ebp+18
+.z2 equ ebp+20
+.r2 equ ebp+22
+.g2 equ ebp+24
+.b2 equ ebp+26
+
+; locals; .dz,.db,.dg,.dr and .cz,.cb,.cg,.cr are each 16 contiguous
+; bytes, so one SIMD load/add handles a whole group
+.line_lenght equ ebp-2
+.delta equ ebp-6
+.delta_x equ ebp-10
+.delta_y equ ebp-14
+.dr equ ebp-18
+.dg equ ebp-22
+.db equ ebp-26
+.dz equ ebp-30
+.cr equ ebp-34
+.cg equ ebp-38
+.cb equ ebp-42
+.cz equ ebp-46
+
+;.line_lenght equ ebp-48
+.screen equ ebp-52
+.zbuffer equ ebp-56
+.ccoord equ ebp-60 ;current coordinate
+.czbuf equ ebp-64
+.cscr equ ebp-68
+;.lasty equ ebp-72
+
+; advance current z and colour by their per-pixel deltas
+; (MMX/SSE use mm2,mm3 and SSE2 uses xmm0 as loaded by .calc_delta)
+macro .update_cur_var
+{
+if Ext=NON
+	mov	ebx,[.dz]
+	add	[.cz],ebx
+	mov	ebx,[.dr]
+	add	[.cr],ebx
+	mov	ebx,[.dg]
+	add	[.cg],ebx
+	mov	ebx,[.db]
+	add	[.cb],ebx
+else if (Ext = MMX) | (Ext = SSE)
+	movq	mm0,[.cz]
+	movq	mm1,[.cg]
+	paddd	mm0,mm2 ;[.dz]
+	paddd	mm1,mm3 ;[.dg]
+	movq	[.cz],mm0
+	movq	[.cg],mm1
+else if Ext >= SSE2
+	movups	xmm1,[.cz]
+	paddd	xmm1,xmm0
+	movups	[.cz],xmm1
+end if
+}
+; store z (held in ebx) to Z buffer and current colour to screen
+macro .draw_pixel
+{
+	mov	[esi],ebx ; actualize Z buffer
+;if Ext=SSE2
+;	movups	xmm2,[.cb]
+	; shufps xmm1,xmm1,11000110b
+	; pand xmm1,[.mask]
+;	psrld	xmm2,ROUND
+;	packssdw xmm2,xmm2
+;	packuswb xmm2,xmm2
+;	movss	[edi],xmm2
+;else
+
+	mov	eax,[.cb]
+	sar	eax,ROUND
+	mov	[edi],al
+;	and	eax,0x000000ff ; clean unused bits
+	mov	ebx,[.cg]
+	sar	ebx,ROUND
+	mov	[edi+1],bl
+;	mov	ah,bl
+	mov	edx,[.cr]
+	sar	edx,ROUND
+	mov	[edi+2],dl
+;end if
+;	shl	ebx,16
+;	or	eax,ebx
+;	mov	[edi],eax
+}
+; exchange point 1 and point 2 (all six words of each)
+macro .sort
+{
+
+if Ext >= MMX
+	movq	mm0,[.x1]
+	movq	mm1,[.x2]
+	movq	[.x1],mm1
+	movq	[.x2],mm0
+else
+	mov	edx,[.x1]
+	xchg	edx,[.x2]
+	mov	[.x1],edx
+	mov	edx,[.z1]
+	xchg	edx,[.z2]
+	mov	[.z1],edx
+end if
+	mov	edx,[.g1]
+	xchg	edx,[.g2]
+	mov	[.g1],edx
+}
+; load colour and z of point 1 as fixed point current values (uses ebx)
+macro .init_cur_var
+{
+	movsx	ebx,word[.r1]
+	shl	ebx,ROUND
+	mov	[.cr],ebx
+	movsx	ebx,word[.g1]
+	shl	ebx,ROUND
+	mov	[.cg],ebx
+	movsx	ebx,word[.b1]
+	shl	ebx,ROUND
+	mov	[.cb],ebx
+	movsx	ebx,word[.z1]
+	shl	ebx,ROUND
+	mov	[.cz],ebx
+}
+
+	emms
+	mov	ebp,esp
+	sub	esp,128
+	mov	eax,[.x1] ; check if parameters exceed screen area
+	mov	ebx,[.x2]
+	or	eax,ebx
+	test	eax,80008000h ; sign bit of x or y set in either point
+	jne	.end_line
+	cmp	word[.x1],SIZE_X ; valid columns are 0..SIZE_X-1
+	jge	.end_line
+	cmp	word[.x2],SIZE_X
+	jge	.end_line
+	cmp	word[.y1],SIZE_Y ; valid rows are 0..SIZE_Y-1
+	jge	.end_line
+	cmp	word[.y2],SIZE_Y
+	jge	.end_line
+
+	mov	[.screen],edi
+	mov	cx,[.x1]
+	cmp	cx,[.x2]
+	je	.vertical_l
+	mov	cx,[.y1]
+	cmp	cx,[.y2]
+	je	.horizontal_l
+	mov	ax,[.x1]
+	sub	ax,[.x2]
+	cmp	ax,0
+	jg	@f
+	neg	ax ; calc absolute value
+ @@:
+	mov	[.delta_x],ax
+	mov	bx,[.y1]
+	sub	bx,[.y2]
+	cmp	bx,0
+	jg	@f
+	neg	bx
+ @@:
+	mov	[.delta_y],bx
+	cmp	ax,bx
+	je	.deg45_l
+	jl	.more_vertical_l
+	jg	.more_horizon_l
+	jmp	.end_line
+	;
+.horizontal_l:
+	mov	ax,[.x1]
+	mov	bx,[.x2]
+	cmp	bx,ax
+	jge	@f
+	.sort
+@@:
+	mov	bx,[.x2]
+	sub	bx,[.x1]
+	movsx	ebx,bx
+	cmp	ebx,0 ; line length equal 0
+	je	.end_line
+	mov	[.delta_x],ebx
+
+	call	.calc_delta
+
+	mov	eax,SIZE_X
+	movsx	ebx,word[.y1]
+	mul	ebx
+	add	esi,eax
+	lea	eax,[eax*3]
+	add	esi,eax
+	add	edi,eax
+	movsx	eax,word[.x1]
+	add	esi,eax
+	lea	eax,[eax*3]
+	add	edi,eax
+	add	esi,eax
+
+	mov	ecx,[.delta_x]
+	.init_cur_var
+.hdraw:
+	mov	ebx,[.cz]
+	cmp	[esi],ebx
+	jle	.skip
+	.draw_pixel
+.skip:
+	add	edi,3
+	add	esi,4
+	.update_cur_var
+	loop	.hdraw
+	jmp	.end_line
+
+.vertical_l:
+	mov	ax,[.y1]
+	cmp	[.y2],ax
+	jge	@f
+	.sort
+@@:
+	mov	bx,[.y2]
+	sub	bx,[.y1]
+	movsx	ebx,bx
+	cmp	ebx,0
+	je	.end_line
+	mov	[.delta_y],ebx
+
+	call	.calc_delta
+
+	mov	eax,SIZE_X
+	movsx	ebx,word[.y1]
+	mul	ebx
+	add	esi,eax
+	lea	eax,[eax*3]
+	add	edi,eax
+	add	esi,eax
+	movsx	eax,word[.x1]
+	add	esi,eax
+	lea	eax,[eax*3]
+	add	esi,eax
+	add	edi,eax
+
+	mov	ecx,[.delta_y]
+	.init_cur_var
+.v_draw:
+	mov	ebx,[.cz]
+	cmp	[esi],ebx
+	jle	@f
+	.draw_pixel
+@@:
+	add	edi,SIZE_X*3
+	add	esi,SIZE_X*4
+	.update_cur_var
+	loop	.v_draw
+	jmp	.end_line
+
+.deg45_l:
+	mov	word[.line_lenght],ax
+	mov	ax,[.x1]
+	cmp	[.x2],ax
+	jge	@f
+	.sort
+@@:
+	mov	bx,[.y2]
+	sub	bx,[.y1]
+	movsx	ebx,bx
+	cmp	ebx,0
+	je	.end_line
+	mov	[.delta_y],ebx
+	mov	bx,[.x2]
+	sub	bx,[.x1]
+	movsx	ebx,bx
+	mov	[.delta_x],ebx
+
+	call	.calc_delta
+
+	mov	eax,SIZE_X
+	movsx	ebx,word[.y1] ;calc begin values in screen and Z buffers
+	mul	ebx
+	lea	ebx,[3*eax]
+	add	edi,ebx
+	shl	eax,2
+	add	esi,eax
+	movsx	eax,word[.x1]
+	lea	ebx,[eax*3]
+	add	edi,ebx
+	shl	eax,2
+	add	esi,eax
+
+	movzx	ecx,word[.line_lenght]
+	.init_cur_var
+.d45_draw:
+	mov	ebx,[.cz]
+	cmp	[esi],ebx
+	jle	@f
+	.draw_pixel
+@@:
+	cmp	dword[.delta_y],0
+	jl	@f
+	add	edi,SIZE_X*3+3
+	add	esi,SIZE_X*4+4
+	jmp	.d45_1
+@@:
+	sub	edi,(SIZE_X*3)-3
+	sub	esi,(SIZE_X*4)-4
+.d45_1:
+	.update_cur_var
+	loop	.d45_draw
+	jmp	.end_line
+
+.more_vertical_l:
+	mov	word[.line_lenght],bx
+	mov	ax,[.y1]
+	cmp	[.y2],ax
+	jge	@f
+	.sort
+@@:
+	mov	bx,[.y2]
+	sub	bx,[.y1]
+	movsx	ebx,bx
+	cmp	ebx,0
+	je	.end_line ;=======================
+	mov	[.delta_y],ebx
+
+	mov	ax,[.x2]
+	sub	ax,[.x1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.delta],eax
+
+	call	.calc_delta
+
+	mov	eax,SIZE_X
+	movsx	ebx,word[.y1] ;calc begin values in screen and Z buffers
+	mul	ebx
+	lea	ebx,[3*eax]
+	add	esi,ebx
+	add	esi,eax
+	add	edi,ebx
+	mov	[.cscr],edi
+	mov	[.czbuf],esi
+
+	movzx	ecx,word[.line_lenght]
+	.init_cur_var
+	movsx	ebx,word[.x1]
+	shl	ebx,ROUND
+	mov	[.ccoord],ebx ; .ccoord -> x coordinate
+.draw_m_v:
+	mov	edi,[.cscr]
+	mov	esi,[.czbuf]
+	mov	eax,[.ccoord]
+	sar	eax,ROUND
+	lea	ebx,[eax*3]
+	add	edi,ebx
+	add	esi,ebx
+	add	esi,eax
+	mov	ebx,[.cz]
+	cmp	[esi],ebx
+	jle	@f
+	.draw_pixel
+@@:
+	mov	eax,[.delta]
+	add	[.ccoord],eax
+	add	dword[.cscr],SIZE_X*3 ;
+	add	dword[.czbuf],SIZE_X*4
+.d_m_v1:
+	.update_cur_var
+	dec	ecx
+	jnz	.draw_m_v
+	jmp	.end_line
+
+.more_horizon_l:
+	mov	word[.line_lenght],ax
+	mov	ax,[.x1]
+	cmp	[.x2],ax
+	jge	@f
+	.sort
+@@:
+	mov	bx,[.x2]
+	sub	bx,[.x1]
+	movsx	ebx,bx
+	cmp	ebx,0;=======================
+	je	.end_line
+	mov	[.delta_x],ebx
+
+	mov	ax,[.y2]
+	sub	ax,[.y1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.delta],eax
+
+	call	.calc_delta
+
+	;calc begin values in screen and Z buffers
+	movsx	ebx,word[.x1]
+	mov	eax,ebx
+	add	esi,ebx
+	lea	ebx,[3*ebx]
+	add	esi,ebx
+	add	edi,ebx
+	mov	[.cscr],edi
+	mov	[.czbuf],esi
+
+	movzx	ecx,word[.line_lenght]
+	.init_cur_var
+	movsx	ebx,word[.y1]
+	shl	ebx,ROUND
+	mov	[.ccoord],ebx ; .ccoord -> y coordinate
+
+.draw_m_h:
+	mov	edi,[.cscr]
+	mov	esi,[.czbuf]
+	mov	eax,[.ccoord] ; ccoord - cur y coordinate
+	sar	eax,ROUND
+	mov	ebx,SIZE_X
+	mul	ebx
+	add	esi,eax
+	lea	eax,[eax*3]
+	add	esi,eax
+	add	edi,eax
+	mov	ebx,[.cz]
+	cmp	[esi],ebx
+	jle	@f
+	.draw_pixel
+@@:
+	mov	eax,[.delta]
+	add	[.ccoord],eax
+	add	dword[.cscr],3 ;
+	add	dword[.czbuf],4
+	.update_cur_var
+	dec	ecx
+	jnz	.draw_m_h
+
+.end_line:
+if Ext >= MMX
+	emms ; MMX regs may have been used above; free x87 tags for FPU code that runs after return
+end if
+	mov	esp,ebp
+	ret	24
+
+; in: ebx = number of steps (callers guarantee non-zero)
+; out: .dz,.dr,.dg,.db = fixed point deltas per step; eax,edx destroyed
+.calc_delta:
+	mov	ax,[.z2]
+	sub	ax,[.z1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.dz],eax
+
+	mov	ax,[.r2]
+	sub	ax,[.r1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.dr],eax
+
+	mov	ax,[.g2]
+	sub	ax,[.g1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.dg],eax
+
+	mov	ax,[.b2]
+	sub	ax,[.b1]
+	cwde
+	shl	eax,ROUND
+	cdq
+	idiv	ebx
+	mov	[.db],eax
+if (Ext = MMX) | (Ext = SSE)
+	movq	mm2,[.dz]
+	movq	mm3,[.dg]
+else if Ext >= SSE2
+	movups	xmm0,[.dz]
+end if
+ret
+;align 16
+;.mask:
+;	dq 0xffffffffffffffff
+;	dq 0xffffffff00000000
diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM
index 86cf2acef6..a53ea832a2 100644
--- a/programs/demos/3DS/VIEW3DS.ASM
+++ b/programs/demos/3DS/VIEW3DS.ASM
@@ -1,5 +1,5 @@
-; application :
View3ds ver. 0.061 - tiny .3ds files viewer. +; application : View3ds ver. 0.062 - tiny .3ds files viewer. ; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba @@ -35,7 +35,7 @@ NON = 0 ; -/ \- MMX = 1 SSE = 2 SSE2 = 3 -Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } +Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) USE_LFN = 1 @@ -665,6 +665,8 @@ end if int 40h + ; addsubps xmm0,xmm0 + jmp still @@ -676,6 +678,7 @@ include "FLAT_CAT.INC" include "TEX_CAT.INC" include "BUMP_CAT.INC" include "3DMATH.INC" +include "GRD_LINE.INC" include "GRD3.INC" include "FLAT3.INC" include "BUMP3.INC" @@ -687,6 +690,7 @@ include "GRD_TEX.INC" include "TWO_TEX.INC" + alloc_buffer_mem: movzx ecx,[size_x] movzx eax,[size_y] @@ -1585,6 +1589,7 @@ ret draw_triangles: mov esi,[triangles_ptr] + mov [edges_counter],0 .again_dts: mov ebp,[points_translated_ptr] if Ext=NON @@ -1690,6 +1695,8 @@ draw_triangles: je .bump_tex cmp [dr_flag],10 je .cubic_env_mapping + cmp [dr_flag],11 + je .draw_smooth_line ; **************** mov esi,point_index3 ; do Gouraud shading mov ecx,3 @@ -2379,9 +2386,151 @@ draw_triangles: call bump_tex_triangle_z + jmp .end_draw + + .draw_smooth_line: + mov esi,point_index3 + mov ecx,3 + .again_line_param: + movzx eax,word[esi] + shl eax,2 + lea eax,[eax*3] + add eax,[points_normals_rot_ptr] + ; texture ;x=(rotated point normal -> x * 255)+255 + fld dword[eax] ; x cooficient of normal vector + fimul [correct_tex] + fiadd [correct_tex] + fistp word[esp-2] + ; texture y=(rotated point normal -> y * 255)+255 + fld dword[eax+4] ; y cooficient + fimul [correct_tex] + fiadd [correct_tex] + fistp word[esp-4] + + movzx eax,word[esp-4] + movzx ebx,word[esp-2] + shl eax,TEX_SHIFT + add eax,ebx + lea eax,[eax*3+color_map] + mov eax,dword[eax] + lea ebx,[ecx-1] + shl ebx,2 + mov [ebx+col1],eax + + sub esi,2 + dec ecx + jnz .again_line_param + + mov eax,[edges_ptr] + add eax,[edges_counter] + mov 
bl,[eax] + test bl,00000001b + jz @f + mov edi,screen + mov esi,[Zbuffer_ptr] + + mov eax,[col1] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz1] + push [yy1] + push [xx1] + + mov eax,[col2] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz2] + push [yy2] + push [xx2] + + call smooth_line + @@: + mov eax,[edges_ptr] + add eax,[edges_counter] + mov bl,[eax] + test bl,00000010b + jz @f + + mov edi,screen + mov esi,[Zbuffer_ptr] + + mov eax,[col1] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz1] + push [yy1] + push [xx1] + + mov eax,[col3] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz3] + push [yy3] + push [xx3] + + call smooth_line + @@: + + mov eax,[edges_ptr] + add eax,[edges_counter] + mov bl,[eax] + test bl,00000100b + jz @f + + mov edi,screen + mov esi,[Zbuffer_ptr] + + mov eax,[col3] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz3] + push [yy3] + push [xx3] + + mov eax,[col2] + movzx bx,al + push bx ; b + movzx bx,ah + push bx + rol eax,16 + xor ah,ah + push ax + push [zz2] + push [yy2] + push [xx2] + + call smooth_line + @@: + .end_draw: pop esi add esi,6 + inc [edges_counter] cmp dword[esi],-1 jne .again_dts ret @@ -2419,6 +2568,7 @@ read_from_file: mov [EndFile],eax ; add esi,6 + mov eax,[edges_ptr] @@: cmp [esi],word 3D3Dh je @f @@ -2503,6 +2653,9 @@ read_from_file: add word[edi-6],bp add word[edi-4],bp add word[edi-2],bp + mov dl,byte[esi] + mov [eax],dl + inc eax add esi,2 dec ecx jnz @b @@ -2620,6 +2773,7 @@ alloc_mem_for_tp: @@: mov ebx,20 .alloc: + movzx ecx, [triangles_count_var] inc ecx lea ecx, [ecx*3] @@ -2628,6 +2782,14 @@ alloc_mem_for_tp: int 0x40 ; -> allocate memory to triangles mov [triangles_ptr], eax ; -> eax = pointer to allocated mem + mov eax, 68 + movzx ecx, [triangles_count_var] + inc 
ecx + mov edx,[edges_ptr] + int 0x40 ; -> allocate memory to edges + mov [edges_ptr], eax ; -> eax = pointer to allocated mem + + ; mov eax, 68 ; mov ebx, 12 ; movzx ecx, [triangles_count_var]