diff --git a/programs/demos/3DS/BUMP_CAT.INC b/programs/demos/3DS/BUMP_CAT.INC index bae911c2b4..1e004b67a4 100644 --- a/programs/demos/3DS/BUMP_CAT.INC +++ b/programs/demos/3DS/BUMP_CAT.INC @@ -805,7 +805,7 @@ end if mov ebx,.x2 sub ebx,.x1 -if Ext >= SSE +if 0 ;Ext >= SSE sub esp,16 cvtsi2ss xmm3,ebx ;rcps @@ -913,29 +913,15 @@ end if push .z1 ; current z shl CATMULL_SHIFT push esi -; It's my first attempt at MMX :), have mercy - Macgub - -;; if Ext = MMX -; mov dword[.temp1],esi -; mov dword[.temp1+4],esi -;; movq mm0,.cbyq ; mm0 - current bump coords -;; movq mm1,.ceyq ; mm1 - current env coords -;; movq mm2,.dbyq ; mm2 - delta bump -;; movq mm3,.deyq ; mm3 - delta env -; movd mm6,.z1 ; mm6 - cur z -; movq mm7,qword.[temp1] ; mm7 = lo = hi dword = current z buff -;; mov dword [.temp2],1 -;; mov dword [.temp2+4],-1 -;; mov dword [.temp3],TEXTURE_SIZE -;; mov dword [.temp3+4],TEXTURE_SIZE -;; mov esi,.bmap -;; mov dword [.temp4],esi -;; mov dword [.temp4+4],esi -;; mov dword [.temp5],TEX_X -;; mov dword [.temp5+4],- TEX_X -; mov dword [.temp1],TEX_SHIFT -; mov dword [.temp1+4],0 -;; end if +;if Ext = SSE2 +; movups xmm1,.dey +;end if +if Ext>=MMX + movq mm0,.cby + movq mm1,.cey + movq mm2,.dby + movq mm3,.dey +end if .draw: ; if TEX = SHIFTING ;bump drawing only in shifting mode @@ -943,29 +929,22 @@ end if mov ebx,.cz ; .cz - cur z position cmp ebx,dword[esi] jge .skip -;; if Ext=NON + +if Ext>=MMX + movq mm6,mm0 + psrad mm6,ROUND ; signed shift, same as sar in the else branch + movd eax,mm6 + psrlq mm6,32 + movd esi,mm6 +else mov eax,.cby sar eax,ROUND mov esi,.cbx sar esi,ROUND -;; else -;; movq mm4,mm0 ; mm4 - copies of cur bump coords -;; psrad mm4,ROUND ; mm4 = lo dword = y b coord, hi dword = x b coord -;; movd eax,mm4 ; - -;; psrlq mm4,32 ; - -;; movd esi,mm4 ; - -;; -;;; punpckldq mm5,mm4 ; -;;; psllq mm5,TEX_SHIFT -;;; paddq mm4,mm5 ; mm4 - lo dword index to b. map -;; -;; ; packqd mm4,mm5 - ; movq mm5,mm4 ; mm5 ~~ current bump map index? 
- -;; end if +end if shl eax,TEX_SHIFT ;- add esi,eax ;- ; esi - current bump map index -;; if Ext = NON + mov ebx,esi dec ebx and ebx,TEXTURE_SIZE @@ -977,21 +956,9 @@ end if and ebx,TEXTURE_SIZE add ebx,.bmap movzx ebx,byte [ebx] -;; else ;------------------------------------------- -;; mov dword [.temp1],esi ;- -;; mov dword [.temp1+4],esi ;- -;; movq mm5, qword[.temp1] ;- -;; paddd mm5, qword[.temp2] ; .temp2 == low dword = 1, high dword = -1 -;; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE -;; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = .bmap -;; movd ebx,mm5 -;; psrlq mm5,32 -;; movd eax,mm5 -;; movzx ebx,byte[ebx] -;; movzx eax,byte[eax] -;; end if + sub eax,ebx -;; if Ext=NON + mov ebx,esi sub ebx,TEX_X and ebx,TEXTURE_SIZE @@ -1003,21 +970,11 @@ end if and ebx,TEXTURE_SIZE add ebx,.bmap movzx ebx,byte [ebx] -;; else -;; movq mm5, qword[.temp1] ;- -;; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X -;; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE -;; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap -;; movd ebx,mm5 -;; psrlq mm5,32 -;; movd edx,mm5 -;; movzx ebx,byte[ebx] -;; movzx edx,byte[edx] -;; end if + sub edx,ebx ; eax - horizontal sub ; edx - vertical sub -;; if Ext=NON +if Ext = NON mov ebx,.cex ;.cex - current env map X sar ebx,ROUND add eax,ebx ; eax - modified x coord @@ -1025,19 +982,15 @@ end if mov ebx,.cey ;.cey - current env map y sar ebx,ROUND add edx,ebx ; edx - modified y coord -;; else -;; movq mm5,mm1 ; mm5 - copy of cur env coords -;; psrad mm5,ROUND -;; movq qword[.temp1],mm5 -;; add eax,dword [.temp1] -;; add edx,dword [.temp1+4] -;; ; movd ebx,mm5 -;; ; add eax,ebx -;; ; psrlq mm5,32 -;; ; movd ebx,mm5 - ; add edx,ebx -;; end if - +else + movq mm6,mm1 ; mm6 - copy of cur env coords + psrad mm6,ROUND ; signed shift, same as sar in the NON branch + movd ebx,mm6 + psrlq mm6,32 + add eax,ebx + movd ebx,mm6 + add edx,ebx +end if or eax,eax jl .black cmp eax,TEX_X @@ -1049,9 +1002,9 @@ 
end if shl edx,TEX_SHIFT add edx,eax - lea edx,[edx*3] - add edx,.emap - mov eax,dword[edx] + lea esi,[edx*3] + add esi,.emap + lodsd jmp .put_pixel .black: xor eax,eax @@ -1066,15 +1019,19 @@ end if add edi,3 .no_skip: add .czbuff,4 -;; if Ext = NON + +;if Ext = SSE2 +; movups xmm0,.cey +; paddd xmm0,xmm1 +; movups .cey,xmm0 +; +;end if if Ext >= MMX - movq mm0,.cby - movq mm1,.cey - paddd mm0,.dby - paddd mm1,.dey - movq .cby,mm0 - movq .cey,mm1 -else + paddd mm0,mm2 + paddd mm1,mm3 +end if + +if Ext=NON mov eax,.dbx add .cbx,eax mov eax,.dby @@ -1084,10 +1041,6 @@ else mov eax,.dey add .cey,eax end if -;; else -;; paddd mm0,mm2 -;; paddd mm1,mm3 -;; end if mov eax,.dz add .cz,eax diff --git a/programs/demos/3DS/BUMP_TEX.INC b/programs/demos/3DS/BUMP_TEX.INC index c09972e76a..43b5ad152c 100644 --- a/programs/demos/3DS/BUMP_TEX.INC +++ b/programs/demos/3DS/BUMP_TEX.INC @@ -1593,6 +1593,19 @@ end if push dword .tx1 ; .ctx push dword .ty1 ; .cty push edi ; .c_scr +;if Ext = SSE2 +; mov eax,TEXTURE_SIZE +; movd xmm1,eax +; shufps xmm1,xmm1,0 +; push dword TEX_X +; push dword -TEX_X +; push dword 1 +; push dword -1 +; movups xmm2,[esp] +; movd xmm3,.bmap +; shufps xmm3,xmm3,0 +;end if + if Ext>=MMX movq mm7,.cty movq mm6,.cby @@ -1620,10 +1633,35 @@ else movd esi,mm1 end if - shl eax,TEX_SHIFT add esi,eax ;- ; esi - current bump map index +;if Ext = SSE2 +; +; movd xmm0,esi +; shufps xmm0,xmm0,0 +; paddd xmm0,xmm2 +; pand xmm0,xmm1 +; paddd xmm0,xmm3 +; +; movd ebx,xmm0 +; movzx eax,byte[ebx] +; +; shufps xmm0,xmm0,11100001b +; movd ebx,xmm0 +; movzx ebx,byte[ebx] +; sub eax,ebx +; +; shufps xmm0,xmm0,11111110b +; movd ebx,xmm0 +; movzx edx, byte [ebx] +; +; shufps xmm0,xmm0,11111111b +; movd ebx,xmm0 +; movzx ebx, byte [ebx] +; sub edx,ebx +; +;else mov ebx,esi dec ebx and ebx,TEXTURE_SIZE @@ -1649,7 +1687,7 @@ end if add ebx,.bmap movzx ebx,byte [ebx] sub edx,ebx - +;end if ; eax - horizontal sub modificated x coord ; edx - vertical sub modificated y coord diff 
--git a/programs/demos/3DS/B_PROCS.INC b/programs/demos/3DS/B_PROCS.INC index ed265575e5..342a779cd3 100644 --- a/programs/demos/3DS/B_PROCS.INC +++ b/programs/demos/3DS/B_PROCS.INC @@ -739,7 +739,93 @@ blur_screen: ;blur n times ; blur or fire ;in - ecx times count ;.counter equ dword[esp-4] .counter1 equ dword[esp-8] -if Ext>=MMX +if Ext>=SSE2 + push ebp + mov ebp,esp + push dword 0x01010101 + movss xmm5,[esp] + shufps xmm5,xmm5,0 + .again_blur: + push ecx + mov edi,screen + mov ecx,SIZE_X*3/4 + xor eax,eax + rep stosd + + mov ecx,(SIZE_X*(SIZE_Y-3))*3/16 + .blr: + @@: + movups xmm0,[edi+SIZE_X*3] + movups xmm1,[edi-SIZE_X*3] + movups xmm2,[edi-3] + movups xmm3,[edi+3] + + pavgb xmm0,xmm1 + pavgb xmm2,xmm3 + pavgb xmm0,xmm2 + + psubusb xmm0,xmm5 ; importand if fire + + movups [edi],xmm0 + add edi,16 + add esi,16 + + loop .blr + + xor eax,eax + mov ecx,SIZE_X*3/4 + rep stosd + pop ecx + loop .again_blur + mov esp,ebp + pop ebp +end if + +if Ext=SSE + emms + push ebp + mov ebp,esp + push dword 0x01010101 + push dword 0x01010101 + movq mm4,[esp] + .again_blur: + push ecx + mov edi,screen + mov ecx,SIZE_X*3/4 + ; pxor mm5,mm5 + xor eax,eax + rep stosd + + mov ecx,(SIZE_X*(SIZE_Y-3))*3/8 + .blr: + @@: + movq mm0,[edi+SIZE_X*3] + movq mm1,[edi-SIZE_X*3] + movq mm2,[edi-3] + movq mm3,[edi+3] + + pavgb mm0,mm1 + pavgb mm2,mm3 + pavgb mm0,mm2 + + psubusb mm0,mm4 ; importand if fire + + movq [edi],mm0 + add edi,8 + add esi,8 + + loop .blr + + xor eax,eax + mov ecx,SIZE_X*3/4 + rep stosd + pop ecx + loop .again_blur + mov esp,ebp + pop ebp +end if + +if Ext=MMX emms push ebp mov ebp,esp diff --git a/programs/demos/3DS/data.inc b/programs/demos/3DS/DATA.INC similarity index 93% rename from programs/demos/3DS/data.inc rename to programs/demos/3DS/DATA.INC index f1cedd29dc..d5967e30d2 100644 --- a/programs/demos/3DS/data.inc +++ b/programs/demos/3DS/DATA.INC @@ -268,7 +268,10 @@ base_vector: if Ext=SSE db ' (SSE)' end if - db ' 0.057' + if Ext=SSE2 + db ' (SSE2)' + end if + db 
' 0.059' labellen: STRdata db '-1 ' @@ -294,7 +297,7 @@ else dd workarea dd hash_table file_name: - db '/rd/1/house.3ds',0 + db '/rd/1/teapot.3ds',0 end if I_END: diff --git a/programs/demos/3DS/History.txt b/programs/demos/3DS/History.txt index 44a10ac418..87ed414698 100644 --- a/programs/demos/3DS/History.txt +++ b/programs/demos/3DS/History.txt @@ -1,3 +1,14 @@ +View3ds 0.059 - June 2011. +1. Bump and parallel two texture mapping function optimizations. + (files bump_cat.inc & two_tex.inc) + On my P4 the changes are hardly visible, but on a dual core in KlbrInWin + the optimizations run pretty nicely. +----------------------------------------------------------------------------------- + +View3ds 0.058 - June 2011. +1. Blur function optimization on SSE and SSE2 (buttons 'blur' and 'fire'). +----------------------------------------------------------------------------------- + View3ds 0.057 - April 2011. 1. By opening file bigger then ~18 KB, and choosing env mode program terminate. I remove this bug diff --git a/programs/demos/3DS/TWO_TEX.INC b/programs/demos/3DS/TWO_TEX.INC index c2f38fa130..1654d6975e 100644 --- a/programs/demos/3DS/TWO_TEX.INC +++ b/programs/demos/3DS/TWO_TEX.INC @@ -990,10 +990,10 @@ if Ext=NON else - movq mm5,.cey + movq mm5,mm4 ;.cey psrad mm5,ROUND pslld mm5,TEX_SHIFT - movq mm6,.cex + movq mm6,mm3 ;.cex psrad mm6,ROUND paddd mm5,mm6 movq mm6,mm5 @@ -1061,8 +1061,8 @@ end if ; movq mm6,mm4 ; psrad mm5,ROUND ; psrad mm6,ROUND - movq .cex,mm3 - movq .cey,mm4 + ; movq .cex,mm3 + ; movq .cey,mm4 end if mov eax,.dz add .cz,eax diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM index ea0d1ffb7d..1633c0bcee 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/3DS/VIEW3DS.ASM @@ -1,5 +1,5 @@ -; application : View3ds ver. 0.057 - tiny .3ds files viewer. +; application : View3ds ver. 0.059 - tiny .3ds files viewer. 
; compiler : FASM ; system : KolibriOS ; author : Macgub aka Maciej Guba @@ -34,7 +34,8 @@ LIGHT_SIZE equ 22 ; | | NON = 0 ; -/ \- MMX = 1 SSE = 2 -Ext = MMX ;Ext={ NON | MMX | SSE} +SSE2 = 3 +Ext = MMX ;Ext={ NON | MMX | SSE | SSE2 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) USE_LFN = 1 @@ -2796,6 +2797,6 @@ ret ; DATA AREA ************************************ - include 'data.inc' + include 'DATA.INC' MEM_END: diff --git a/programs/demos/3DS/readme.txt b/programs/demos/3DS/readme.txt index 7f9305a1c5..e701010755 100644 --- a/programs/demos/3DS/readme.txt +++ b/programs/demos/3DS/readme.txt @@ -1,8 +1,11 @@ -View3ds 0.057 - tiny viewer to .3ds files. +View3ds 0.059 - tiny viewer to .3ds files. What's new? -1. By opening file bigger then ~18 KB, and choosing env mode program terminate. - I remove this bug +1. Bump and parallel two texture mapping function optimizations. + (files bump_cat.inc & two_tex.inc) + On my P4 the changes are hardly visible, but on a dual core in KlbrInWin + the optimizations run pretty nicely. + Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -30,4 +33,4 @@ Buttons description: 18. re-map tex -> re-map texture and bump map coordinates, to change spherical mapping around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button. - Macgub April 2011 \ No newline at end of file + Macgub Jun 2011