code optimize, fix 's' coordinate in 'ZB_fillTriangleMapping'

git-svn-id: svn://kolibrios.org@6141 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
IgorA 2016-02-05 11:57:16 +00:00
parent b6896eb0f1
commit 48b0ebd898
2 changed files with 82 additions and 171 deletions

View File

@ -1,3 +1,36 @@
;
; Options for the functions:
;
;INTERP_Z - use the Z-buffer (always enabled)
;INTERP_RGB - color smoothing
;INTERP_ST - texture mapping in orthographic projection
;INTERP_STZ - texture mapping in perspective projection
;
; Triangle drawing functions:
;
;ZB_fillTriangleFlat - single-color triangle
;ZB_fillTriangleSmooth - triangle with different vertex colors
;ZB_fillTriangleMapping - textured triangle in orthographic projection
;ZB_fillTriangleMappingPerspective - textured triangle in perspective projection
;
;-----------------------------------------------------------------------
; calc_d1d2 f, r1, r2
; Computes a pair of gradients from two attribute deltas:
;   r1 = fdy2*d1 - fdy1*d2
;   r2 = fdx1*d2 - fdx2*d1
; In:    st0 = d2, st1 = d1 (pushed by the caller)
;        f   = store prefix: 'fi' emits fistp (integer result),
;              'f' emits fstp (float result)
;        r1, r2 = dword variables that receive the results
; Reads: dword[fdx1], dword[fdy1], dword[fdx2], dword[fdy2]
; Out:   d1 and d2 are consumed (FPU stack is two entries shallower)
;-----------------------------------------------------------------------
macro calc_d1d2 f, r1, r2
{
	fld st1                 ;st0 = d1 (copy), st1 = d2, st2 = d1
	fmul dword[fdy2]        ;st0 = fdy2*d1
	fld st1                 ;st0 = d2 (copy), st1 = fdy2*d1
	fmul dword[fdy1]        ;st0 = fdy1*d2
	fsubp st1,st0           ;st0 = fdy2*d1 - fdy1*d2
	f#stp dword[r1]         ;r1 = (fdy2*d1 - fdy1*d2); st0 = d2, st1 = d1
	fmul dword[fdx1]        ;st0 = fdx1*d2 (d2 is not needed any more)
	fxch st1                ;st0 = d1, st1 = fdx1*d2
	fmul dword[fdx2]        ;st0 = fdx2*d1 (d1 is not needed any more)
	fsubp st1,st0           ;st0 = fdx1*d2 - fdx2*d1
	f#stp dword[r2]         ;r2 = (fdx1*d2 - fdx2*d1); d1 and d2 are gone
}
INTERP_Z equ 1 INTERP_Z equ 1
@ -120,7 +153,6 @@ macro DRAW_INIT
macro PUT_PIXEL _a macro PUT_PIXEL _a
{ {
local .end_0 local .end_0
local .in_mem
mov eax,[z] mov eax,[z]
shr eax,ZB_POINT_Z_FRAC_BITS shr eax,ZB_POINT_Z_FRAC_BITS
cmp ax,word[esi+2*_a] ;if (zz >= pz[_a]) cmp ax,word[esi+2*_a] ;if (zz >= pz[_a])
@ -130,12 +162,10 @@ local .in_mem
if TGL_FEATURE_RENDER_BITS eq 24 if TGL_FEATURE_RENDER_BITS eq 24
mov ebx,[t] mov ebx,[t]
and ebx,0x3fc00000 and ebx,0x3fc00000
or ebx,[s] mov eax,[s]
and eax,0x003fc000
or ebx,eax
shr ebx,14 shr ebx,14
cmp ebx,256*256-1 ;проверка на выход за пределы текстуры
jl .in_mem ;координата 1.0,1.0 может выползать
mov ebx,256*256-1 ;переход на последний пиксель текстуры
.in_mem:
imul ebx,3 imul ebx,3
add ebx,[texture] ;ptr = texture + (((t & 0x3fc00000) | s) >> 14) * 3 add ebx,[texture] ;ptr = texture + (((t & 0x3fc00000) | s) >> 14) * 3
mov ax,word[ebx] mov ax,word[ebx]
@ -193,7 +223,6 @@ macro DRAW_INIT
macro PUT_PIXEL _a macro PUT_PIXEL _a
{ {
local .end_0 local .end_0
local .in_mem
mov eax,[z] mov eax,[z]
shr eax,ZB_POINT_Z_FRAC_BITS shr eax,ZB_POINT_Z_FRAC_BITS
cmp ax,word[esi+2*_a] ;if (zz >= pz[_a]) cmp ax,word[esi+2*_a] ;if (zz >= pz[_a])
@ -207,15 +236,6 @@ if TGL_FEATURE_RENDER_BITS eq 24
and eax,0x003fc000 and eax,0x003fc000
or ebx,eax or ebx,eax
shr ebx,14 shr ebx,14
if 1
; не знаю нужна ли сдесь эта проверка
; функция ZB_fillTriangleMapping без нее не работает
; на всякий случай делаю и тут такое же
cmp ebx,256*256-1 ;проверка на выход за пределы текстуры
jl .in_mem ;координата 1.0,1.0 может выползать
mov ebx,256*256-1 ;переход на последний пиксель текстуры
.in_mem:
end if
imul ebx,3 imul ebx,3
add ebx,[texture] ;ptr = texture + (((t & 0x3fc00000) | (s & 0x003FC000)) >> 14) * 3 add ebx,[texture] ;ptr = texture + (((t & 0x3fc00000) | (s & 0x003FC000)) >> 14) * 3
mov ax,word[ebx] mov ax,word[ebx]

View File

@ -154,15 +154,9 @@ pushad
mov [fdy2],eax ;p2.y - p0.y mov [fdy2],eax ;p2.y - p0.y
fild dword[fdx1] fild dword[fdx1]
fst dword[fdx1] fimul dword[fdy2]
fild dword[fdy2]
fst dword[fdy2]
fmulp
fild dword[fdx2] fild dword[fdx2]
fst dword[fdx2] fimul dword[fdy1]
fild dword[fdy1]
fst dword[fdy1]
fmulp
fsubp ;st0 = st1-st0 fsubp ;st0 = st1-st0
fst dword[fz] ;fz = fdx1 * fdy2 - fdx2 * fdy1 fst dword[fz] ;fz = fdx1 * fdy2 - fdx2 * fdy1
fldz fldz
@ -175,20 +169,18 @@ pushad
fdiv dword[fz] ;fz = 1.0 / fz fdiv dword[fz] ;fz = 1.0 / fz
fst dword[fz] ;st0 = fz fst dword[fz] ;st0 = fz
fld dword[fdx1] fild dword[fdx1]
fmul st0,st1 fmul st0,st1
fstp dword[fdx1] ;fdx1 *= fz fstp dword[fdx1] ;fdx1 *= fz
fld dword[fdy1] fild dword[fdy1]
fmul st0,st1 fmul st0,st1
fstp dword[fdy1] ;fdy1 *= fz fstp dword[fdy1] ;fdy1 *= fz
fld dword[fdx2] fild dword[fdx2]
fmul st0,st1 fmul st0,st1
fstp dword[fdx2] ;fdx2 *= fz fstp dword[fdx2] ;fdx2 *= fz
fld dword[fdy2] fild dword[fdy2]
fmul st0,st1 fmulp
fstp dword[fdy2] ;fdy2 *= fz fstp dword[fdy2] ;fdy2 *= fz
ffree st0
fincstp
if INTERP_Z eq 1 if INTERP_Z eq 1
mov eax,[ecx+offs_zbup_z] mov eax,[ecx+offs_zbup_z]
@ -200,22 +192,9 @@ if INTERP_Z eq 1
fild dword[d1] ;d1 = p1.z - p0.z fild dword[d1] ;d1 = p1.z - p0.z
fild dword[d2] ;d2 = p2.z - p0.z fild dword[d2] ;d2 = p2.z - p0.z
fld dword[fdy2] ;dzdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;dzdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 fi, dzdx, dzdy
fmul st0,st2
fsubp
fistp dword[dzdx] ;dzdx = (int) (fdy2*d1 - fdy1*d2)
fld dword[fdx1]
fmul st0,st1
fld dword[fdx2]
fmul st0,st3
fsubp
fistp dword[dzdy] ;dzdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
end if end if
if INTERP_RGB eq 1 if INTERP_RGB eq 1
@ -228,22 +207,9 @@ if INTERP_RGB eq 1
fild dword[d1] ;d1 = p1.r - p0.r fild dword[d1] ;d1 = p1.r - p0.r
fild dword[d2] ;d2 = p2.r - p0.r fild dword[d2] ;d2 = p2.r - p0.r
fld dword[fdy2] ;drdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;drdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 fi, drdx, drdy
fmul st0,st2
fsubp
fistp dword[drdx] ;drdx = (int) (fdy2*d1 - fdy1*d2)
fld dword[fdx1]
fmul st0,st1
fld dword[fdx2]
fmul st0,st3
fsubp
fistp dword[drdy] ;drdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
mov eax,[ecx+offs_zbup_g] mov eax,[ecx+offs_zbup_g]
sub eax,[ebx+offs_zbup_g] sub eax,[ebx+offs_zbup_g]
@ -254,22 +220,9 @@ if INTERP_RGB eq 1
fild dword[d1] ;d1 = p1.g - p0.g fild dword[d1] ;d1 = p1.g - p0.g
fild dword[d2] ;d2 = p2.g - p0.g fild dword[d2] ;d2 = p2.g - p0.g
fld dword[fdy2] ;dgdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;dgdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 fi, dgdx, dgdy
fmul st0,st2
fsubp
fistp dword[dgdx] ;dgdx = (int) (fdy2*d1 - fdy1*d2)
fld dword[fdx1]
fmul st0,st1
fld dword[fdx2]
fmul st0,st3
fsubp
fistp dword[dgdy] ;dgdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
mov eax,[ecx+offs_zbup_b] mov eax,[ecx+offs_zbup_b]
sub eax,[ebx+offs_zbup_b] sub eax,[ebx+offs_zbup_b]
@ -280,22 +233,9 @@ if INTERP_RGB eq 1
fild dword[d1] ;d1 = p1.b - p0.b fild dword[d1] ;d1 = p1.b - p0.b
fild dword[d2] ;d2 = p2.b - p0.b fild dword[d2] ;d2 = p2.b - p0.b
fld dword[fdy2] ;dbdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;dbdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 fi, dbdx, dbdy
fmul st0,st2
fsubp
fistp dword[dbdx] ;dbdx = (int) (fdy2*d1 - fdy1*d2)
fld dword[fdx1]
fmul st0,st1
fld dword[fdx2]
fmul st0,st3
fsubp
fistp dword[dbdy] ;dbdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
end if end if
if INTERP_ST eq 1 if INTERP_ST eq 1
@ -308,24 +248,9 @@ if INTERP_ST eq 1
fild dword[d1] ;d1 = p1.s - p0.s fild dword[d1] ;d1 = p1.s - p0.s
fild dword[d2] ;d2 = p2.s - p0.s fild dword[d2] ;d2 = p2.s - p0.s
fld dword[fdy2] ;dsdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;dsdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 fi, dsdx, dsdy
fmul st0,st2
fsubp
fistp dword[dsdx] ;dsdx = (int) (fdy2*d1 - fdy1*d2)
fld dword[fdx2]
fmul st0,st2
fld dword[fdx1]
fmul st0,st2
fsub st0,st1
fistp dword[dsdy] ;dsdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0
fincstp
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
mov eax,[ecx+offs_zbup_t] mov eax,[ecx+offs_zbup_t]
sub eax,[ebx+offs_zbup_t] sub eax,[ebx+offs_zbup_t]
@ -336,26 +261,9 @@ if INTERP_ST eq 1
fild dword[d1] ;d1 = p1.t - p0.t fild dword[d1] ;d1 = p1.t - p0.t
fild dword[d2] ;d2 = p2.t - p0.t fild dword[d2] ;d2 = p2.t - p0.t
fld dword[fdy1] ;dtdx = (int) (fdy2*d1 - fdy1*d2)
fmul st0,st1 ;dtdy = (int) (fdx1*d2 - fdx2*d1)
fld dword[fdy2] calc_d1d2 fi, dtdx, dtdy
fmul st0,st3
fsub st0,st1
fistp dword[dtdx] ;dtdx = (int) (fdy2*d1 - fdy1*d2)
ffree st0
fincstp
fld dword[fdx2]
fmul st0,st2
fld dword[fdx1]
fmul st0,st2
fsub st0,st1
fistp dword[dtdy] ;dtdy = (int) (fdx1*d2 - fdx2*d1)
ffree st0
fincstp
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
end if end if
if INTERP_STZ eq 1 if INTERP_STZ eq 1
@ -388,50 +296,18 @@ if INTERP_STZ eq 1
fld dword[edx+offs_zbup_sz] fld dword[edx+offs_zbup_sz]
fsub dword[ebx+offs_zbup_sz] ;d2 = p2.sz - p0.sz fsub dword[ebx+offs_zbup_sz] ;d2 = p2.sz - p0.sz
fld dword[fdy2] ;dszdx = (fdy2*d1 - fdy1*d2)
fmul st0,st2 ;dszdy = (fdx1*d2 - fdx2*d1)
fld dword[fdy1] calc_d1d2 f, dszdx, dszdy
fmul st0,st2
fsubp
fstp dword[dszdx] ;dszdx = (fdy2*d1 - fdy1*d2)
fld dword[fdx2]
fmul st0,st2
fld dword[fdx1]
fmul st0,st2
fsub st0,st1
fstp dword[dszdy] ;dszdy = (fdx1*d2 - fdx2*d1)
ffree st0
fincstp
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
fld dword[ecx+offs_zbup_tz] fld dword[ecx+offs_zbup_tz]
fsub dword[ebx+offs_zbup_tz] ;d1 = p1.tz - p0.tz fsub dword[ebx+offs_zbup_tz] ;d1 = p1.tz - p0.tz
fld dword[edx+offs_zbup_tz] fld dword[edx+offs_zbup_tz]
fsub dword[ebx+offs_zbup_tz] ;d2 = p2.tz - p0.tz fsub dword[ebx+offs_zbup_tz] ;d2 = p2.tz - p0.tz
fld dword[fdy1] ;dtzdx = (fdy2*d1 - fdy1*d2)
fmul st0,st1 ;dtzdy = (fdx1*d2 - fdx2*d1)
fld dword[fdy2] calc_d1d2 f, dtzdx, dtzdy
fmul st0,st3
fsub st0,st1
fstp dword[dtzdx] ;dtzdx = (fdy2*d1 - fdy1*d2)
ffree st0
fincstp
fld dword[fdx2]
fmul st0,st2
fld dword[fdx1]
fmul st0,st2
fsub st0,st1
fstp dword[dtzdy] ;dtzdy = (fdx1*d2 - fdx2*d1)
ffree st0
fincstp
ffree st0 ;free d2
fincstp
ffree st0 ;free d1
fincstp
end if end if
; screen coordinates ; screen coordinates
@ -468,6 +344,7 @@ end if
mov [l2],edx mov [l2],edx
mov [pr2],ecx mov [pr2],ecx
jmp .end_1 jmp .end_1
align 4
.els_1: .els_1:
mov [l2],ecx mov [l2],ecx
mov [pr2],edx mov [pr2],edx
@ -476,6 +353,7 @@ end if
sub eax,[ebx+offs_zbup_y] sub eax,[ebx+offs_zbup_y]
mov [nb_lines],eax ;nb_lines = p1.y - p0.y mov [nb_lines],eax ;nb_lines = p1.y - p0.y
jmp .end_0 jmp .end_0
align 4
.els_0: .els_0:
; second part ; second part
fldz fldz
@ -489,6 +367,7 @@ end if
mov [pr1],ecx mov [pr1],ecx
mov [pr2],edx mov [pr2],edx
jmp .end_2 jmp .end_2
align 4
.els_2: .els_2:
mov dword[update_left],1 mov dword[update_left],1
mov dword[update_right],0 mov dword[update_right],0
@ -520,6 +399,7 @@ end if
shl eax,16 shl eax,16
div dword[dy1] ;eax = (dx1 << 16) / dy1 div dword[dy1] ;eax = (dx1 << 16) / dy1
jmp .end_3 jmp .end_3
align 4
.otr_dx1: .otr_dx1:
neg eax neg eax
inc eax inc eax
@ -528,6 +408,7 @@ end if
neg eax neg eax
inc eax inc eax
jmp .end_3 jmp .end_3
align 4
.els_3: .els_3:
xor eax,eax xor eax,eax
.end_3: .end_3:
@ -584,6 +465,7 @@ end if
if INTERP_ST eq 1 if INTERP_ST eq 1
mov ebx,[l1] mov ebx,[l1]
mov eax,[ebx+offs_zbup_s] mov eax,[ebx+offs_zbup_s]
add eax,0x00200000 ;прокручиваем горизонтальную координату на 0.5
mov [s1],eax ;s1 = l1.s mov [s1],eax ;s1 = l1.s
mov eax,[dsdx] mov eax,[dsdx]
imul eax,[dxdy_min] imul eax,[dxdy_min]
@ -646,6 +528,7 @@ end if
shl eax,16 shl eax,16
div dword[dy2] ;eax = (dx2 << 16) / dy2 div dword[dy2] ;eax = (dx2 << 16) / dy2
jmp .end_4 jmp .end_4
align 4
.otr_dx2: .otr_dx2:
neg eax neg eax
inc eax ;dx2 *= -1 inc eax ;dx2 *= -1
@ -654,6 +537,7 @@ end if
neg eax neg eax
inc eax inc eax
jmp .end_4 jmp .end_4
align 4
.els_4: .els_4:
xor eax,eax xor eax,eax
.end_4: .end_4:
@ -760,6 +644,7 @@ end if
add edi,4*PSZB add edi,4*PSZB
sub dword[n],4 sub dword[n],4
jmp .cycle_1 jmp .cycle_1
align 4
.cycle_2: ;while (n>=0) .cycle_2: ;while (n>=0)
cmp dword[n],0 cmp dword[n],0
jl .cycle_2_end jl .cycle_2_end
@ -769,6 +654,7 @@ if INTERP_RGB eq 1
jnc @f jnc @f
mov dword[or1],0 mov dword[or1],0
jmp .end_r jmp .end_r
align 4
@@: @@:
bt dword[or1],16 bt dword[or1],16
jnc .end_r jnc .end_r
@ -778,6 +664,7 @@ if INTERP_RGB eq 1
jnc @f jnc @f
mov dword[og1],0 mov dword[og1],0
jmp .end_g jmp .end_g
align 4
@@: @@:
bt dword[og1],16 bt dword[og1],16
jnc .end_g jnc .end_g
@ -787,6 +674,7 @@ if INTERP_RGB eq 1
jnc @f jnc @f
mov dword[ob1],0 mov dword[ob1],0
jmp .end_b jmp .end_b
align 4
@@: @@:
bt dword[ob1],16 bt dword[ob1],16
jnc .end_b jnc .end_b
@ -800,6 +688,7 @@ end if
add edi,PSZB add edi,PSZB
dec dword[n] dec dword[n]
jmp .cycle_2 jmp .cycle_2
align 4
.cycle_2_end: .cycle_2_end:
end if ;проверка от макроса DRAW_LINE end if ;проверка от макроса DRAW_LINE
@ -838,6 +727,7 @@ if INTERP_STZ eq 1
fstp dword[tz1] fstp dword[tz1]
end if end if
jmp .end_er jmp .end_er
align 4
.els_er: .els_er:
mov eax,[dxdy_min] mov eax,[dxdy_min]
add [x1],eax add [x1],eax
@ -880,6 +770,7 @@ end if
shl eax,1 shl eax,1
add [pz1],eax add [pz1],eax
jmp .beg_w_lin jmp .beg_w_lin
align 4
.end_w_lin: .end_w_lin:
inc dword[part] inc dword[part]
cmp dword[part],2 cmp dword[part],2