Small speed optimization

git-svn-id: svn://kolibrios.org@6172 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
IgorA 2016-02-08 15:32:44 +00:00
parent ceac3b10b3
commit 68825a9e23
2 changed files with 94 additions and 125 deletions

View File

@ -131,58 +131,59 @@ endp
; line
align 16
; interpolate: linear interpolation of a vertex between p0 and p1.
; For each of the four pc components (X, Y, Z, W) and the three color
; components (offs_vert_color, +4, +8) it stores
;   q.field = p0.field + (p1.field - p0.field) * t
proc interpolate uses eax ebx ecx, q:dword,p0:dword,p1:dword,t:dword
mov eax,[q]
mov ebx,[p0]
mov ecx,[p1]
;input:
;q - register holding the address of the vertex that receives the result
;p0 - register holding the address of the 1st vertex
;p1 - register holding the address of the 2nd vertex
;t - float (read from memory as dword[t])
macro interpolate q, p0, p1, t
{
fld dword[t] ;t is loaded once and stays on the FPU stack for every component
; interpolation of the coordinates
fld dword[ecx+offs_vert_pc]
fsub dword[ebx+offs_vert_pc]
fld dword[p1+offs_vert_pc]
fsub dword[p0+offs_vert_pc]
fmul st0,st1 ;st0 = (p1 - p0) * t, t is kept in st1
fadd dword[ebx+offs_vert_pc]
fstp dword[eax+offs_vert_pc] ;q.pc.X = p0.pc.X + (p1.pc.X - p0.pc.X) * t
fadd dword[p0+offs_vert_pc]
fstp dword[q+offs_vert_pc] ;q.pc.X = p0.pc.X + (p1.pc.X - p0.pc.X) * t
fld dword[ecx+offs_vert_pc+offs_Y]
fsub dword[ebx+offs_vert_pc+offs_Y]
fld dword[p1+offs_vert_pc+offs_Y]
fsub dword[p0+offs_vert_pc+offs_Y]
fmul st0,st1
fadd dword[ebx+offs_vert_pc+offs_Y]
fstp dword[eax+offs_vert_pc+offs_Y]
fadd dword[p0+offs_vert_pc+offs_Y]
fstp dword[q+offs_vert_pc+offs_Y]
fld dword[ecx+offs_vert_pc+offs_Z]
fsub dword[ebx+offs_vert_pc+offs_Z]
fld dword[p1+offs_vert_pc+offs_Z]
fsub dword[p0+offs_vert_pc+offs_Z]
fmul st0,st1
fadd dword[ebx+offs_vert_pc+offs_Z]
fstp dword[eax+offs_vert_pc+offs_Z]
fadd dword[p0+offs_vert_pc+offs_Z]
fstp dword[q+offs_vert_pc+offs_Z]
fld dword[ecx+offs_vert_pc+offs_W]
fsub dword[ebx+offs_vert_pc+offs_W]
fld dword[p1+offs_vert_pc+offs_W]
fsub dword[p0+offs_vert_pc+offs_W]
fmul st0,st1
fadd dword[ebx+offs_vert_pc+offs_W]
fstp dword[eax+offs_vert_pc+offs_W]
fadd dword[p0+offs_vert_pc+offs_W]
fstp dword[q+offs_vert_pc+offs_W]
; interpolation of the color
fld dword[ecx+offs_vert_color]
fsub dword[ebx+offs_vert_color]
fld dword[p1+offs_vert_color]
fsub dword[p0+offs_vert_color]
fmul st0,st1
fadd dword[ebx+offs_vert_color]
fstp dword[eax+offs_vert_color]
fadd dword[p0+offs_vert_color]
fstp dword[q+offs_vert_color]
fld dword[ecx+offs_vert_color+4]
fsub dword[ebx+offs_vert_color+4]
fld dword[p1+offs_vert_color+4]
fsub dword[p0+offs_vert_color+4]
fmul st0,st1
fadd dword[ebx+offs_vert_color+4]
fstp dword[eax+offs_vert_color+4]
fadd dword[p0+offs_vert_color+4]
fstp dword[q+offs_vert_color+4]
fld dword[ecx+offs_vert_color+8]
fsub dword[ebx+offs_vert_color+8]
fld dword[p1+offs_vert_color+8]
fsub dword[p0+offs_vert_color+8]
fmulp ;last component: multiply by t and pop it, so the FPU stack ends up balanced
fadd dword[ebx+offs_vert_color+8]
fstp dword[eax+offs_vert_color+8]
ret
endp
fadd dword[p0+offs_vert_color+8]
fstp dword[q+offs_vert_color+8]
}
;
; Line Clipping
@ -421,10 +422,10 @@ align 4
mov eax,ebp
sub eax,8+2*sizeof.GLVertex ;eax = &q1
stdcall interpolate, eax,edi,esi,[tmin]
interpolate eax,edi,esi,tmin
stdcall gl_transform_to_viewport, edx,eax
add eax,sizeof.GLVertex ;eax = &q2
stdcall interpolate, eax,edi,esi,[tmax]
interpolate eax,edi,esi,tmax
stdcall gl_transform_to_viewport, edx,eax
sub eax,sizeof.GLVertex ;eax = &q1
@ -586,9 +587,10 @@ endp
align 4
clip_proc dd clip_xmin,clip_xmax, clip_ymin,clip_ymax, clip_zmin,clip_zmax
; updateTmp: fills in the color, texture coordinates and clip code of vertex q
; from vertices p0 and p1 and the parameter t; when the resulting clip code is 0
; the vertex is also transformed to the viewport and its color is converted
; with RGBFtoRGBI.
;input:
;edi - q
align 16
proc updateTmp uses eax ebx ecx edx, context:dword, q:dword, p0:dword, p1:dword, t:dword
mov ebx,[q]
proc updateTmp uses eax ecx edx, context:dword, p0:dword, p1:dword, t:dword
mov edx,[context]
mov eax,[p0]
cmp dword[edx+offs_cont_current_shade_model],GL_SMOOTH ;if (context.current_shade_model == GL_SMOOTH)
@ -598,26 +600,26 @@ proc updateTmp uses eax ebx ecx edx, context:dword, q:dword, p0:dword, p1:dword,
fsub dword[eax+offs_vert_color]
fmul dword[t]
fadd dword[eax+offs_vert_color]
fstp dword[ebx+offs_vert_color] ;q.color.v[0]=p0.color.v[0] + (p1.color.v[0]-p0.color.v[0])*t
fstp dword[edi+offs_vert_color] ;q.color.v[0]=p0.color.v[0] + (p1.color.v[0]-p0.color.v[0])*t
fld dword[ecx+offs_vert_color+4]
fsub dword[eax+offs_vert_color+4]
fmul dword[t]
fadd dword[eax+offs_vert_color+4]
fstp dword[ebx+offs_vert_color+4] ;q.color.v[1]=p0.color.v[1] + (p1.color.v[1]-p0.color.v[1])*t
fstp dword[edi+offs_vert_color+4] ;q.color.v[1]=p0.color.v[1] + (p1.color.v[1]-p0.color.v[1])*t
fld dword[ecx+offs_vert_color+8]
fsub dword[eax+offs_vert_color+8]
fmul dword[t]
fadd dword[eax+offs_vert_color+8]
fstp dword[ebx+offs_vert_color+8] ;q.color.v[2]=p0.color.v[2] + (p1.color.v[2]-p0.color.v[2])*t
fstp dword[edi+offs_vert_color+8] ;q.color.v[2]=p0.color.v[2] + (p1.color.v[2]-p0.color.v[2])*t
jmp @f ;color was interpolated: skip the plain copy below
align 4
.els_0: ;presumably reached when the shade model is not GL_SMOOTH (the branch is not visible here): copy the p0 color unchanged
mov ecx,[eax+offs_vert_color]
mov [ebx+offs_vert_color],ecx ;q.color.v[0]=p0.color.v[0]
mov [edi+offs_vert_color],ecx ;q.color.v[0]=p0.color.v[0]
mov ecx,[eax+offs_vert_color+4]
mov [ebx+offs_vert_color+4],ecx ;q.color.v[1]=p0.color.v[1]
mov [edi+offs_vert_color+4],ecx ;q.color.v[1]=p0.color.v[1]
mov ecx,[eax+offs_vert_color+8]
mov [ebx+offs_vert_color+8],ecx ;q.color.v[2]=p0.color.v[2]
mov [edi+offs_vert_color+8],ecx ;q.color.v[2]=p0.color.v[2]
@@:
cmp dword[edx+offs_cont_texture_2d_enabled],0 ;if (context.texture_2d_enabled)
@ -627,28 +629,28 @@ align 4
fsub dword[eax+offs_vert_tex_coord+offs_X]
fmul dword[t]
fadd dword[eax+offs_vert_tex_coord+offs_X]
fstp dword[ebx+offs_vert_tex_coord+offs_X] ;q.tex_coord.X=p0.tex_coord.X + (p1.tex_coord.X-p0.tex_coord.X)*t
fstp dword[edi+offs_vert_tex_coord+offs_X] ;q.tex_coord.X=p0.tex_coord.X + (p1.tex_coord.X-p0.tex_coord.X)*t
fld dword[ecx+offs_vert_tex_coord+offs_Y]
fsub dword[eax+offs_vert_tex_coord+offs_Y]
fmul dword[t]
fadd dword[eax+offs_vert_tex_coord+offs_Y]
fstp dword[ebx+offs_vert_tex_coord+offs_Y] ;q.tex_coord.Y=p0.tex_coord.Y + (p1.tex_coord.Y-p0.tex_coord.Y)*t
fstp dword[edi+offs_vert_tex_coord+offs_Y] ;q.tex_coord.Y=p0.tex_coord.Y + (p1.tex_coord.Y-p0.tex_coord.Y)*t
@@:
; q.clip_code = gl_clipcode(q.pc.X, q.pc.Y, q.pc.Z, q.pc.W)
stdcall gl_clipcode, [ebx+offs_vert_pc+offs_X],[ebx+offs_vert_pc+offs_Y],\
[ebx+offs_vert_pc+offs_Z],[ebx+offs_vert_pc+offs_W]
mov dword[ebx+offs_vert_clip_code],eax
stdcall gl_clipcode, [edi+offs_vert_pc+offs_X],[edi+offs_vert_pc+offs_Y],\
[edi+offs_vert_pc+offs_Z],[edi+offs_vert_pc+offs_W]
mov dword[edi+offs_vert_clip_code],eax
or eax,eax ;if (q.clip_code==0)
jnz @f ;non-zero clip code: skip the viewport transform and color conversion
stdcall gl_transform_to_viewport,[context],ebx
mov eax,ebx
stdcall gl_transform_to_viewport,[context],edi
mov eax,edi
; the three addresses pushed below are not popped here; presumably they are
; consumed as the trailing arguments of the RGBFtoRGBI call - TODO confirm
add eax,offs_vert_zp+offs_zbup_b ;eax = q+offs_vert_zp+offs_zbup_b
push eax
add eax,offs_zbup_g-offs_zbup_b ;eax = q+offs_vert_zp+offs_zbup_g
push eax
add eax,offs_zbup_r-offs_zbup_g ;eax = q+offs_vert_zp+offs_zbup_r
push eax
stdcall RGBFtoRGBI, dword[ebx+offs_vert_color],dword[ebx+offs_vert_color+4],dword[ebx+offs_vert_color+8]
stdcall RGBFtoRGBI, dword[edi+offs_vert_color],dword[edi+offs_vert_color+4],dword[edi+offs_vert_color+8]
@@:
ret
endp
@ -885,7 +887,7 @@ align 4
sub ebx,offs_vert_pc
sub ecx,offs_vert_pc
stdcall updateTmp,[context],edi,ebx,ecx,eax ;updateTmp(c,&tmp1,q[0],q[1],tt)
stdcall updateTmp,[context],ebx,ecx,eax ;(c,&tmp1,q[0],q[1],tt)
add ebx,offs_vert_pc
lea eax,[clip_proc]
@ -898,7 +900,7 @@ align 4
sub edi,offs_vert_pc
sub ebx,offs_vert_pc
sub edx,offs_vert_pc
stdcall updateTmp,[context],edi,ebx,edx,eax ;updateTmp(c,&tmp2,q[0],q[2],tt)
stdcall updateTmp,[context],ebx,edx,eax ;(c,&tmp2,q[0],q[2],tt)
mov eax,[ebx+offs_vert_edge_flag]
mov [tmp1.edge_flag],eax ;q[0].edge_flag
@ -967,7 +969,7 @@ align 4
sub edi,(2*sizeof.GLVertex)-offs_vert_pc
stdcall dword[eax],edi,ebx,ecx ;clip_proc[clip_bit](&tmp1.pc,&q[0].pc,&q[1].pc)
sub edi,offs_vert_pc
stdcall updateTmp,[context],edi,[q],[q+4],eax
stdcall updateTmp,[context],[q],[q+4],eax
lea eax,[clip_proc]
mov edi,[clip_bit]
@ -977,7 +979,7 @@ align 4
sub edi,sizeof.GLVertex-offs_vert_pc
stdcall dword[eax],edi,ebx,edx ;clip_proc[clip_bit](&tmp2.pc,&q[0].pc,&q[2].pc)
sub edi,offs_vert_pc
stdcall updateTmp,[context],edi,[q],[q+8],eax
stdcall updateTmp,[context],[q],[q+8],eax
mov dword[tmp1.edge_flag],1
mov eax,[edx+offs_vert_edge_flag-offs_vert_pc]

View File

@ -7,7 +7,7 @@
;output:
; eax - pointer to the ZBuffer (0 on failure)
align 4
align 16
proc ZB_open uses ecx edi, xsize:dword, ysize:dword, mode:dword,\
nb_colors:dword, color_indexes:dword, color_table:dword, frame_buffer:dword
@ -30,13 +30,6 @@ proc ZB_open uses ecx edi, xsize:dword, ysize:dword, mode:dword,\
mov eax,[mode]
mov [edi+offs_zbuf_mode],eax
if TGL_FEATURE_8_BITS eq 1
cmp eax,ZB_MODE_INDEX
jne @f
;ZB_initDither(edi, nb_colors, color_indexes, color_table);
jmp .end_s
@@:
end if
if TGL_FEATURE_32_BITS eq 1
cmp eax,ZB_MODE_RGBA
je .correct
@ -83,19 +76,14 @@ endp
;void ZB_close(ZBuffer * zb)
;{
if TGL_FEATURE_8_BITS eq 1
; if (zb->mode == ZB_MODE_INDEX)
; ZB_closeDither(zb);
end if
;
; if (zb->frame_buffer_allocated)
; gl_free(zb->pbuf);
;
; gl_free(zb->zbuf);
; gl_free(zb);
;}
align 4
align 16
proc ZB_resize uses eax ebx ecx edi esi, zb:dword, frame_buffer:dword, xsize:dword, ysize:dword
mov ebx,[zb]
@ -149,7 +137,7 @@ endp
; unsigned char *p1;
; PIXEL *q;
; int y, n;
;
; q = zb->pbuf;
; p1 = buf;
; n = zb->xsize * PSZB;
@ -159,7 +147,7 @@ endp
; q = (PIXEL *) ((char *) q + zb->linesize);
; }
;}
;
;#if TGL_FEATURE_RENDER_BITS == 16
;/* 32 bpp copy */
@ -183,10 +171,10 @@ endp
; unsigned short *q;
; unsigned int *p, *p1, v, w0, w1;
; int y, n;
;
; q = zb->pbuf;
; p1 = (unsigned int *) buf;
;
; for (y = 0; y < zb->ysize; y++) {
; p = p1;
; n = zb->xsize >> 2;
@ -199,7 +187,7 @@ endp
;#endif
; p[0] = w0;
; p[1] = w1;
;
; v = *(unsigned int *) (q + 2);
;#if BYTE_ORDER == BIG_ENDIAN
; RGB16_TO_RGB32(w1, w0, v);
@ -208,11 +196,11 @@ endp
;#endif
; p[2] = w0;
; p[3] = w1;
;
; q += 4;
; p += 4;
; } while (--n > 0);
;
; p1 += linesize;
; }
;}
@ -272,11 +260,11 @@ endp
; unsigned short *q;
; unsigned int *p, *p1, w0, w1, w2, v0, v1;
; int y, n;
;
; q = zb->pbuf;
; p1 = (unsigned int *) buf;
; linesize = linesize * 3;
;
; for (y = 0; y < zb->ysize; y++) {
; p = p1;
; n = zb->xsize >> 2;
@ -302,16 +290,6 @@ endp
; int linesize)
;{
; switch (zb->mode) {
;#ifdef TGL_FEATURE_8_BITS
; case ZB_MODE_INDEX:
; ZB_ditherFrameBuffer(zb, buf, linesize >> 1);
; break;
;#endif
;#ifdef TGL_FEATURE_16_BITS
; case ZB_MODE_5R6G5B:
; ZB_copyBuffer(zb, buf, linesize);
; break;
;#endif
;#ifdef TGL_FEATURE_32_BITS
; case ZB_MODE_RGBA:
; ZB_copyFrameBufferRGB32(zb, buf, linesize >> 1);
@ -341,10 +319,10 @@ endp
; PIXEL *q;
; unsigned short *p, *p1;
; int y, n;
;
; q = zb->pbuf;
; p1 = (unsigned short *) buf;
;
; for (y = 0; y < zb->ysize; y++) {
; p = p1;
; n = zb->xsize >> 2;
@ -364,11 +342,6 @@ endp
; int linesize)
;{
; switch (zb->mode) {
;#ifdef TGL_FEATURE_16_BITS
; case ZB_MODE_5R6G5B:
; ZB_copyFrameBuffer5R6G5B(zb, buf, linesize);
; break;
;#endif
;#ifdef TGL_FEATURE_24_BITS
; case ZB_MODE_RGB24:
; ZB_copyBuffer(zb, buf, linesize);
@ -393,10 +366,10 @@ endp
; PIXEL *q;
; unsigned short *p, *p1;
; int y, n;
;
; q = zb->pbuf;
; p1 = (unsigned short *) buf;
;
; for (y = 0; y < zb->ysize; y++) {
; p = p1;
; n = zb->xsize >> 2;
@ -411,16 +384,11 @@ endp
; p1 = (unsigned short *)((char *)p1 + linesize);
; }
;}
;
;void ZB_copyFrameBuffer(ZBuffer * zb, void *buf,
; int linesize)
;{
; switch (zb->mode) {
;#ifdef TGL_FEATURE_16_BITS
; case ZB_MODE_5R6G5B:
; ZB_copyFrameBuffer5R6G5B(zb, buf, linesize);
; break;
;#endif
;#ifdef TGL_FEATURE_32_BITS
; case ZB_MODE_RGBA:
; ZB_copyBuffer(zb, buf, linesize);
@ -430,15 +398,17 @@ endp
; assert(0);
; }
;}
;
;#endif /* TGL_FEATURE_RENDER_BITS == 32 */
;
; adr must be aligned on an 'int'
;
align 4
proc memset_s uses eax ecx edi, adr:dword, val:dword, count:dword
;destroy:
; ecx, edi
align 16
proc memset_s uses eax, adr:dword, val:dword, count:dword
mov eax,[val]
mov di,ax
ror eax,16
@ -455,7 +425,7 @@ proc memset_s uses eax ecx edi, adr:dword, val:dword, count:dword
ret
endp
align 4
align 16
proc memset_l uses eax ecx edi, adr:dword, val:dword, count:dword
mov eax,[val]
mov ecx,[count]
@ -465,8 +435,10 @@ proc memset_l uses eax ecx edi, adr:dword, val:dword, count:dword
endp
; count must be a multiple of 4 and >= 4
align 4
proc memset_RGB24 uses eax ecx edi esi, adr:dword, r:dword, g:dword, b:dword, count:dword
;destroy:
; edi, esi
align 16
proc memset_RGB24 uses eax ecx, adr:dword, r:dword, g:dword, b:dword, count:dword
mov esi,[adr]
mov eax,[r] ;копируем в буфер первые 12 байт (минимальное число кратное 3 и 4)
mov byte[esi],al
@ -501,10 +473,8 @@ proc memset_RGB24 uses eax ecx edi esi, adr:dword, r:dword, g:dword, b:dword, co
sub ecx,esi ;ecx*=3
rep stosd
jmp .end_f
@@:
;если r!=g или g!=b или b!=r
@@:
align 16
@@: ;если r!=g или g!=b или b!=r
movsd
movsd
movsd
@ -514,9 +484,9 @@ proc memset_RGB24 uses eax ecx edi esi, adr:dword, r:dword, g:dword, b:dword, co
ret
endp
align 4
proc ZB_clear uses eax ebx ecx, zb:dword, clear_z:dword, z:dword, clear_color:dword,\
r:dword, g:dword, b:dword
align 16
proc ZB_clear uses eax ebx ecx edi esi, zb:dword, clear_z:dword, z:dword,\
clear_color:dword, r:dword, g:dword, b:dword
;if TGL_FEATURE_RENDER_BITS != 24
; color dd ?
;end if
@ -546,14 +516,11 @@ if TGL_FEATURE_RENDER_BITS eq 24
end if
mov ebx,[eax+offs_zbuf_pbuf]
mov ecx,[eax+offs_zbuf_ysize]
align 4
.cycle_0:
if (TGL_FEATURE_RENDER_BITS eq 15) ;or (TGL_FEATURE_RENDER_BITS eq 16)
;color = RGB_TO_PIXEL(r, g, b);
;memset_s(ebx, color, zb->xsize);
end if
if TGL_FEATURE_RENDER_BITS eq 32
;color = RGB_TO_PIXEL(r, g, b);
;memset_l(ebx, color, zb->xsize);
;color = RGB_TO_PIXEL(r, g, b)
;memset_l(ebx, color, zb->xsize)
end if
if TGL_FEATURE_RENDER_BITS eq 24
sub esp,16