diff --git a/programs/develop/libraries/TinyGL/asm_fork/clip.asm b/programs/develop/libraries/TinyGL/asm_fork/clip.asm index 5ce45361ec..3fa0371258 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/clip.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/clip.asm @@ -169,29 +169,29 @@ proc interpolate uses eax ebx ecx, q:dword,p0:dword,p1:dword,t:dword fld dword[t] ; интерполяция по координатам - fld dword[ecx+offs_vert_pc] - fsub dword[ebx+offs_vert_pc] + fld dword[ecx+offs_vert_pc+offs_X] + fsub dword[ebx+offs_vert_pc+offs_X] fmul st0,st1 - fadd dword[ebx+offs_vert_pc] - fstp dword[eax+offs_vert_pc] + fadd dword[ebx+offs_vert_pc+offs_X] + fstp dword[eax+offs_vert_pc+offs_X] - fld dword[ecx+offs_vert_pc+4] - fsub dword[ebx+offs_vert_pc+4] + fld dword[ecx+offs_vert_pc+offs_Y] + fsub dword[ebx+offs_vert_pc+offs_Y] fmul st0,st1 - fadd dword[ebx+offs_vert_pc+4] - fstp dword[eax+offs_vert_pc+4] + fadd dword[ebx+offs_vert_pc+offs_Y] + fstp dword[eax+offs_vert_pc+offs_Y] - fld dword[ecx+offs_vert_pc+8] - fsub dword[ebx+offs_vert_pc+8] + fld dword[ecx+offs_vert_pc+offs_Z] + fsub dword[ebx+offs_vert_pc+offs_Z] fmul st0,st1 - fadd dword[ebx+offs_vert_pc+8] - fstp dword[eax+offs_vert_pc+8] + fadd dword[ebx+offs_vert_pc+offs_Z] + fstp dword[eax+offs_vert_pc+offs_Z] - fld dword[ecx+offs_vert_pc+12] - fsub dword[ebx+offs_vert_pc+12] + fld dword[ecx+offs_vert_pc+offs_W] + fsub dword[ebx+offs_vert_pc+offs_W] fmul st0,st1 - fadd dword[ebx+offs_vert_pc+12] - fstp dword[eax+offs_vert_pc+12] + fadd dword[ebx+offs_vert_pc+offs_W] + fstp dword[eax+offs_vert_pc+offs_W] ; интерполяция по цвету fld dword[ecx+offs_vert_color] @@ -531,17 +531,17 @@ endl mov edx,[a] mov ebx,[b] mov ecx,[c] - fld dword[ebx] - fsub dword[edx] + fld dword[ebx+offs_X] + fsub dword[edx+offs_X] fstp dword[d_X] ;d_X = (b.X - a.X) - fld dword[ebx+4] - fsub dword[edx+4] + fld dword[ebx+offs_Y] + fsub dword[edx+offs_Y] fstp dword[d_Y] ;d_Y = (b.Y - a.Y) - fld dword[ebx+8] - fsub dword[edx+8] + fld 
dword[ebx+offs_Z] + fsub dword[edx+offs_Z] fstp dword[d_Z] ;d_Z = (b.Z - a.Z) - fld dword[ebx+12] - fsub dword[edx+12] + fld dword[ebx+offs_W] + fsub dword[edx+offs_W] fst dword[d_W] ;d_W = (b.W - a.W) if sign eq 0 fadd dword[d#dir] @@ -550,15 +550,13 @@ else end if fldz - fcomp st1 + fcompp fstsw ax sahf ja @f - fincstp fst dword[t] ;t=0 jmp .e_zero @@: ;else - fincstp fld dword[edx+offs#dir] if sign eq 0 fchs @@ -628,115 +626,189 @@ endp align 4 clip_proc dd clip_xmin,clip_xmax, clip_ymin,clip_ymax, clip_zmin,clip_zmax -;static inline void updateTmp(GLContext *c, GLVertex *q,GLVertex *p0,GLVertex *p1,float t) -;{ -; if (c->current_shade_model == GL_SMOOTH) { +align 4 +proc updateTmp uses edx, context:dword, q:dword, p0:dword, p1:dword, t:dword + mov edx,[context] ;edx = context: it is the base register of both cmp below and the one saved by 'uses' + cmp dword[edx+offs_cont_current_shade_model],GL_SMOOTH ;if (context.current_shade_model == GL_SMOOTH) + jne .els_0 ; q->color.v[0]=p0->color.v[0] + (p1->color.v[0]-p0->color.v[0])*t; ; q->color.v[1]=p0->color.v[1] + (p1->color.v[1]-p0->color.v[1])*t; ; q->color.v[2]=p0->color.v[2] + (p1->color.v[2]-p0->color.v[2])*t; -; } else { + jmp @f + .els_0: ; q->color.v[0]=p0->color.v[0]; ; q->color.v[1]=p0->color.v[1]; ; q->color.v[2]=p0->color.v[2]; -; } + @@: -; if (c->texture_2d_enabled) { + cmp dword[edx+offs_cont_texture_2d_enabled],0 ;if (context.texture_2d_enabled) + je @f ; q->tex_coord.X=p0->tex_coord.X + (p1->tex_coord.X-p0->tex_coord.X)*t; ; q->tex_coord.Y=p0->tex_coord.Y + (p1->tex_coord.Y-p0->tex_coord.Y)*t; -; } + @@: ; q->clip_code=gl_clipcode(q->pc.X,q->pc.Y,q->pc.Z,q->pc.W); ; if (q->clip_code==0){ ; gl_transform_to_viewport(c,q); ; RGBFtoRGBI(q->color.v[0],q->color.v[1],q->color.v[2],q->zp.r,q->zp.g,q->zp.b); ; } -;} + ret +endp -;static void gl_draw_triangle_clip(GLContext *c, GLVertex *p0,GLVertex *p1,GLVertex *p2,int clip_bit); +align 4 +proc gl_draw_triangle, context:dword, p0:dword, p1:dword, p2:dword +locals + cc rd 3 + front dd ? + norm dd ?
;float +endl +pushad + mov ebx,[p0] + mov ecx,[p1] + mov edx,[p2] + + mov edi,[ebx+offs_vert_clip_code] + mov dword[cc],edi + mov eax,[ecx+offs_vert_clip_code] + mov dword[cc+4],eax + or edi,eax + mov eax,[edx+offs_vert_clip_code] + mov dword[cc+8],eax + or edi,eax -;void gl_draw_triangle(GLContext *c, GLVertex *p0,GLVertex *p1,GLVertex *p2) -;{ -; int co,c_and,cc[3],front; -; float norm; -; -; cc[0]=p0->clip_code; -; cc[1]=p1->clip_code; -; cc[2]=p2->clip_code; -; -; co=cc[0] | cc[1] | cc[2]; -; -; /* we handle the non clipped case here to go faster */ -; if (co==0) { -; -; norm=(float)(p1->zp.x-p0->zp.x)*(float)(p2->zp.y-p0->zp.y)- -; (float)(p2->zp.x-p0->zp.x)*(float)(p1->zp.y-p0->zp.y); -; -; if (norm == 0) return; -; -; front = norm < 0.0; -; front = front ^ c->current_front_face; -; -; /* back face culling */ -; if (c->cull_face_enabled) { -; /* most used case first */ -; if (c->current_cull_face == GL_BACK) { -; if (front == 0) return; -; c->draw_triangle_front(c,p0,p1,p2); -; } else if (c->current_cull_face == GL_FRONT) { -; if (front != 0) return; -; c->draw_triangle_back(c,p0,p1,p2); -; } else { -; return; -; } -; } else { -; /* no culling */ -; if (front) { -; c->draw_triangle_front(c,p0,p1,p2); -; } else { -; c->draw_triangle_back(c,p0,p1,p2); -; } -; } -; } else { -; c_and=cc[0] & cc[1] & cc[2]; -; if (c_and==0) { -; gl_draw_triangle_clip(c,p0,p1,p2,0); -; } -; } -;} + ; we handle the non clipped case here to go faster + cmp edi,0 + jne .els_0 + mov edi,dword[edx+offs_vert_zp+offs_zbup_x] + sub edi,dword[ebx+offs_vert_zp+offs_zbup_x] ;edi = p2.zp.x - p0.zp.x + mov dword[norm],edi + fild dword[norm] + mov esi,dword[ecx+offs_vert_zp+offs_zbup_y] + sub esi,dword[ebx+offs_vert_zp+offs_zbup_y] ;esi = p1.zp.y - p0.zp.y + mov dword[norm],esi + fimul dword[norm] + mov edi,dword[ecx+offs_vert_zp+offs_zbup_x] + sub edi,dword[ebx+offs_vert_zp+offs_zbup_x] + mov dword[norm],edi + fild dword[norm] + mov edi,dword[edx+offs_vert_zp+offs_zbup_y] + sub edi,dword[ebx+offs_vert_zp+offs_zbup_y] ;edi = p2.zp.y - p0.zp.y + mov dword[norm],edi +
fimul dword[norm] + fsub st0,st1 -;static void gl_draw_triangle_clip(GLContext *c, GLVertex *p0,GLVertex *p1,GLVertex *p2,int clip_bit) -;{ -; int co,c_and,co1,cc[3],edge_flag_tmp,clip_mask; -; GLVertex tmp1,tmp2,*q[3]; -; float tt; + ;st0 = (p1.zp.x-p0.zp.x)*(p2.zp.y-p0.zp.y) - (p2.zp.x-p0.zp.x)*(p1.zp.y-p0.zp.y) -; cc[0]=p0->clip_code; -; cc[1]=p1->clip_code; -; cc[2]=p2->clip_code; + mov dword[front],0 + fldz + fcom st1 + fstsw ax + sahf + je .end_f + jb @f + inc dword[front] ;front = 0.0 > norm + @@: + mov edi,[context] + mov eax,dword[edi+offs_cont_current_front_face] + xor dword[front],eax ;front ^= context.current_front_face -; co=cc[0] | cc[1] | cc[2]; -; if (co == 0) { -; gl_draw_triangle(c,p0,p1,p2); -; } else { -; c_and=cc[0] & cc[1] & cc[2]; -; /* the triangle is completely outside */ -; if (c_and!=0) return; + ; back face culling + cmp dword[edi+offs_cont_cull_face_enabled],0 + je .els_1 + ; most used case first + cmp dword[edi+offs_cont_current_cull_face],GL_BACK + jne @f + cmp dword[front],0 + je .end_f + stdcall dword[edi+offs_cont_draw_triangle_front], edi,ebx,ecx,edx + jmp .end_f + @@: + cmp dword[edi+offs_cont_current_cull_face],GL_FRONT + jne .end_f + cmp dword[front],0 + jne .end_f + stdcall dword[edi+offs_cont_draw_triangle_back], edi,ebx,ecx,edx + jmp .end_f + .els_1: ; no culling + cmp dword[front],0 + je @f + stdcall dword[edi+offs_cont_draw_triangle_front], edi,ebx,ecx,edx + jmp .end_f ;front face drawn: do not fall through into the back-face call + @@: + stdcall dword[edi+offs_cont_draw_triangle_back], edi,ebx,ecx,edx + jmp .end_f + .els_0: + and eax,[cc] + and eax,[cc+4] + cmp eax,0 + jne @f + stdcall gl_draw_triangle_clip, [context],ebx,ecx,edx,0 + .end_f: + finit + @@: +popad + ret +endp -; /* find the next direction to clip */ +align 4 +proc gl_draw_triangle_clip, context:dword, p0:dword, p1:dword, p2:dword, clip_bit:dword +locals + co dd ? + co1 dd ? + cc rd 3 + edge_flag_tmp dd ? + clip_mask dd ? + tmp1 GLVertex ? + tmp2 GLVertex ? + q rd 3 ;GLVertex* + tt dd ?
;float +endl +pushad + + mov ebx,[p0] + mov ecx,[p1] + mov edx,[p2] + + mov edi,[ebx+offs_vert_clip_code] + mov dword[cc],edi + mov eax,[ecx+offs_vert_clip_code] + mov dword[cc+4],eax + or edi,eax + mov eax,[edx+offs_vert_clip_code] + mov dword[cc+8],eax + or edi,eax + mov dword[co],edi + + cmp edi,0 + jne .els_0 + stdcall gl_draw_triangle, [context],ebx,ecx,edx + jmp .end_f + .els_0: + and eax,[cc] + and eax,[cc+4] + + ; the triangle is completely outside + cmp eax,0 + jne .end_f + + ; find the next direction to clip ; while (clip_bit < 6 && (co & (1 << clip_bit)) == 0) { ; clip_bit++; ; } -; /* this test can be true only in case of rounding errors */ -; if (clip_bit == 6) { -;#if 0 + ; this test can be true only in case of rounding errors + cmp dword[clip_bit],6 +if 0 + jne @f ; printf("Error:\n"); ; printf("%f %f %f %f\n",p0->pc.X,p0->pc.Y,p0->pc.Z,p0->pc.W); ; printf("%f %f %f %f\n",p1->pc.X,p1->pc.Y,p1->pc.Z,p1->pc.W); ; printf("%f %f %f %f\n",p2->pc.X,p2->pc.Y,p2->pc.Z,p2->pc.W); -;#endif -; return; -; } + jmp .end_f + @@: +else + je .end_f +end if ; clip_mask = 1 << clip_bit; ; co1=(cc[0] ^ cc[1] ^ cc[2]) & clip_mask; @@ -781,7 +853,10 @@ clip_proc dd clip_xmin,clip_xmax, clip_ymin,clip_ymax, clip_zmin,clip_zmax ; gl_draw_triangle_clip(c,q[0],&tmp1,&tmp2,clip_bit+1); ; } ; } -;} + .end_f: +popad + ret +endp align 4 proc gl_draw_triangle_select uses eax, context:dword, p0:dword,p1:dword,p2:dword @@ -795,14 +870,16 @@ proc gl_draw_triangle_select uses eax, context:dword, p0:dword,p1:dword,p2:dword ret endp -;#ifdef PROFILE -;int count_triangles,count_triangles_textured,count_pixels; -;#endif +if PROFILE eq 1 + count_triangles dd ? + count_triangles_textured dd ? + count_pixels dd ? 
+end if align 4 -proc gl_draw_triangle_fill uses eax edx, context:dword, p0:dword,p1:dword,p2:dword -;#ifdef PROFILE -; { +proc gl_draw_triangle_fill, context:dword, p0:dword,p1:dword,p2:dword +pushad +if PROFILE eq 1 ; int norm; ; assert(p0->zp.x >= 0 && p0->zp.x < c->zb->xsize); ; assert(p0->zp.y >= 0 && p0->zp.y < c->zb->ysize); @@ -814,32 +891,40 @@ proc gl_draw_triangle_fill uses eax edx, context:dword, p0:dword,p1:dword,p2:dwo ; norm=(p1->zp.x-p0->zp.x)*(p2->zp.y-p0->zp.y)- ; (p2->zp.x-p0->zp.x)*(p1->zp.y-p0->zp.y); ; count_pixels+=abs(norm)/2; -; count_triangles++; -; } -;#endif + inc dword[count_triangles] +end if + mov ebx,[p1] + add ebx,offs_vert_zp + mov ecx,[p2] + add ecx,offs_vert_zp mov edx,[context] cmp dword[edx+offs_cont_texture_2d_enabled],0 je .els_i ;if (context.texture_2d_enabled) -;#ifdef PROFILE -; count_triangles_textured++; -;#endif +if PROFILE eq 1 + inc dword[count_triangles_textured] +end if mov eax,dword[edx+offs_cont_current_texture] mov eax,[eax] ;переход по указателю ;так как offs_text_images+offs_imag_pixmap = 0 то context.current_texture.images[0].pixmap = [eax] stdcall ZB_setTexture, dword[edx+offs_cont_zb],dword[eax] -; ZB_fillTriangleMappingPerspective, dword[edx+offs_cont_zb],&p0->zp,&p1->zp,&p2->zp); + mov eax,[p0] + add eax,offs_vert_zp + stdcall ZB_fillTriangleMappingPerspective, dword[edx+offs_cont_zb],eax,ebx,ecx jmp .end_f .els_i: - cmp dword[edx+offs_cont_current_shade_model],GL_SMOOTH - jne .els - ;else if (context.current_shade_model == GL_SMOOTH) -; ZB_fillTriangleSmooth, dword[edx+offs_cont_zb],&p0->zp,&p1->zp,&p2->zp); - jmp .end_f - .els: -; ZB_fillTriangleFlat, dword[edx+offs_cont_zb],&p0->zp,&p1->zp,&p2->zp); + mov eax,[p0] + add eax,offs_vert_zp + cmp dword[edx+offs_cont_current_shade_model],GL_SMOOTH + jne .els + ;else if (context.current_shade_model == GL_SMOOTH) + stdcall ZB_fillTriangleSmooth, dword[edx+offs_cont_zb],eax,ebx,ecx + jmp .end_f + .els: + stdcall ZB_fillTriangleFlat, 
dword[edx+offs_cont_zb],eax,ebx,ecx .end_f: +popad ret endp diff --git a/programs/develop/libraries/TinyGL/asm_fork/matrix.asm b/programs/develop/libraries/TinyGL/asm_fork/matrix.asm index bf263f6c5a..4d29df8ed3 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/matrix.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/matrix.asm @@ -410,16 +410,18 @@ proc glopTranslate uses eax ebx ecx, context:dword, p:dword fmul st0,st3 ;m[1] * y fld dword[ebx+8] ;m[2] fmul st0,st3 ;m[2] * z - fadd st0,st1 - fadd st0,st2 + faddp + faddp fadd dword[ebx+12] ;m[3] fstp dword[ebx+12] ;m[3] = m[0] * x + m[1] * y + m[2] * z + m[3] - ffree st0 - fincstp - ffree st0 - fincstp add ebx,16 loop @b + ffree st0 + fincstp + ffree st0 + fincstp + ffree st0 + fincstp if DEBUG ;glopTranslate mov ebx,[eax+offs_cont_matrix_mode] diff --git a/programs/develop/libraries/TinyGL/asm_fork/tinygl.asm b/programs/develop/libraries/TinyGL/asm_fork/tinygl.asm index d3d179186b..39e147d8fe 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/tinygl.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/tinygl.asm @@ -87,9 +87,9 @@ m_5 db '(5)',13,10,0 buf_param rb 80 align 4 -proc str_n_cat uses eax ecx edi esi, str1:dword, str2:dword, n:dword +proc str_n_cat uses eax ecx edi esi, str1:dword, str2:dword, n_len:dword mov esi,dword[str2] - mov ecx,dword[n] + mov ecx,dword[n_len] mov edi,dword[str1] stdcall str_len,edi add edi,eax diff --git a/programs/develop/libraries/TinyGL/asm_fork/vertex.asm b/programs/develop/libraries/TinyGL/asm_fork/vertex.asm index cb8b74dc6d..04d2be473c 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/vertex.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/vertex.asm @@ -236,7 +236,6 @@ pushad je .els_0 ; eye coordinates needed for lighting mov ebx,dword[eax+offs_cont_matrix_stack_ptr] - ;;;mov edx,[v] finit fld dword[edx+offs_vert_coord+offs_X] fld dword[edx+offs_vert_coord+offs_Y] @@ -251,13 +250,9 @@ pushad fld dword[ebx+8] ;st0 = m[2] fmul st0,st3 ;st0 *= 
v.coord.Z fadd dword[ebx+12] ;st0 += m[3] - fadd st0,st1 ;st0 += v.coord.Z * m[2] - fadd st0,st2 ;st0 += v.coord.Y * m[1] + faddp ;st0 += v.coord.Z * m[2] + faddp ;st0 += v.coord.Y * m[1] fstp dword[edx+offs_vert_ec] ;v.ec.X = v.coord.X * m[0] + v.coord.Y * m[1] + v.coord.Z * m[2] + m[3] - ffree st0 - fincstp - ffree st0 - fincstp add ebx,16 ;следущая строка матрицы add edx,4 ;следущая координата вектора loop .cycle_0 @@ -279,13 +274,9 @@ pushad fld dword[ebx+8] ;st0 = m[2] fmul st0,st3 ;st0 *= v.ec.Z fadd dword[ebx+12] ;st0 += m[3] - fadd st0,st1 ;st0 += v.ec.Z * m[2] - fadd st0,st2 ;st0 += v.ec.Y * m[1] + faddp ;st0 += v.ec.Z * m[2] + faddp ;st0 += v.ec.Y * m[1] fstp dword[edx+offs_vert_pc] ;v.pc.X = v.ec.X * m[0] + v.ec.Y * m[1] + v.ec.Z * m[2] + m[3] - ffree st0 - fincstp - ffree st0 - fincstp add ebx,16 ;следущая строка матрицы add edx,4 ;следущая координата вектора loop .cycle_1 @@ -308,13 +299,9 @@ pushad fmul st0,st3 ;st0 *= n.Y fld dword[ebx+8] ;st0 = m[2] fmul st0,st3 ;st0 *= n.Z - fadd st0,st1 ;st0 += n.Z * m[2] - fadd st0,st2 ;st0 += n.Y * m[1] + faddp ;st0 += n.Z * m[2] + faddp ;st0 += n.Y * m[1] fstp dword[edx+offs_vert_normal] ;v.normal.X = n.X * m[0] + n.Y * m[1] + n.Z * m[2] - ffree st0 - fincstp - ffree st0 - fincstp add ebx,16 ;следущая строка матрицы add edx,4 ;следущая координата вектора loop .cycle_2 @@ -329,7 +316,6 @@ pushad mov ebx,eax add ebx,offs_cont_matrix_model_projection - ;;;mov edx,[v] finit fld dword[edx+offs_vert_coord+offs_X] fld dword[edx+offs_vert_coord+offs_Y] @@ -345,13 +331,10 @@ pushad fld dword[ebx+8] ;st0 = m[2] fmul st0,st3 ;st0 *= v.coord.Z fadd dword[ebx+12] ;st0 += m[3] - fadd st0,st1 ;st0 += v.coord.Z * m[2] - fadd st0,st2 ;st0 += v.coord.Y * m[1] + faddp ;st0 += v.coord.Z * m[2] + faddp ;st0 += v.coord.Y * m[1] fstp dword[esi] ;v.pc.X = v.coord.X * m[0] + v.coord.Y * m[1] + v.coord.Z * m[2] + m[3] - ffree st0 - fincstp - ffree st0 - fincstp + fld dword[ebx+16] ;st0 = m[4] fmul st0,st3 ;st0 *= v.coord.X fld 
dword[ebx+20] ;st0 = m[5] @@ -359,13 +342,10 @@ pushad fld dword[ebx+24] ;st0 = m[6] fmul st0,st3 ;st0 *= v.coord.Z fadd dword[ebx+28] ;st0 += m[7] - fadd st0,st1 ;st0 += v.coord.Z * m[6] - fadd st0,st2 ;st0 += v.coord.Y * m[5] + faddp ;st0 += v.coord.Z * m[6] + faddp ;st0 += v.coord.Y * m[5] fstp dword[esi+4] ;v.pc.X = v.coord.X * m[4] + v.coord.Y * m[5] + v.coord.Z * m[6] + m[7] - ffree st0 - fincstp - ffree st0 - fincstp + fld dword[ebx+32] ;st0 = m[8] fmul st0,st3 ;st0 *= v.coord.X fld dword[ebx+36] ;st0 = m[9] @@ -373,8 +353,8 @@ pushad fld dword[ebx+40] ;st0 = m[10] fmul st0,st3 ;st0 *= v.coord.Z fadd dword[ebx+44] ;st0 += m[11] - fadd st0,st1 ;st0 += v.coord.Z * m[10] - fadd st0,st2 ;st0 += v.coord.Y * m[9] + faddp ;st0 += v.coord.Z * m[10] + faddp ;st0 += v.coord.Y * m[9] fstp dword[esi+8] ;v.pc.X = v.coord.X * m[8] + v.coord.Y * m[9] + v.coord.Z * m[10] + m[11] cmp dword[eax+offs_cont_matrix_model_projection_no_w_transform],0 @@ -384,10 +364,6 @@ pushad mov dword[esi+12],ebx ;v.pc.W = m[15] jmp .end_els .els_1: - ffree st0 - fincstp - ffree st0 - fincstp fld dword[ebx+48] ;st0 = m[12] fmul st0,st3 ;st0 *= v.coord.X fld dword[ebx+52] ;st0 = m[13] @@ -395,10 +371,17 @@ pushad fld dword[ebx+56] ;st0 = m[14] fmul st0,st3 ;st0 *= v.coord.Z fadd dword[ebx+60] ;st0 += m[15] - fadd st0,st1 ;st0 += v.coord.Z * m[14] - fadd st0,st2 ;st0 += v.coord.Y * m[13] + faddp ;st0 += v.coord.Z * m[14] + faddp ;st0 += v.coord.Y * m[13] fstp dword[esi+12] ;v.pc.W = v.coord.X * m[12] + v.coord.Y * m[13] + v.coord.Z * m[14] + m[15] .end_els: + ffree st0 + fincstp + ffree st0 + fincstp + ffree st0 + fincstp + if DEBUG ;gl_vertex_transform stdcall dbg_print,f_vt,txt_nl mov edx,[v] @@ -509,8 +492,7 @@ pushad mov eax,[edx+offs_cont_vertex] push eax add eax,sizeof.GLVertex - push eax - stdcall gl_draw_line, edx + stdcall gl_draw_line, edx, eax xor eax,eax mov dword[n],eax jmp .end_f @@ -534,8 +516,7 @@ pushad mov eax,[edx+offs_cont_vertex] push eax add eax,sizeof.GLVertex - push eax - 
stdcall gl_draw_line, edx + stdcall gl_draw_line, edx, eax mov edi,[edx+offs_cont_vertex] mov esi,edi add esi,sizeof.GLVertex @@ -548,7 +529,12 @@ pushad jne @f cmp dword[n],3 jne .end_f -; gl_draw_triangle(c, &c->vertex[0], &c->vertex[1], &c->vertex[2]); + mov eax,[edx+offs_cont_vertex] + push eax + add eax,sizeof.GLVertex + push eax + add eax,sizeof.GLVertex + stdcall gl_draw_triangle, edx, eax xor eax,eax mov dword[n],eax jmp .end_f diff --git a/programs/develop/libraries/TinyGL/asm_fork/zgl.inc b/programs/develop/libraries/TinyGL/asm_fork/zgl.inc index 1d971ddc13..fb02753948 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/zgl.inc +++ b/programs/develop/libraries/TinyGL/asm_fork/zgl.inc @@ -2,19 +2,13 @@ include 'opengl_const.inc' include 'zbuffer.inc' include 'zmath.inc' -;#define fputc(...) /*nothing*/ -;#define fprintf(...) /*nothing*/ -;#define vfprintf(...) /*nothing*/ -;#undef stderr -;#define stderr ((FILE*)-1) - ;enum { OP_ ## a , ... } -s1 equ 0 +sum1 equ 0 macro ADD_OP a,b,c { - OP_#a equ s1 - s1 equ (s1+1) + OP_#a equ sum1 + sum1 equ (sum1+1) } include 'opinfo.inc' diff --git a/programs/develop/libraries/TinyGL/asm_fork/ztriangle.asm b/programs/develop/libraries/TinyGL/asm_fork/ztriangle.asm index b4edd96245..7acdab80f6 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/ztriangle.asm +++ b/programs/develop/libraries/TinyGL/asm_fork/ztriangle.asm @@ -7,13 +7,13 @@ if TGL_FEATURE_RENDER_BITS eq 24 mov ecx,[p2] mov eax,[ecx+offs_zbup_r] shr eax,8 - mov [colorR],eax ;colorR=p2.r>>8 + mov [colorR],al ;colorR=p2.r>>8 mov eax,[ecx+offs_zbup_g] shr eax,8 - mov [colorG],eax ;colorG=p2.g>>8 + mov [colorG],al ;colorG=p2.g>>8 mov eax,[ecx+offs_zbup_b] shr eax,8 - mov [colorB],eax ;colorB=p2.b>>8 + mov [colorB],al ;colorB=p2.b>>8 else ; color=RGB_TO_PIXEL(p2->r,p2->g,p2->b); end if @@ -21,19 +21,27 @@ end if macro PUT_PIXEL _a { -; zz=z >> ZB_POINT_Z_FRAC_BITS; -; if (zz >= pz[_a]) { +local .end_0 + mov eax,[z] + shr eax, ZB_POINT_Z_FRAC_BITS + mov 
[zz],eax + mov ebx,[pz] + cmp ax,word[ebx+2*_a] ;if (zz >= pz[_a]) + jb .end_0 ;depth values are unsigned 16-bit, so skip on unsigned 'below' + ;edi = pp if TGL_FEATURE_RENDER_BITS eq 24 -; pp[3 * _a]=colorR; -; pp[3 * _a + 1]=colorG; -; pp[3 * _a + 2]=colorB; -; pz[_a]=zz; + mov cl,[colorR] + mov ch,[colorG] + mov word[edi+3*_a],cx + mov cl,[colorB] + mov byte[edi+3*_a +2],cl else ; pp[_a]=color; -; pz[_a]=zz; end if -; } -; z+=dzdx; + mov word[ebx+2*_a],ax ;store the new value in the depth buffer + .end_0: + mov eax,[dzdx] + add [z],eax } align 4 @@ -47,8 +55,6 @@ else color dd ? ;int end if include 'ztriangle.inc' - ret -endp ; ; Smooth filled triangle. @@ -69,7 +75,9 @@ end if macro PUT_PIXEL _a { -; zz=z >> ZB_POINT_Z_FRAC_BITS; + mov eax,[z] + shr eax,ZB_POINT_Z_FRAC_BITS + mov [zz],eax if TGL_FEATURE_RENDER_BITS eq 24 ; if (zz >= pz[_a]) { ; pp[3 * _a]=or1 >> 8; @@ -77,7 +85,8 @@ if TGL_FEATURE_RENDER_BITS eq 24 ; pp[3 * _a + 2]=ob1 >> 8; ; pz[_a]=zz; ; } -; z+=dzdx; + mov eax,[dzdx] + add [z],eax ; og1+=dgdx; ; or1+=drdx; ; ob1+=dbdx; @@ -94,20 +103,26 @@ else ; pp[_a] = RGB_TO_PIXEL(or1, og1, ob1); ; pz[_a]=zz; ; } -; z+=dzdx; + mov eax,[dzdx] + add [z],eax ; og1+=dgdx; ; or1+=drdx; ; ob1+=dbdx; end if } -macro DRAW_LINE +DRAW_LINE_M equ 1 + +macro DRAW_LINE code { if TGL_FEATURE_RENDER_BITS eq 16 +if code eq 0 ; register unsigned short *pz; ; register PIXEL *pp; ; register unsigned int tmp,z,zz,rgb,drgbdx; ; register int n; +end if +if code eq 1 ; n=(x2 >> 16) - x1; ; pp=pp1+x1; ; pz=pz1+x1; @@ -132,12 +147,134 @@ if TGL_FEATURE_RENDER_BITS eq 16 ; n-=1; ; } end if +end if +} + +align 4 +proc ZB_fillTriangleSmooth, zb:dword, p0:dword, p1:dword, p2:dword +locals +if TGL_FEATURE_RENDER_BITS eq 16 + _drgbdx dd ?
;int +end if +include 'ztriangle.inc' + +align 4 +proc ZB_setTexture uses eax ebx, zb:dword, texture:dword + mov eax,[zb] + mov ebx,[texture] + mov dword[eax+offs_zbuf_current_texture],ebx + ret +endp + +INTERP_Z equ 1 +INTERP_ST equ 1 + +macro DRAW_INIT +{ +; texture=zb->current_texture; +} + +macro PUT_PIXEL _a +{ +; zz=z >> ZB_POINT_Z_FRAC_BITS; if TGL_FEATURE_RENDER_BITS eq 24 -; register unsigned short *pz; -; register PIXEL *pp; -; register unsigned int s,t,z,zz; -; register int n,dsdx,dtdx; -; float sz,tz,fz,zinv; +; unsigned char *ptr; +; if (zz >= pz[_a]) { +; ptr = texture + (((t & 0x3FC00000) | s) >> 14) * 3; +; pp[3 * _a]= ptr[0]; +; pp[3 * _a + 1]= ptr[1]; +; pp[3 * _a + 2]= ptr[2]; +; pz[_a]=zz; +; } +else +; if (zz >= pz[_a]) { +; pp[_a]=texture[((t & 0x3FC00000) | s) >> 14]; +; pz[_a]=zz; +; } +end if + mov eax,[dzdx] + add [z],eax + mov eax,[dsdx] + add [s],eax + mov eax,[dtdx] + add [t],eax +} + +align 4 +proc ZB_fillTriangleMapping, zb:dword, p0:dword, p1:dword, p2:dword +locals + texture dd ? ;PIXEL* +include 'ztriangle.inc' + +; +; Texture mapping with perspective correction. +; We use the gradient method to make less divisions. 
+; TODO: pipeline the division +; +if 1 + +INTERP_Z equ 1 +INTERP_STZ equ 1 + +NB_INTERP equ 8 + +macro DRAW_INIT +{ +; texture=zb->current_texture; +; fdzdx=(float)dzdx; +; fndzdx=NB_INTERP * fdzdx; +; ndszdx=NB_INTERP * dszdx; +; ndtzdx=NB_INTERP * dtzdx; +} + +macro PUT_PIXEL _a +{ +; zz=z >> ZB_POINT_Z_FRAC_BITS; +if TGL_FEATURE_RENDER_BITS eq 24 +; unsigned char *ptr; +; if (zz >= pz[_a]) { +; ptr = texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> 14) * 3; +; pp[3 * _a]= ptr[0]; +; pp[3 * _a + 1]= ptr[1]; +; pp[3 * _a + 2]= ptr[2]; +; pz[_a]=zz; +; } +else +; if (zz >= pz[_a]) { +; pp[_a]=*(PIXEL *)((char *)texture+ +; (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH))); +; pz[_a]=zz; +; } +end if + mov eax,[dzdx] + add [z],eax + mov eax,[dsdx] + add [s],eax + mov eax,[dtdx] + add [t],eax +} + +DRAW_LINE_M equ 1 + +macro DRAW_LINE code +{ +if TGL_FEATURE_RENDER_BITS eq 24 +if code eq 0 + pz dd ? ;uint * + ;edi = pp dd ? + s dd ? ;uint + t dd ? ;uint + z dd ? ;uint + zz dd ? ;uint + n dd ? ;int + dsdx dd ? ;int + dtdx dd ? ;int + s_z dd ? ;float + t_z dd ? ;float + fz dd ? ;float + zinv dd ? ;float +end if +if code eq 1 ; n=(x2>>16)-x1; ; fz=(float)z1; ; zinv=1.0 / fz; @@ -188,118 +325,18 @@ if TGL_FEATURE_RENDER_BITS eq 24 ; n-=1; ; } end if -} - -align 4 -proc ZB_fillTriangleSmooth, zb:dword, p0:dword, p1:dword, p2:dword -locals -if TGL_FEATURE_RENDER_BITS eq 16 - _drgbdx dd ? 
;int end if -include 'ztriangle.inc' - ret -endp - -align 4 -proc ZB_setTexture uses eax ebx, zb:dword, texture:dword - mov eax,[zb] - mov ebx,[texture] - mov dword[eax+offs_zbuf_current_texture],ebx - ret -endp - -INTERP_Z equ 1 -INTERP_ST equ 1 - -macro DRAW_INIT -{ -; texture=zb->current_texture; -} - -macro PUT_PIXEL _a -{ -; zz=z >> ZB_POINT_Z_FRAC_BITS; -if TGL_FEATURE_RENDER_BITS eq 24 -; unsigned char *ptr; -; if (zz >= pz[_a]) { -; ptr = texture + (((t & 0x3FC00000) | s) >> 14) * 3; -; pp[3 * _a]= ptr[0]; -; pp[3 * _a + 1]= ptr[1]; -; pp[3 * _a + 2]= ptr[2]; -; pz[_a]=zz; -; } -else -; if (zz >= pz[_a]) { -; pp[_a]=texture[((t & 0x3FC00000) | s) >> 14]; -; pz[_a]=zz; -; } -end if -; z+=dzdx; -; s+=dsdx; -; t+=dtdx; -} - -align 4 -proc ZB_fillTriangleMapping, zb:dword, p0:dword, p1:dword, p2:dword -locals - texture dd ? ;PIXEL* -include 'ztriangle.inc' - ret -endp - -; -; Texture mapping with perspective correction. -; We use the gradient method to make less divisions. -; TODO: pipeline the division -; -if 1 - -INTERP_Z equ 1 -INTERP_STZ equ 1 - -NB_INTERP equ 8 - -macro DRAW_INIT -{ -; texture=zb->current_texture; -; fdzdx=(float)dzdx; -; fndzdx=NB_INTERP * fdzdx; -; ndszdx=NB_INTERP * dszdx; -; ndtzdx=NB_INTERP * dtzdx; -} - -macro PUT_PIXEL _a -{ -; zz=z >> ZB_POINT_Z_FRAC_BITS; -if TGL_FEATURE_RENDER_BITS eq 24 -; unsigned char *ptr; -; if (zz >= pz[_a]) { -; ptr = texture + (((t & 0x3FC00000) | (s & 0x003FC000)) >> 14) * 3; -; pp[3 * _a]= ptr[0]; -; pp[3 * _a + 1]= ptr[1]; -; pp[3 * _a + 2]= ptr[2]; -; pz[_a]=zz; -; } -else -; if (zz >= pz[_a]) { -; pp[_a]=*(PIXEL *)((char *)texture+ -; (((t & 0x3FC00000) | (s & 0x003FC000)) >> (17 - PSZSH))); -; pz[_a]=zz; -; } -end if -; z+=dzdx; -; s+=dsdx; -; t+=dtdx; } align 4 proc ZB_fillTriangleMappingPerspective, zb:dword, p0:dword, p1:dword, p2:dword locals -; PIXEL *texture; -; float fdzdx,fndzdx,ndszdx,ndtzdx; + texture dd ? ;PIXEL * + fdzdx dd ? ;float + fndzdx dd ? + ndszdx dd ? + ndtzdx dd ? 
include 'ztriangle.inc' - ret -endp end if @@ -328,9 +365,12 @@ macro PUT_PIXEL _a ; pp[_a]=texture[((t & 0x3FC00000) | s) >> 14]; ; pz[_a]=zz; ; } -; z+=dzdx; -; sz+=dszdx; -; tz+=dtzdx; + mov eax,[dzdx] + add [z],eax + mov eax,[dszdx] + add [sz],eax + mov eax,[dtzdx] + add [tz],eax } align 4 @@ -338,7 +378,5 @@ proc ZB_fillTriangleMappingPerspective, zb:dword, p0:dword, p1:dword, p2:dword locals texture dd ? ;PIXEL* include 'ztriangle.inc' - ret -endp end if diff --git a/programs/develop/libraries/TinyGL/asm_fork/ztriangle.inc b/programs/develop/libraries/TinyGL/asm_fork/ztriangle.inc index 8222176ece..d66bec5ea4 100644 --- a/programs/develop/libraries/TinyGL/asm_fork/ztriangle.inc +++ b/programs/develop/libraries/TinyGL/asm_fork/ztriangle.inc @@ -6,13 +6,13 @@ pr2 dd ? ;ZBufferPoint* l1 dd ? ;ZBufferPoint* l2 dd ? ;ZBufferPoint* - fdx1 dd ? - fdx2 dd ? - fdy1 dd ? - fdy2 dd ? - fz dd ? - d1 dd ? - d2 dd ? + fdx1 dd ? ;float + fdx2 dd ? ;float + fdy1 dd ? ;float + fdy2 dd ? ;float + fz dd ? ;float + d1 dd ? ;float + d2 dd ? ;float pz1 dd ? ;unsigned short* pp1 dd ? ;PIXEL* part dd ? @@ -22,7 +22,6 @@ nb_lines dd ? dx1 dd ? dy1 dd ? - tmp dd ? dx2 dd ? dy2 dd ? @@ -60,7 +59,7 @@ if INTERP_RGB eq 1 dbdl_max dd ? end if if INTERP_ST eq 1 - ;s1 dd ? + s1 dd ? dsdx dd ? dsdy dd ? dsdl_min dd ? @@ -72,344 +71,752 @@ if INTERP_ST eq 1 dtdl_max dd ? end if if INTERP_STZ eq 1 - sz1 dd ? - dszdx dd ? - dszdy dd ? - dszdl_min dd ? - dszdl_max dd ? - tz1 dd ? - dtzdx dd ? - dtzdy dd ? - dtzdl_min dd ? - dtzdl_max dd ? + sz1 dd ? ;float + dszdx dd ? ;float + dszdy dd ? ;float + dszdl_min dd ? ;float + dszdl_max dd ? ;float + tz1 dd ? ;float + dtzdx dd ? ;float + dtzdy dd ? ;float + dtzdl_min dd ? ;float + dtzdl_max dd ? ;float +end if + +if DRAW_LINE_M eq 1 + DRAW_LINE 0 ;переменные делаются в макросе +else + ;edi = pp dd ? + n dd ? ;int +if INTERP_Z eq 1 + pz dd ? ;unsigned short * + z dd ? ;uint + zz dd ? ;uint +end if +if INTERP_RGB eq 1 + or1 dd ? ;uint + og1 dd ? 
;uint + ob1 dd ? ;uint +end if +if INTERP_ST eq 1 + s dd ? ;uint + t dd ? ;uint +end if +if INTERP_STZ eq 1 + s_z dd ? ;float + t_z dd ? ;float +end if end if endl -; /* we sort the vertex with increasing y */ -; if (p1->y < p0->y) { -; t = p0; -; p0 = p1; -; p1 = t; -; } -; if (p2->y < p0->y) { -; t = p2; -; p2 = p1; -; p1 = p0; -; p0 = t; -; } else if (p2->y < p1->y) { -; t = p1; -; p1 = p2; -; p2 = t; -; } + ; we sort the vertex with increasing y + mov ebx,[p0] + mov ecx,[p1] + mov eax,[ebx+offs_zbup_y] + cmp [ecx+offs_zbup_y],eax + jge @f + ;if (p1.y < p0.y) + mov [p0],ecx + mov [p1],ebx + xchg ebx,ecx + mov eax,[ebx+offs_zbup_y] ;refresh p0.y for the next comparison + @@: + mov edx,[p2] + cmp [edx+offs_zbup_y],eax + jge @f + ;if (p2.y < p0.y) + mov [p0],edx + mov [p1],ebx + mov [p2],ecx + mov ebx,[p0] + mov ecx,[p1] + mov edx,[p2] + jmp .end_e0 + @@: + mov eax,[ecx+offs_zbup_y] + cmp [edx+offs_zbup_y],eax + jge .end_e0 + xchg ecx,edx ;else if (p2.y < p1.y): swap the registers too, the code below reads ecx as p1 and edx as p2 + mov [p1],ecx + mov [p2],edx + .end_e0: ; we compute dXdx and dXdy for all interpolated values -; fdx1 = p1->x - p0->x; -; fdy1 = p1->y - p0->y; + mov eax,[ecx+offs_zbup_x] + sub eax,[ebx+offs_zbup_x] + mov [fdx1],eax ;p1.x - p0.x + mov eax,[ecx+offs_zbup_y] + sub eax,[ebx+offs_zbup_y] + mov [fdy1],eax ;p1.y - p0.y -; fdx2 = p2->x - p0->x; -; fdy2 = p2->y - p0->y; + mov eax,[edx+offs_zbup_x] + sub eax,[ebx+offs_zbup_x] + mov [fdx2],eax ;p2.x - p0.x + mov eax,[edx+offs_zbup_y] + sub eax,[ebx+offs_zbup_y] + mov [fdy2],eax ;p2.y - p0.y -; fz = fdx1 * fdy2 - fdx2 * fdy1; -; if (fz == 0) -; return; -; fz = 1.0 / fz; + fild dword[fdx2] + fst dword[fdx2] + fild dword[fdy1] + fst dword[fdy1] + fmulp + fild dword[fdx1] + fst dword[fdx1] + fild dword[fdy2] + fst dword[fdy2] + fmulp + fsubrp ;st0 = st0-st1 (reverse subtract: plain fsubp would give st1-st0 = -fz) + fst dword[fz] ;fz = fdx1 * fdy2 - fdx2 * fdy1 + fldz + fcompp ;if (fz == 0) + fstsw ax + sahf + je .end_f + fld1 + fdiv dword[fz] ;fz = 1.0 / fz + fst dword[fz] ;st0 = fz -; fdx1 *= fz; -; fdy1 *= fz; -; fdx2 *= fz; -; fdy2
*= fz; + fld dword[fdx1] + fmul st0,st1 + fstp dword[fdx1] ;fdx1 *= fz + fld dword[fdy1] + fmul st0,st1 + fstp dword[fdy1] ;fdy1 *= fz + fld dword[fdx2] + fmul st0,st1 + fstp dword[fdx2] ;fdx2 *= fz + fld dword[fdy2] + fmul st0,st1 + fstp dword[fdy2] ;fdy2 *= fz + ffree st0 + fincstp if INTERP_Z eq 1 -; d1 = p1->z - p0->z; -; d2 = p2->z - p0->z; -; dzdx = (int) (fdy2 * d1 - fdy1 * d2); -; dzdy = (int) (fdx1 * d2 - fdx2 * d1); + mov eax,[ecx+offs_zbup_z] + sub eax,[ebx+offs_zbup_z] + mov [d1],eax + mov eax,[edx+offs_zbup_z] + sub eax,[ebx+offs_zbup_z] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.z - p0.z + fild dword[d2] + fst dword[d2] ;d2 = p2.z - p0.z + + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dzdx] ;dzdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dzdy] ;dzdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp end if if INTERP_RGB eq 1 -; d1 = p1->r - p0->r; -; d2 = p2->r - p0->r; -; drdx = (int) (fdy2 * d1 - fdy1 * d2); -; drdy = (int) (fdx1 * d2 - fdx2 * d1); + mov eax,[ecx+offs_zbup_r] + sub eax,[ebx+offs_zbup_r] + mov [d1],eax + mov eax,[edx+offs_zbup_r] + sub eax,[ebx+offs_zbup_r] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.r - p0.r + fild dword[d2] + fst dword[d2] ;d2 = p2.r - p0.r -; d1 = p1->g - p0->g; -; d2 = p2->g - p0->g; -; dgdx = (int) (fdy2 * d1 - fdy1 * d2); -; dgdy = (int) (fdx1 * d2 - fdx2 * d1); + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[drdx] ;drdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[drdy] ;drdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp -; d1 = p1->b - p0->b; -; d2 = p2->b - p0->b; -; dbdx = (int) (fdy2 * d1 - fdy1 * d2); -; dbdy = (int) (fdx1 * d2 - fdx2 * d1); + mov eax,[ecx+offs_zbup_g] + sub 
eax,[ebx+offs_zbup_g] + mov [d1],eax + mov eax,[edx+offs_zbup_g] + sub eax,[ebx+offs_zbup_g] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.g - p0.g + fild dword[d2] + fst dword[d2] ;d2 = p2.g - p0.g + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dgdx] ;dgdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dgdy] ;dgdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp + + mov eax,[ecx+offs_zbup_b] + sub eax,[ebx+offs_zbup_b] + mov [d1],eax + mov eax,[edx+offs_zbup_b] + sub eax,[ebx+offs_zbup_b] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.b - p0.b + fild dword[d2] + fst dword[d2] ;d2 = p2.b - p0.b + + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dbdx] ;dbdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dbdy] ;dbdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp end if if INTERP_ST eq 1 -; d1 = p1->s - p0->s; -; d2 = p2->s - p0->s; -; dsdx = (int) (fdy2 * d1 - fdy1 * d2); -; dsdy = (int) (fdx1 * d2 - fdx2 * d1); + mov eax,[ecx+offs_zbup_s] + sub eax,[ebx+offs_zbup_s] + mov [d1],eax + mov eax,[edx+offs_zbup_s] + sub eax,[ebx+offs_zbup_s] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.s - p0.s + fild dword[d2] + fst dword[d2] ;d2 = p2.s - p0.s -; d1 = p1->t - p0->t; -; d2 = p2->t - p0->t; -; dtdx = (int) (fdy2 * d1 - fdy1 * d2); -; dtdy = (int) (fdx1 * d2 - fdx2 * d1); + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dsdx] ;dsdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dsdy] ;dsdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp + + mov eax,[ecx+offs_zbup_t] + sub eax,[ebx+offs_zbup_t] + mov [d1],eax + 
mov eax,[edx+offs_zbup_t] + sub eax,[ebx+offs_zbup_t] + mov [d2],eax + fild dword[d1] + fst dword[d1] ;d1 = p1.t - p0.t + fild dword[d2] + fst dword[d2] ;d2 = p2.t - p0.t + + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dtdx] ;dtdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dtdy] ;dtdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp end if if INTERP_STZ eq 1 -; { -; float zz; -; zz=(float) p0->z; -; p0->sz= (float) p0->s * zz; -; p0->tz= (float) p0->t * zz; -; zz=(float) p1->z; -; p1->sz= (float) p1->s * zz; -; p1->tz= (float) p1->t * zz; -; zz=(float) p2->z; -; p2->sz= (float) p2->s * zz; -; p2->tz= (float) p2->t * zz; + fild dword[ebx+offs_zbup_z] + fild dword[ebx+offs_zbup_s] + fmul st0,st1 + fstp dword[ebx+offs_zbup_sz] ;p0.sz = (float) p0.s * p0.z + fild dword[ebx+offs_zbup_t] + fmul st0,st1 + fstp dword[ebx+offs_zbup_tz] ;p0.tz = (float) p0.t * p0.z + ffree st0 + fincstp -; d1 = p1->sz - p0->sz; -; d2 = p2->sz - p0->sz; -; dszdx = (fdy2 * d1 - fdy1 * d2); -; dszdy = (fdx1 * d2 - fdx2 * d1); + fild dword[ecx+offs_zbup_z] + fild dword[ecx+offs_zbup_s] + fmul st0,st1 + fstp dword[ecx+offs_zbup_sz] ;p1.sz = (float) p1.s * p1.z + fild dword[ecx+offs_zbup_t] + fmul st0,st1 + fstp dword[ecx+offs_zbup_tz] ;p1.tz = (float) p1.t * p1.z + ffree st0 + fincstp -; d1 = p1->tz - p0->tz; -; d2 = p2->tz - p0->tz; -; dtzdx = (fdy2 * d1 - fdy1 * d2); -; dtzdy = (fdx1 * d2 - fdx2 * d1); -; } + fild dword[edx+offs_zbup_z] + fild dword[edx+offs_zbup_s] + fmul st0,st1 + fstp dword[edx+offs_zbup_sz] ;p2.sz = (float) p2.s * p2.z + fild dword[edx+offs_zbup_t] + fmul st0,st1 + fstp dword[edx+offs_zbup_tz] ;p2.tz = (float) p2.t * p2.z + ffree st0 + fincstp + + fld dword[ecx+offs_zbup_sz] + fsub dword[ebx+offs_zbup_sz] + fst dword[d1] ;d1 = p1.sz - p0.sz + fld dword[edx+offs_zbup_sz] + fsub dword[ebx+offs_zbup_sz] + fst dword[d2] ;d2 = 
p2.sz - p0.sz + + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dszdx] ;dszdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dszdy] ;dszdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp + + fld dword[ecx+offs_zbup_tz] + fsub dword[ebx+offs_zbup_tz] + fst dword[d1] ;d1 = p1.tz - p0.tz + fld dword[edx+offs_zbup_tz] + fsub dword[ebx+offs_zbup_tz] + fst dword[d2] ;d2 = p2.tz - p0.tz + + fld dword[fdy1] + fmul st0,st1 + fld dword[fdy2] + fmul st0,st3 + fsub st0,st1 + fistp dword[dtzdx] ;dtzdx = (int) (fdy2*d1 - fdy1*d2) + ffree st0 + fincstp + fld dword[fdx2] + fmul st0,st2 + fld dword[fdx1] + fmul st0,st2 + fsub st0,st1 + fistp dword[dtzdy] ;dtzdy = (int) (fdx1*d2 - fdx2*d1) + ffree st0 + fincstp end if ; screen coordinates -; pp1 = (PIXEL *) ((char *) zb->pbuf + zb->linesize * p0->y); -; pz1 = zb->zbuf + p0->y * zb->xsize; + mov eax,[zb] + mov edx,[eax+offs_zbuf_linesize] + imul edx,[ebx+offs_zbup_y] + add edx,[eax+offs_zbuf_pbuf] + mov [pp1],edx ;pp1 = zb.pbuf + zb.linesize * p0.y + mov edx,[eax+offs_zbuf_xsize] + imul edx,[ebx+offs_zbup_y] + shl edx,1 + add edx,[eax+offs_zbuf_zbuf] + mov [pz1],edx ;pz1 = zb.zbuf + zb.xsize * p0.y DRAW_INIT -; for(part=0;part<2;part++) { -; if (part == 0) { -; if (fz > 0) { -; update_left=1; -; update_right=1; -; l1=p0; -; l2=p2; -; pr1=p0; -; pr2=p1; -; } else { -; update_left=1; -; update_right=1; -; l1=p0; -; l2=p1; -; pr1=p0; -; pr2=p2; -; } -; nb_lines = p1->y - p0->y; -; } else { -; /* second part */ -; if (fz > 0) { -; update_left=0; -; update_right=1; -; pr1=p1; -; pr2=p2; -; } else { -; update_left=1; -; update_right=0; -; l1=p1; -; l2=p2; -; } -; nb_lines = p2->y - p1->y + 1; -; } + mov dword[part],0 + .cycle_0: + mov ebx,[p0] + mov ecx,[p1] + mov edx,[p2] + cmp dword[part],0 + jne .els_0 + mov dword[update_left],1 + mov dword[update_right],1 + mov [l1],ebx + mov [pr1],ebx + 
fldz + fld dword[fz] + fcompp ;if (fz > 0) + fstsw ax + sahf + jbe .els_1 + mov [l2],edx + mov [pr2],ecx + jmp .end_1 + .els_1: + mov [l2],ecx + mov [pr2],edx + .end_1: + mov eax,[ecx+offs_zbup_y] + sub eax,[ebx+offs_zbup_y] + mov [nb_lines],eax ;nb_lines = p1.y - p0.y + jmp .end_0 + .els_0: + ; second part + mov dword[update_left],0 + mov dword[update_right],1 + fldz + fld dword[fz] + fcompp ;if (fz > 0) + fstsw ax + sahf + jbe .els_2 + mov [pr1],ecx + mov [pr2],edx + jmp .end_2 + .els_2: + mov [l1],ecx + mov [l2],edx + .end_2: + mov eax,[edx+offs_zbup_y] + sub eax,[ecx+offs_zbup_y] + inc eax + mov [nb_lines],eax ;nb_lines = p2.y - p1.y + 1 + .end_0: ; compute the values for the left edge -; if (update_left) { -; dy1 = l2->y - l1->y; -; dx1 = l2->x - l1->x; -; if (dy1 > 0) -; tmp = (dx1 << 16) / dy1; -; else -; tmp = 0; -; x1 = l1->x; -; error = 0; -; derror = tmp & 0x0000ffff; -; dxdy_min = tmp >> 16; -; dxdy_max = dxdy_min + 1; + cmp dword[update_left],0 ;if (update_left) + je .end_upd_l + mov ebx,[l1] + mov ecx,[l2] + mov eax,[ecx+offs_zbup_x] + sub eax,[ebx+offs_zbup_x] + mov [dx1],eax ;dx1 = l2.x - l1.x + mov eax,[ecx+offs_zbup_y] + sub eax,[ebx+offs_zbup_y] + mov [dy1],eax ;dy1 = l2.y - l1.y + cmp eax,0 ;if (dy1 > 0) + jle .els_3 + mov eax,[dx1] + shl eax,16 + xor edx,edx + div dword[dy1] ;eax = (dx1 << 16) / dy1 + jmp .end_3 + .els_3: + xor eax,eax + .end_3: + mov edx,[ebx+offs_zbup_x] + mov [x1],edx + mov dword[error],0 + mov dword[derror],eax + and dword[derror],0xffff + shr eax,16 + mov [dxdy_min],eax + inc eax + mov [dxdy_max],eax if INTERP_Z eq 1 -; z1=l1->z; -; dzdl_min=(dzdy + dzdx * dxdy_min); -; dzdl_max=dzdl_min + dzdx; + mov eax,[l1] + mov eax,[eax+offs_zbup_z] + mov [z1],eax ;z1 = l1.z + mov eax,[dzdx] + imul eax,[dxdy_min] + add eax,[dzdy] + mov [dzdl_min],eax ;dzdl_min = (dzdy +dzdx*dxdy_min) + add eax,[dzdx] + mov [dzdl_max],eax ;dzdl_max = dzdl_min +dzdx end if if INTERP_RGB eq 1 -; r1=l1->r; -; drdl_min=(drdy + drdx * dxdy_min); -; 
drdl_max=drdl_min + drdx; + mov ebx,[l1] + mov eax,[ebx+offs_zbup_r] + mov [r1],eax ;r1 = l1.r + mov eax,[drdx] + imul eax,[dxdy_min] + add eax,[drdy] + mov [drdl_min],eax ;drdl_min = (drdy +drdx*dxdy_min) + add eax,[drdx] + mov [drdl_max],eax ;drdl_max = drdl_min +drdx -; g1=l1->g; -; dgdl_min=(dgdy + dgdx * dxdy_min); -; dgdl_max=dgdl_min + dgdx; + mov eax,[ebx+offs_zbup_g] + mov [g1],eax ;g1 = l1.g + mov eax,[dgdx] + imul eax,[dxdy_min] + add eax,[dgdy] + mov [dgdl_min],eax ;dgdl_min = (dgdy +dgdx*dxdy_min) + add eax,[dgdx] + mov [dgdl_max],eax ;dgdl_max = dgdl_min +dgdx + + mov eax,[ebx+offs_zbup_b] + mov [b1],eax ;b1 = l1.b + mov eax,[dbdx] + imul eax,[dxdy_min] + add eax,[dbdy] + mov [dbdl_min],eax ;dbdl_min = (dbdy +dbdx*dxdy_min) + add eax,[dbdx] + mov [dbdl_max],eax ;dbdl_max = dbdl_min +dbdx -; b1=l1->b; -; dbdl_min=(dbdy + dbdx * dxdy_min); -; dbdl_max=dbdl_min + dbdx; end if if INTERP_ST eq 1 -; s1=l1->s; -; dsdl_min=(dsdy + dsdx * dxdy_min); -; dsdl_max=dsdl_min + dsdx; + mov ebx,[l1] + mov eax,[ebx+offs_zbup_s] + mov [s1],eax ;s1 = l1.s + mov eax,[dsdx] + imul eax,[dxdy_min] + add eax,[dsdy] + mov [dsdl_min],eax ;dsdl_min = (dsdy +dsdx*dxdy_min) + add eax,[dsdx] + mov [dsdl_max],eax ;dsdl_max = dsdl_min +dsdx -; t1=l1->t; -; dtdl_min=(dtdy + dtdx * dxdy_min); -; dtdl_max=dtdl_min + dtdx; + mov eax,[ebx+offs_zbup_t] + mov [t1],eax ;t1 = l1.t + mov eax,[dtdx] + imul eax,[dxdy_min] + add eax,[dtdy] + mov [dtdl_min],eax ;dtdl_min = (dtdy +dtdx*dxdy_min) + add eax,[dtdx] + mov [dtdl_max],eax ;dtdl_max = dtdl_min +dtdx end if if INTERP_STZ eq 1 -; sz1=l1->sz; -; dszdl_min=(dszdy + dszdx * dxdy_min); -; dszdl_max=dszdl_min + dszdx; + mov ebx,[l1] + mov eax,[ebx+offs_zbup_sz] + mov [sz1],eax ;sz1 = l1.sz - преобразований нет, потому без сопроцессора + fild dword[dxdy_min] + fmul dword[dszdx] + fadd dword[dszdy] + fst dword[dszdl_min] ;dszdl_min = (dszdy +dszdx*dxdy_min) + fadd dword[dszdx] + fstp dword[dszdl_max] ;dszdl_max = dszdl_min +dszdx -; tz1=l1->tz; 
-; dtzdl_min=(dtzdy + dtzdx * dxdy_min); -; dtzdl_max=dtzdl_min + dtzdx; + mov eax,[ebx+offs_zbup_tz] + mov [tz1],eax ;tz1 = l1.tz - преобразований нет, потому без сопроцессора + fild dword[dxdy_min] + fmul dword[dtzdx] + fadd dword[dtzdy] + fst dword[dtzdl_min] ;dtzdl_min = (dtzdy +dtzdx*dxdy_min) + fadd dword[dtzdx] + fstp dword[dtzdl_max] ;dtzdl_max = dtzdl_min +dtzdx end if -; } + .end_upd_l: -; /* compute values for the right edge */ + ; compute values for the right edge -; if (update_right) { -; dx2 = (pr2->x - pr1->x); -; dy2 = (pr2->y - pr1->y); -; if (dy2>0) -; dx2dy2 = ( dx2 << 16) / dy2; -; else -; dx2dy2 = 0; -; x2 = pr1->x << 16; -; } + cmp dword[update_right],0 ;if(update_right) + je .end_upd_r + mov ebx,[pr1] + mov ecx,[pr2] + mov eax,[ecx+offs_zbup_x] + sub eax,[ebx+offs_zbup_x] + mov [dx2],eax ;dx2 = pr2.x - pr1.x + mov eax,[ecx+offs_zbup_y] + sub eax,[ebx+offs_zbup_y] + mov [dy2],eax ;dy2 = pr2.y - pr1.y + cmp eax,0 ;if (dy2 > 0) + jle .els_4 + mov eax,[dx2] + shl eax,16 + xor edx,edx + div dword[dy2] ;eax = (dx2 << 16) / dy2 + jmp .end_4 + .els_4: + xor eax,eax + .end_4: + mov [dx2dy2],eax + mov eax,[ebx+offs_zbup_x] + shl eax,16 + mov [x2],eax ; x2 = pr1.x << 16 + .end_upd_r: -; /* we draw all the scan line of the part */ + ; we draw all the scan line of the part -; while (nb_lines>0) { -; nb_lines--; -;#ifndef DRAW_LINE -; /* generic draw line */ -; { -; register PIXEL *pp; -; register int n; + .beg_w_lin: + cmp dword[nb_lines],0 ;while (nb_lines>0) + jle .end_w_lin + dec dword[nb_lines] +if DRAW_LINE_M eq 1 + DRAW_LINE 1 +else + ; generic draw line + mov eax,[x2] + shr eax,16 + mov edi,[x1] + sub eax,edi + mov [n],eax ;n = (x2 >> 16) - x1 + imul edi,PSZB + add edi,[pp1] ;pp = pp1 + x1 * PSZB if INTERP_Z eq 1 -; register unsigned short *pz; -; register unsigned int z,zz; + mov eax,[x1] + shl eax,1 + add eax,[pz1] + mov [pz],eax + mov eax,[z1] + mov [z],eax end if if INTERP_RGB eq 1 -; register unsigned int or1,og1,ob1; + mov eax,[r1] + mov 
[or1],eax + mov eax,[g1] + mov [og1],eax + mov eax,[b1] + mov [ob1],eax end if if INTERP_ST eq 1 -; register unsigned int s,t; + mov eax,[s1] + mov [s],eax + mov eax,[t1] + mov [t],eax end if if INTERP_STZ eq 1 -; float sz,tz; + mov eax,[sz1] + mov [s_z],eax + mov eax,[tz1] + mov [t_z],eax end if -; n=(x2 >> 16) - x1; -; pp=(PIXEL *)((char *)pp1 + x1 * PSZB); + .cycle_1: ;while (n>=3) + PUT_PIXEL 0 + PUT_PIXEL 1 + PUT_PIXEL 2 + PUT_PIXEL 3 if INTERP_Z eq 1 -; pz=pz1+x1; -; z=z1; + add dword[pz],8 ;=4*sizeof(uint) end if -if INTERP_RGB eq 1 -; or1 = r1; -; og1 = g1; -; ob1 = b1; -end if -if INTERP_ST eq 1 -; s=s1; -; t=t1; -end if -if INTERP_STZ eq 1 -; sz=sz1; -; tz=tz1; -end if -; while (n>=3) { -; PUT_PIXEL(0); -; PUT_PIXEL(1); -; PUT_PIXEL(2); -; PUT_PIXEL(3); + add edi,4*PSZB + sub dword[n],4 + cmp dword[n],3 + jge .cycle_1 + .cycle_2: ;while (n>=0) + PUT_PIXEL 0 if INTERP_Z eq 1 -; pz+=4; + add dword[pz],2 ;=sizeof(uint) end if -; pp=(PIXEL *)((char *)pp + 4 * PSZB); -; n-=4; -; } -; while (n>=0) { -; PUT_PIXEL(0); -if INTERP_Z eq 1 -; pz+=1; + add edi,PSZB + dec dword[n] + cmp dword[n],0 + jge .cycle_2 end if -; pp=(PIXEL *)((char *)pp + PSZB); -; n-=1; -; } -; } -;#else -; DRAW_LINE(); -;#endif -; -; /* left edge */ -; error+=derror; -; if (error > 0) { -; error-=0x10000; -; x1+=dxdy_max; + + ; left edge + mov eax,[derror] + add [error],eax + cmp eax,0 ;if (error > 0) + jle .els_er + sub dword[error],0x10000 + mov eax,[dxdy_max] + add [x1],eax if INTERP_Z eq 1 -; z1+=dzdl_max; + mov eax,[dzdl_max] + add [z1],eax end if if INTERP_RGB eq 1 -; r1+=drdl_max; -; g1+=dgdl_max; -; b1+=dbdl_max; + mov eax,[drdl_max] + add [r1],eax + mov eax,[dgdl_max] + add [g1],eax + mov eax,[dbdl_max] + add [b1],eax end if if INTERP_ST eq 1 -; s1+=dsdl_max; -; t1+=dtdl_max; + mov eax,[dsdl_max] + add [s1],eax + mov eax,[dtdl_max] + add [t1],eax end if if INTERP_STZ eq 1 -; sz1+=dszdl_max; -; tz1+=dtzdl_max; + fld dword[dszdl_max] + fadd dword[sz1] + fstp dword[sz1] + fld 
dword[dtzdl_max] + fadd dword[tz1] + fstp dword[tz1] end if -; } else { -; x1+=dxdy_min; + jmp .end_er + .els_er: + mov eax,[dxdy_min] + add [x1],eax if INTERP_Z eq 1 -; z1+=dzdl_min; + mov eax,[dzdl_min] + add [z1],eax end if if INTERP_RGB eq 1 -; r1+=drdl_min; -; g1+=dgdl_min; -; b1+=dbdl_min; + mov eax,[drdl_min] + add [r1],eax + mov eax,[dgdl_min] + add [g1],eax + mov eax,[dbdl_min] + add [b1],eax end if if INTERP_ST eq 1 -; s1+=dsdl_min; -; t1+=dtdl_min; + mov eax,[dsdl_min] + add [s1],eax + mov eax,[dtdl_min] + add [t1],eax end if if INTERP_STZ eq 1 -; sz1+=dszdl_min; -; tz1+=dtzdl_min; + fld dword[dszdl_min] + fadd dword[sz1] + fstp dword[sz1] + fld dword[dtzdl_min] + fadd dword[tz1] + fstp dword[tz1] end if -; } -; -; /* right edge */ -; x2+=dx2dy2; -; -; /* screen coordinates */ -; pp1=(PIXEL *)((char *)pp1 + zb->linesize); -; pz1+=zb->xsize; -; } -; } -;} + .end_er: + + ; right edge + mov eax,[dx2dy2] + add [x2],eax + + ; screen coordinates + mov ebx,[zb] + mov eax,[ebx+offs_zbuf_linesize] + add [pp1],eax + mov eax,[ebx+offs_zbuf_xsize] + shl eax,1 + add [pz1],eax + jmp .beg_w_lin + .end_w_lin: + inc dword[part] + cmp dword[part],2 + jl .cycle_0 + .end_f: + ret +endp restore INTERP_Z restore INTERP_RGB restore INTERP_ST restore INTERP_STZ +restore DRAW_LINE_M purge DRAW_INIT -purge DRAW_LINE +purge DRAW_LINE purge PUT_PIXEL