;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;ROUND equ 8
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Draws a bump-mapped, textured triangle using the --------------
;------- Catmull Z-buffer algorithm (Z coordinate interpolation). ------
;------- Each output pixel is computed as col1*col2/256. ---------------
bump_tex_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map-------
;---------------------- esi - pointer to env map--------
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;----------------------         environment coordinates-
;----------------------         Z position coordinates--
;----------------------         pointer to Z buffer-----
;----------------------         pointer to texture------
;----------------------         texture coordinates-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
;
; The procedure does not preserve any registers.
; ebp is loaded from esp on entry, so [ebp] holds the return address and
; the stack arguments start at [ebp+4].  They occupy [ebp+4]..[ebp+53],
; i.e. 50 bytes, which is what the final "ret 50" removes.
;
; ---- stack arguments (every coordinate is one word) ----
; The x/y pairs are addressed through their address only (no size), so the
; code can read them either as a word or as a whole x:y dword.
.b_x1	equ ebp+4   ; b - bump map coords, vertex 1
.b_y1	equ ebp+6
.b_x2	equ ebp+8   ; vertex 2
.b_y2	equ ebp+10
.b_x3	equ ebp+12	 ; vertex 3
.b_y3	equ ebp+14
.e_x1	equ ebp+16	 ; e - env map coords, vertex 1
.e_y1	equ ebp+18
.e_x2	equ ebp+20	 ; vertex 2
.e_y2	equ ebp+22
.e_x3	equ ebp+24	 ; vertex 3
.e_y3	equ ebp+26
.z1	equ word[ebp+28]	; Z of vertex 1..3, one word each
.z2	equ word[ebp+30]
.z3	equ word[ebp+32]
.z_buff equ dword[ebp+34]	; pointer to Z-buffer
.tex_ptr equ dword[ebp+38]	; ptr to texture
.t_x1	equ ebp+42		; texture coords, vertex 1
.t_y1	equ ebp+44
.t_x2	equ ebp+46		; vertex 2
.t_y2	equ ebp+48
.t_x3	equ ebp+50		; vertex 3
.t_y3	equ ebp+52


; ---- locals, created by pushes below the frame base ----
.t_bmap equ dword[ebp-4]	; pointer to bump map (edx, pushed on entry)
.t_emap equ dword[ebp-8]	; pointer to env map  (esi, pushed on entry)
; eax, ebx and ecx (x shl 16 + y) are pushed at .sort2 after sorting, so
; each y lands in the low word and each x in the high word of its dword.
.x1	equ word[ebp-10]
.y1	equ word[ebp-12]
.x2	equ word[ebp-14]
.y2	equ word[ebp-16]
.x3	equ word[ebp-18]
.y3	equ word[ebp-20]

if 0 ;Ext <= SSE2

.dx12	equ dword[edi-4]
.dz12	equ	 [edi-8]
.dbx12	equ dword[edi-12]
.dby12	equ	 [edi-16]
.dex12	equ dword[edi-20]
.dey12	equ	 [edi-24]
.dtx12	equ dword[edi-28]
.dty12	equ	 [edi-32]

.dx13  equ dword[ebp-52-4*1]
.dz13  equ	[ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ	[ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ	[ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ	[ebp-52-4*8]


.dx23  equ dword[ebp-(52+4*9)]
.dz23  equ	[ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ	[ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ	[ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ	[ebp-(52+4*16)]

else

.dx12	equ dword[ebp-24]
.dz12	equ	 [ebp-28]
.dbx12	equ dword[ebp-32]
.dby12	equ	 [ebp-36]
.dex12	equ dword[ebp-40]
.dey12	equ	 [ebp-44]
.dtx12	equ dword[ebp-48]
.dty12	equ	 [ebp-52]

.dx13  equ dword[ebp-52-4*1]
.dz13  equ	[ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ	[ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ	[ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ	[ebp-52-4*8]


.dx23  equ dword[ebp-(52+4*9)]
.dz23  equ	[ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ	[ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ	[ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ	[ebp-(52+4*16)]

end if

if Ext < SSE

.cx1   equ dword[ebp-(52+4*17)] 	; current variables
.cz1   equ	[ebp-(52+4*18)]
.cx2   equ dword[ebp-(52+4*19)]
.cz2   equ	[ebp-(52+4*20)]
.cbx1  equ dword[ebp-(52+4*21)]
.cby1  equ	[ebp-(52+4*22)]
.cbx2  equ dword[ebp-(52+4*23)]
.cby2  equ	[ebp-(52+4*24)]
.cex1  equ dword[ebp-(52+4*25)]
.cey1  equ	[ebp-(52+4*26)]
.cex2  equ dword[ebp-(52+4*27)]
.cey2  equ	[ebp-(52+4*28)]

.ctx1  equ dword[ebp-(52+4*29)]
.cty1  equ	[ebp-(52+4*30)]
.ctx2  equ dword[ebp-(52+4*31)]
.cty2  equ	[ebp-(52+4*32)]

else

.cx1   equ dword[ebp-(52+4*17)] 	; current variables
.cz1   equ	[ebp-(52+4*18)]
.cbx1  equ dword[ebp-(52+4*19)]
.cby1  equ	[ebp-(52+4*20)]
.cex1  equ dword[ebp-(52+4*21)]
.cey1  equ	[ebp-(52+4*22)]
.ctx1  equ dword[ebp-(52+4*23)]
.cty1  equ	[ebp-(52+4*24)]

.cx2   equ dword[ebp-(52+4*25)]
.cz2   equ	[ebp-(52+4*26)]
.cbx2  equ dword[ebp-(52+4*27)]
.cby2  equ	[ebp-(52+4*28)]
.cex2  equ dword[ebp-(52+4*29)]
.cey2  equ	[ebp-(52+4*30)]
.ctx2  equ dword[ebp-(52+4*31)]
.cty2  equ	[ebp-(52+4*32)]

end if
       cld
       mov     ebp,esp		  ; [ebp] = return address, arguments from [ebp+4]
       push    edx		  ; becomes .t_bmap
       push    esi		  ; becomes .t_emap

       ; Order the three vertices so that y1 <= y2 <= y3.  The low words of
       ; eax/ebx/ecx hold y1/y2/y3.  Whenever two vertices are exchanged,
       ; their bump, env and texture coordinate pairs (x:y as one dword)
       ; and their Z words on the stack are exchanged as well.
 .sort3:
       cmp     ax,bx
       jle     .sort1
       ; y1 > y2 : exchange vertex 1 and vertex 2
       xchg    eax,ebx
       push    dword[.b_x1]	  ; swap through the stack:
       push    dword[.b_x2]	  ; the value pushed last is popped first
       pop     dword[.b_x1]
       pop     dword[.b_x2]
       push    dword[.e_x1]
       push    dword[.e_x2]
       pop     dword[.e_x1]
       pop     dword[.e_x2]
       push    dword[.t_x1]
       push    dword[.t_x2]
       pop     dword[.t_x1]
       pop     dword[.t_x2]
       mov     dx,.z2
       xchg    dx,.z1
       mov     .z2,dx
 .sort1:
       cmp     bx,cx
       jle     .sort2
       ; y2 > y3 : exchange vertex 2 and vertex 3, then re-check the first pair
       xchg    ebx,ecx
       push    dword[.b_x2]
       push    dword[.b_x3]
       pop     dword[.b_x2]
       pop     dword[.b_x3]
       push    dword[.e_x2]
       push    dword[.e_x3]
       pop     dword[.e_x2]
       pop     dword[.e_x3]
       push    dword[.t_x2]
       push    dword[.t_x3]
       pop     dword[.t_x2]
       pop     dword[.t_x3]
       mov     dx,.z3
       xchg    dx,.z2
       mov     .z3,dx
       jmp     .sort3
 .sort2:
       ; The vertices are sorted by Y.  These three pushes create the locals
       ; .x1/.y1, .x2/.y2 and .x3/.y3.
       push	eax
       push	ebx
       push	ecx

       ; Trivial reject: bit 31 is the sign of X and bit 15 the sign of Y in
       ; each packed vertex.  A bit survives the AND chain only if it is set
       ; in all three vertices, i.e. the triangle lies completely left of or
       ; completely above the screen.  Both axes are checked at once.
       ; No test against the right or bottom screen edge is made here.
       mov	edx,eax
       and	edx,ebx
       and	edx,ecx
       and	edx,80008000h
       jnz	.loop23_done

       ; Edge 1-2: bx = y2 - y1.  A zero height edge gets eight zero
       ; deltas (.dx12 .. .dty12) instead of being divided.
       mov	bx,.y2
       sub	bx,.y1
       jnz	.bt_dx12_make
if 0 ;Ext >= SSE2
       pxor	xmm0,xmm0
       movups	.dty12,xmm0
       movups	.dey12,xmm0
       sub	esp,16
else
       xor	edx,edx
       mov	ecx,8
     @@:
       push	edx		; one zero delta
       dec	ecx
       jnz	@b
end if
       jmp	.bt_dx12_done
 .bt_dx12_make:
       movsx	ebx,bx


if Ext>=SSE
       sub	 esp,32
   ;    mov       eax,256
       cvtsi2ss  xmm4,[i255d]
       cvtsi2ss  xmm3,ebx ;rcps
if 0 ;Ext >= SSE2
       mov	 edi,ebp
       sub	 edi,512
       or	 edi,0x0000000f
end if
       divss	 xmm3,xmm4
       shufps	 xmm3,xmm3,0

       movd	 mm0,[.b_x1]
       movd	 mm1,[.b_x2]
       movd	 mm2,[.e_x1]
       movd	 mm3,[.e_x2]

       pxor	  mm4,mm4
       punpcklwd  mm0,mm4
       punpcklwd  mm1,mm4
       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4

       psubd	  mm1,mm0
       psubd	  mm3,mm2

       cvtpi2ps  xmm1,mm1
       movlhps	 xmm1,xmm1
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |

       shufps	 xmm1,xmm1,10110001b
			     ;xmm1--> | dbx | dby | dex | dey |
;1       movups    .dey12,xmm1
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
       movq	 .dey12,mm0
       movq	 .dby12,mm1
;-------------
  ;    pxor      mm0,mm0
  ;    pxor      mm1,mm1
   ;/   pinsrw    mm0,.z1,1
   ;/   pinsrw    mm0,.x1,0
   ;/   pinsrw    mm1,.z2,1
   ;/   pinsrw    mm1,.x2,0
       mov	 ax,.z2
       sub	 ax,.z1
       cwde

       mov	dx,.x2
       sub	dx,.x1
       movsx	edx,dx

   ;/    movd      mm1,eax

   ;/    punpcklwd  mm0,mm4
   ;/    punpcklwd  mm1,mm4

  ;     cvtpi2ps   xmm1,mm1
  ;     cvtpi2ps   xmm2,mm0
  ;     subps      xmm1,xmm2

   ;/   psubd      mm1,mm0

       movd	  mm2,[.t_x1]
       movd	  mm3,[.t_x2]

       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4
       psubd	  mm3,mm2

   ;/  cvtpi2ps  xmm1,mm1
       cvtsi2ss  xmm1,eax
       movlhps	 xmm1,xmm1
       cvtsi2ss  xmm1,edx
   ;    movss     xmm1,xmm4
       shufps	 xmm1,xmm1,00101111b
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |

       shufps	 xmm1,xmm1,11100001b
			     ; xmm1--> | dx | dz | dtx | dty |
;1       movlps    .dty12,xmm1
;1       movhps    .dz12,xmm1
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1
       movq	 .dty12,mm0
       movq	 .dz12,mm1
;----
;       mov       ax,.z2
;       sub       ax,.z1
;       cwde
;       mov       bx,.x2
;       sub       bx,.x1
;       movsx     ebx,bx
;       movd      mm1,eax
;       psllq     mm1,32
;       movd      mm1,ebx

;;       push      ebx
;;       push      eax
;;       movq      mm1,[esp]
;;       add       esp,8
;;;       mov       ax,.z1
;;;       mov       bx,.z2
;;;       shl       eax,16
;;;       shl       ebx,16
;;;       mov       ax,.x1
;;;       mov       bx,.x2
;       movd       mm2,[.t_x1]
;       movd       mm3,[.t_x2]
;;       movd      mm0,eax
;;       movd      mm1,ebx

;       pxor       mm4,mm4
;;       punpcklwd  mm0,mm4
;;       punpcklwd  mm1,mm4
;       punpcklwd  mm2,mm4
;       punpcklwd  mm3,mm4

;;       psubd    mm1,mm0
;       psubd      mm3,mm2


;       cvtpi2ps  xmm1,mm1
;       movlhps   xmm1,xmm1
;       cvtpi2ps  xmm1,mm3

;       divps     xmm1,xmm3   ; xmm1--> | dz | dx | dty | dtx |

;       shufps    xmm1,xmm1,10110001b
			     ; xmm1--> | dx | dz | dtx | dty |
;       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
;       movhlps   xmm1,xmm1
;       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx | dz |
;       movq      .dty12,mm0
;       movq      .dz12,mm1
else
       mov	ax,.x2
       sub	ax,.x1
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dx12,eax
       push	 eax

       mov     ax,.z2
       sub     ax,.z1
       cwde
       shl     eax,CATMULL_SHIFT
       cdq
       idiv    ebx
       push    eax

       mov	ax,word[.b_x2]
       sub	ax,word[.b_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dbx12,eax
       push	 eax

       mov	ax,word[.b_y2]
       sub	ax,word[.b_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dby12,eax
       push	 eax

       mov	ax,word[.e_x2]
       sub	ax,word[.e_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dex12,eax
       push	 eax

       mov	ax,word[.e_y2]
       sub	ax,word[.e_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dey12,eax
       push	 eax

       mov	ax,word[.t_x2]
       sub	ax,word[.t_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dtx12,eax
       push	 eax

       mov	ax,word[.t_y2]
       sub	ax,word[.t_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dty12,eax
       push	 eax
end if
   .bt_dx12_done:

       ; Edge 1-3: bx = y3 - y1.  A zero height edge gets eight zero
       ; deltas (.dx13 .. .dty13) instead of being divided.
       mov	bx,.y3
       sub	bx,.y1
       jnz	.bt_dx13_make
       xor	edx,edx
       mov	ecx,8
     @@:
       push	edx		; one zero delta
       dec	ecx
       jnz	@b
       jmp	.bt_dx13_done
 .bt_dx13_make:
       movsx	ebx,bx

if Ext>=SSE

       sub	 esp,32
   ;    mov       eax,256
       cvtsi2ss  xmm4,[i255d]
       cvtsi2ss  xmm3,ebx	     ;rcps
       divss	 xmm3,xmm4
       shufps	 xmm3,xmm3,0

       movd	 mm0,[.b_x1]
       movd	 mm1,[.b_x3]
       movd	 mm2,[.e_x1]
       movd	 mm3,[.e_x3]

       pxor	  mm4,mm4
       punpcklwd  mm0,mm4
       punpcklwd  mm1,mm4
       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4

       psubd	  mm1,mm0
       psubd	  mm3,mm2

       cvtpi2ps  xmm1,mm1
       movlhps	 xmm1,xmm1
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |

       shufps	 xmm1,xmm1,10110001b
			     ;xmm1--> | dbx | dby | dex | dey |
;1       movups    .dey13,xmm1

       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
       movq	 .dey13,mm0
       movq	 .dby13,mm1

       mov	 ax,.z3
       sub	 ax,.z1
       cwde

       mov	dx,.x3
       sub	dx,.x1
       movsx	edx,dx

       movd	  mm2,[.t_x1]
       movd	  mm3,[.t_x3]

       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4
       psubd	  mm3,mm2

       cvtsi2ss  xmm1,eax
       movlhps	 xmm1,xmm1
       cvtsi2ss  xmm1,edx
       shufps	 xmm1,xmm1,00101111b
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |

       shufps	 xmm1,xmm1,11100001b
			     ; xmm1--> | dx | dz | dtx | dty |
;1       movlps    .dty13,xmm1
;1       movhps    .dz13,xmm1

       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1
       movq	 .dty13,mm0
       movq	 .dz13,mm1

else

       mov	ax,.x3
       sub	ax,.x1
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dx13,eax
       push	 eax

       mov     ax,.z3
       sub     ax,.z1
       cwde
       shl     eax,CATMULL_SHIFT
       cdq
       idiv    ebx
  ;    mov    .dz13,eax
       push    eax


       mov	ax,word[.b_x3]
       sub	ax,word[.b_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dbx13,eax
       push	 eax

       mov	ax,word[.b_y3]
       sub	ax,word[.b_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dby13,eax
       push	 eax

       mov	ax,word[.e_x3]
       sub	ax,word[.e_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dex13,eax
       push	 eax

       mov	ax,word[.e_y3]
       sub	ax,word[.e_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dey13,eax
       push	 eax

       mov	ax,word[.t_x3]
       sub	ax,word[.t_x1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dtx13,eax
       push	 eax

       mov	ax,word[.t_y3]
       sub	ax,word[.t_y1]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dty13,eax
       push	 eax
end if
   .bt_dx13_done:

       ; Edge 2-3: bx = y3 - y2.  A zero height edge gets eight zero
       ; deltas (.dx23 .. .dty23) instead of being divided.
       mov	bx,.y3
       sub	bx,.y2
       jnz	.bt_dx23_make
       xor	edx,edx
       mov	ecx,8
     @@:
       push	edx		; one zero delta
       dec	ecx
       jnz	@b
       jmp	.bt_dx23_done
 .bt_dx23_make:
       movsx	ebx,bx

if Ext>=SSE

       sub	 esp,32
   ;    mov       eax,256
       cvtsi2ss  xmm4,[i255d]
       cvtsi2ss  xmm3,ebx	     ;rcps
       divss	 xmm3,xmm4
       shufps	 xmm3,xmm3,0

       movd	 mm0,[.b_x2]
       movd	 mm1,[.b_x3]
       movd	 mm2,[.e_x2]
       movd	 mm3,[.e_x3]

       pxor	  mm4,mm4
       punpcklwd  mm0,mm4
       punpcklwd  mm1,mm4
       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4

       psubd	  mm1,mm0
       psubd	  mm3,mm2

       cvtpi2ps  xmm1,mm1
       movlhps	 xmm1,xmm1
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |

       shufps	 xmm1,xmm1,10110001b
			     ;xmm1--> | dbx | dby | dex | dey |
;1       movups    .dey23,xmm1

       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
       movq	 .dey23,mm0
       movq	 .dby23,mm1

       mov	 ax,.z3
       sub	 ax,.z2
       cwde

       mov	dx,.x3
       sub	dx,.x2
       movsx	edx,dx

       movd	  mm2,[.t_x2]
       movd	  mm3,[.t_x3]

       punpcklwd  mm2,mm4
       punpcklwd  mm3,mm4
       psubd	  mm3,mm2

       cvtsi2ss  xmm1,eax
       movlhps	 xmm1,xmm1
       cvtsi2ss  xmm1,edx
       shufps	 xmm1,xmm1,00101111b
       cvtpi2ps  xmm1,mm3

       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |

       shufps	 xmm1,xmm1,11100001b
			    ; xmm1--> | dx | dz | dtx | dty |
;       movlps    .dty23,xmm1
;       movhps    .dz23,xmm1
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx  |  dz |
       movq	 .dty23,mm0
       movq	 .dz23,mm1


else
       mov	ax,.x3
       sub	ax,.x2
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dx23,eax
       push	 eax

       mov     ax,.z3
       sub     ax,.z2
       cwde
       shl     eax,CATMULL_SHIFT
       cdq
       idiv    ebx
     ; mov     .dz23,eax
       push    eax

       mov	ax,word[.b_x3]
       sub	ax,word[.b_x2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dbx23,eax
       push	 eax

       mov	ax,word[.b_y3]
       sub	ax,word[.b_y2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dby23,eax
       push	 eax

       mov	ax,word[.e_x3]
       sub	ax,word[.e_x2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dex23,eax
       push	 eax

       mov	ax,word[.e_y3]
       sub	ax,word[.e_y2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dey23,eax
       push	 eax


       mov	ax,word[.t_x3]
       sub	ax,word[.t_x2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dtx23,eax
       push	 eax

       mov	ax,word[.t_y3]
       sub	ax,word[.t_y2]
       cwde
       shl	eax,ROUND
       cdq
       idiv	ebx
 ;     mov      .dty23,eax
       push	 eax
end if
      ;  sub     esp,40
   .bt_dx23_done:
       ; All 24 edge deltas are on the stack.  Reserve the 16 dwords that
       ; hold the "current" values of both edge walkers (.cx1 .. .cty2).
       sub	esp,64

       ; Both walkers start at vertex 1, so every start value is written
       ; to the "1" slot and to the "2" slot.  Screen X and the map
       ; coordinates are fixed point shl ROUND, Z is shl CATMULL_SHIFT.

       ; --- screen position ---
       movsx	eax,.x1
       shl	eax,ROUND
       mov	.cx1,eax
       mov	.cx2,eax

       movsx	ebx,.z1
       shl	ebx,CATMULL_SHIFT
       mov	.cz1,ebx
       mov	.cz2,ebx

       ; --- bump map coordinates ---
       movsx	ecx,word[.b_x1]
       shl	ecx,ROUND
       mov	.cbx1,ecx
       mov	.cbx2,ecx

       movsx	edx,word[.b_y1]
       shl	edx,ROUND
       mov	.cby1,edx
       mov	.cby2,edx

       ; --- environment map coordinates ---
       movsx	eax,word[.e_x1]
       shl	eax,ROUND
       mov	.cex1,eax
       mov	.cex2,eax

       movsx	eax,word[.e_y1]
       shl	eax,ROUND
       mov	.cey1,eax
       mov	.cey2,eax

       ; --- texture coordinates ---
       movsx	ecx,word[.t_x1]
       shl	ecx,ROUND
       mov	.ctx1,ecx
       mov	.ctx2,ecx

       movsx	edx,word[.t_y1]
       shl	edx,ROUND
       mov	.cty1,edx
       mov	.cty2,edx

if Ext >= SSE2
       ; Slots inside sse_repository, addressed through edi, that mirror
       ; the current values and the edge 1-3 / edge 1-2 deltas.
       .scby1  equ [edi]
       .scty1  equ [edi+16]
       .scby2  equ [edi+32]
       .scty2  equ [edi+48]
       .sdby13 equ [edi+64]
       .sdty13 equ [edi+80]
       .sdby12 equ [edi+96]
       .sdty12 equ [edi+128]

       ; Load each 4-dword group from the frame (unaligned load) and copy
       ; it to its repository slot (aligned store).  xmm0..xmm3 keep the
       ; current values for the first .call_line.
       push    edi
       mov     edi,sse_repository
       movups  xmm0,.cby1
       movaps  .scby1,xmm0
       movups  xmm1,.cty1
       movaps  .scty1,xmm1
       movups  xmm2,.cby2
       movaps  .scby2,xmm2
       movups  xmm3,.cty2
       movaps  .scty2,xmm3
       movups  xmm4,.dby13
       movaps  .sdby13,xmm4
       movups  xmm5,.dty13
       movaps  .sdty13,xmm5
       movups  xmm6,.dby12
       movaps  .sdby12,xmm6
       movups  xmm7,.dty12
       movaps  .sdty12,xmm7
       pop     edi

end if
       ; ---- upper half of the triangle: scanlines y1 .. y2-1 ----
       ; ecx is the current scanline.  Walker "1" follows edge 1-3 and
       ; walker "2" follows edge 1-2.  The half is skipped when y1 >= y2.
       movsx	ecx,.y1
       cmp	cx,.y2
       jge	.loop12_done
  .loop12:
;if Ext >= SSE2
;       fxsave  [sse_repository]
;end if
       call	.call_line	; draw the span for scanline ecx
if Ext >= SSE2
       ; SSE2 build: reload the current values from the frame, add the
       ; deltas kept in sse_repository and store the sums back.
;       fxrstor [sse_repository]
       movups  xmm0,.cby1
       movups  xmm1,.cty1
       movups  xmm2,.cby2
       movups  xmm3,.cty2
    ;   movups  xmm4,.dby13
    ;   movups  xmm5,.dty13
    ;   movups  xmm6,.dby12
    ;   movups  xmm7,.dty12
    ;   paddd   xmm0,xmm4
    ;   paddd   xmm1,xmm5
    ;   paddd   xmm2,xmm6
    ;   paddd   xmm3,xmm7
       push    edi		; edi temporarily addresses sse_repository
       mov     edi,sse_repository
       paddd   xmm0,.sdby13
       paddd   xmm1,.sdty13
       paddd   xmm2,.sdby12
       paddd   xmm3,.sdty12
       pop     edi
       movups  .cby1,xmm0
       movups  .cty1,xmm1
       movups  .cby2,xmm2
       movups  .cty2,xmm3
end if

if (Ext = MMX) | (Ext = SSE)
       ; MMX build: each movq/paddd handles two adjacent dwords at once
       ; (a y/x pair, or the z/x pair for .cz*), so eight quadword adds
       ; step all sixteen current values.
       movq	mm0,.cby2
       movq	mm1,.cby1
       movq	mm2,.cey2
       movq	mm3,.cey1
       movq	mm4,.cty1
       movq	mm5,.cty2
       movq	mm6,.cz1
       movq	mm7,.cz2
       paddd	mm0,.dby12
       paddd	mm1,.dby13
       paddd	mm2,.dey12
       paddd	mm3,.dey13
       paddd	mm4,.dty13
       paddd	mm5,.dty12
       paddd	mm6,.dz13
       paddd	mm7,.dz12
       movq	.cby2,mm0
       movq	.cby1,mm1
       movq	.cey1,mm3
       movq	.cey2,mm2
       movq	.cty1,mm4
       movq	.cty2,mm5
       movq	.cz1,mm6
       movq	.cz2,mm7
end if
if Ext = NON
       ; Plain x86 build: step every current value by its edge delta
       ; (walker 1 uses the *13 deltas, walker 2 the *12 deltas).
       mov	edx,.dbx13
       add	.cbx1,edx
       mov	eax,.dbx12
       add	.cbx2,eax
       mov	ebx,.dby13
       add	.cby1,ebx
       mov	edx,.dby12
       add	.cby2,edx

       mov	eax,.dex13
       add	.cex1,eax
       mov	ebx,.dex12
       add	.cex2,ebx
       mov	edx,.dey13
       add	.cey1,edx
       mov	eax,.dey12
       add	.cey2,eax

       mov	eax,.dtx13
       add	.ctx1,eax
       mov	ebx,.dtx12
       add	.ctx2,ebx
       mov	edx,.dty13
       add	.cty1,edx
       mov	eax,.dty12
       add	.cty2,eax

       mov	eax,.dx13
       add	.cx1,eax
       mov	ebx,.dx12
       add	.cx2,ebx
       mov	ebx,.dz13
       add	.cz1,ebx
       mov	edx,.dz12
       add	.cz2,edx
end if
       inc	ecx		; next scanline
       cmp	cx,.y2
       jl	.loop12
    .loop12_done:
       ; ---- set up the lower half of the triangle: scanlines y2 .. y3-1 ----
       ; Walker "1" keeps following edge 1-3.  Walker "2" is restarted at
       ; vertex 2 and will follow edge 2-3.  Nothing is drawn when y2 >= y3.
       movsx	ecx,.y2
       cmp	cx,.y3
       jge	.loop23_done

       movsx	eax,.z2
       shl	eax,CATMULL_SHIFT
       mov	.cz2,eax

       movsx	ebx,.x2
       shl	ebx,ROUND
       mov	.cx2,ebx

       ; The map coordinates are sign-extended (movsx), exactly like the
       ; vertex 1 start values and like the signed cwde/idiv delta
       ; computation.  Zero-extension would give walker 2 a value that
       ; disagrees with walker 1 and with the deltas whenever a
       ; coordinate is negative.
       movsx	edx,word[.b_x2]
       shl	edx,ROUND
       mov	.cbx2,edx

       movsx	eax,word[.b_y2]
       shl	eax,ROUND
       mov	.cby2,eax

       movsx	ebx,word[.e_x2]
       shl	ebx,ROUND
       mov	.cex2,ebx

       movsx	edx,word[.e_y2]
       shl	edx,ROUND
       mov	.cey2,edx

       movsx	eax,word[.t_x2]
       shl	eax,ROUND
       mov	.ctx2,eax

       movsx	ebx,word[.t_y2]
       shl	ebx,ROUND
       mov	.cty2,ebx
if Ext >= SSE2
       ; SSE2 build: refresh walker 2's registers and repository slots and
       ; store the edge 2-3 deltas in the repository.  xmm0/xmm1 (walker 1)
       ; are left as they are.
       movups  xmm2,.cby2
       movups  xmm3,.cty2
       movups  xmm6,.dby23
       movups  xmm7,.dty23
       .sdby23 equ [edi+160]
       .sdty23 equ [edi+192]
       push    edi		; edi temporarily addresses sse_repository
       mov     edi,sse_repository
       movaps  .scby2,xmm2
       movaps  .scty2,xmm3
       movaps  .sdby23,xmm6
       movaps  .sdty23,xmm7
       pop     edi

end if

     .loop23:
       ; ---- lower half of the triangle: one iteration per scanline ----
       ; ecx is the current scanline.  Walker "1" still follows edge 1-3
       ; (the *13 deltas), walker "2" follows edge 2-3 (the *23 deltas).
;if Ext >= SSE2
;       fxsave  [sse_repository]
;end if
       call	.call_line	; draw the span for scanline ecx

if Ext >= SSE2
       ; SSE2 build: reload the current values from the frame, add the
       ; deltas kept in sse_repository and store the sums back.

       movups  xmm0,.cby1
       movups  xmm1,.cty1
       movups  xmm2,.cby2
       movups  xmm3,.cty2


       push    edi		; edi temporarily addresses sse_repository
       mov     edi,sse_repository
       paddd   xmm0,.sdby13
       paddd   xmm1,.sdty13
       paddd   xmm2,.sdby23
       paddd   xmm3,.sdty23
       pop     edi
       movups  .cby1,xmm0
       movups  .cty1,xmm1
       movups  .cby2,xmm2
       movups  .cty2,xmm3




;       fxrstor [sse_repository]
;       movups  xmm0,.cby1
;       movups  xmm1,.cty1
;       movups  xmm2,.cby2
;       movups  xmm3,.cty2
;       movups  xmm4,.dby13
;       movups  xmm5,.dty13
;       movups  xmm6,.dby23
;       movups  xmm7,.dty23
;       paddd   xmm0,xmm4
;       paddd   xmm1,xmm5
;       paddd   xmm2,xmm6
 ;      paddd   xmm3,xmm7
 ;      movups  .cby1,xmm0
 ;      movups  .cty1,xmm1
 ;      movups  .cby2,xmm2
 ;      movups  .cty2,xmm3
;
end if
if (Ext = MMX) | (Ext = SSE)
       ; MMX build: each movq/paddd handles two adjacent dwords at once
       ; (a y/x pair, or the z/x pair for .cz*).
       movq	mm0,.cby2
       movq	mm1,.cby1
       movq	mm2,.cey2
       movq	mm3,.cey1
       movq	mm4,.cty1
       movq	mm5,.cty2
       movq	mm6,.cz1
       movq	mm7,.cz2
       paddd	mm0,.dby23
       paddd	mm1,.dby13
       paddd	mm2,.dey23
       paddd	mm3,.dey13
       paddd	mm4,.dty13
       paddd	mm5,.dty23
       paddd	mm6,.dz13
       paddd	mm7,.dz23
       movq	.cby2,mm0
       movq	.cby1,mm1
       movq	.cey2,mm2
       movq	.cey1,mm3
       movq	.cty1,mm4
       movq	.cty2,mm5
       movq	.cz1,mm6
       movq	.cz2,mm7
end if
If Ext = NON
       ; Plain x86 build: step every current value by its edge delta.
       mov	edx,.dbx13
       add	.cbx1,edx
       mov	eax,.dbx23
       add	.cbx2,eax
       mov	ebx,.dby13
       add	.cby1,ebx
       mov	edx,.dby23
       add	.cby2,edx

       mov	eax,.dex13
       add	.cex1,eax
       mov	ebx,.dex23
       add	.cex2,ebx
       mov	edx,.dey13
       add	.cey1,edx
       mov	eax,.dey23
       add	.cey2,eax

       mov	eax,.dx13
       add	.cx1,eax
       mov	ebx,.dx23
       add	.cx2,ebx
       mov	ebx,.dz13
       add	.cz1,ebx
       mov	edx,.dz23
       add	.cz2,edx

       mov	eax,.dtx13
       add	.ctx1,eax
       mov	ebx,.dtx23
       add	.ctx2,ebx
       mov	edx,.dty13
       add	.cty1,edx
       mov	eax,.dty23
       add	.cty2,eax
end if
       inc	ecx		; next scanline
       cmp	cx,.y3
       jl	.loop23
    .loop23_done:
       ; Common exit, also reached by the early "triangle is off screen"
       ; reject.
if Ext >= MMX
       ; The MMX/SSE builds use the mm registers for the edge stepping, and
       ; nothing between that code and this point executes EMMS.  Clear the
       ; MMX state here so x87 code in the caller finds an empty FPU stack.
       ; NOTE(review): harmless if the caller already issues EMMS itself.
       emms
end if
       mov	esp,ebp 	; drop all locals, [esp] = return address again
ret   50			; pop the 50 bytes of stack arguments ([ebp+4]..[ebp+53])

.call_line:
; Helper of bump_tex_triangle_z: draws one horizontal span.
; in:  ecx = scanline (y), ebp = frame of bump_tex_triangle_z,
;      edi = pointer to screen buffer (passed through unchanged)
; All general purpose registers are preserved (pushad/popad).
;
; The pushes below build the argument block that bump_tex_line_z reads
; relative to its own ebp (lowest address first):
;   y, bmap, emap, z_buff, tex_map,
;   bx1, by1, ex1, ey1, bx2, by2, ex2, ey2,
;   z2, tx2, ty2, z1, tx1, ty1
; eax / ebx carry the integer x1 / x2 of the span.

       pushad
       ; xmm0= cby1,cbx1,cz1,cx1
       ; xmm1= cty1,ctx1,cey1,cex1
if Ext >= SSE2
       ; SSE2 build: the current values are taken from xmm0..xmm3.  The
       ; shufps swaps the two dwords of each pair so that x ends up at
       ; the lower address, matching the push order of the other builds.
       sub	esp,8
       shufps	xmm1,xmm1,10110001b
       shufps	xmm3,xmm3,10110001b
       movlps	[esp],xmm1	; tx1, ty1
else
       push	dword .cty1
       push	.ctx1
end if
       push	dword .cz1
if Ext>=SSE2
       sub	esp,8
       movlps	[esp],xmm3	; tx2, ty2
else
       push	dword .cty2
       push	.ctx2
end if
       push	dword .cz2
if Ext>=SSE2
       sub	esp,32
       movhps	[esp+24],xmm3	; ex2, ey2
       shufps	xmm2,xmm2,10110001b
       movlps	[esp+16],xmm2	; bx2, by2
       movhps	[esp+8],xmm1	; ex1, ey1
       shufps	xmm0,xmm0,10110001b
       movlps	[esp],xmm0 ;================================  bx1, by1

else
       push	dword .cey2
       push	.cex2
       push	dword .cby2
       push	.cbx2
       push	dword .cey1
       push	.cex1
       push	dword .cby1
       push	.cbx1
end if

       push	.tex_ptr
       push	.z_buff
       push	.t_emap
       push	.t_bmap

       push	ecx		; y of this scanline

       ; Convert the fixed point edge X positions to integer pixels.
       mov	eax,.cx1
       sar	eax,ROUND
       mov	ebx,.cx2
       sar	ebx,ROUND

       ; NOTE(review): popad follows immediately, so bump_tex_line_z is
       ; presumably expected to remove its stack arguments itself; its
       ; return is not visible from here - confirm.
       call	bump_tex_line_z

       popad
;end if
ret
bump_tex_line_z:
;--------------in: eax - x1
;--------------    ebx - x2
;--------------    edi - pointer to screen buffer
;stack - another parameters :
.y	equ dword [ebp+4]
.bmap	equ dword [ebp+8]	 ; bump map pointer
.emap	equ dword [ebp+12]	 ; env map pointer
.z_buff equ dword [ebp+16]	 ; z buffer
.tex_map equ dword [ebp+20]	 ; texture pointer

.bx1	equ  [ebp+24]	;   ---
.by1	equ  [ebp+28]  ;       |
.ex1	equ  [ebp+32]  ;       |
.ey1	equ  [ebp+36]  ;       |
.bx2	equ  [ebp+40]  ;       |
.by2	equ  [ebp+44]  ;       |>   b. map and e. map coords
.ex2	equ  [ebp+48]  ;       |>   shifted shl ROUND
.ey2	equ  [ebp+52]  ;   ---
.z2	equ  [ebp+56]
.tx2	equ  [ebp+60]
.ty2	equ  [ebp+64]
.z1	equ  [ebp+68]
.tx1	equ  [ebp+72]
.ty1	equ  [ebp+76]



.x1	equ [ebp-4]
.x2	equ [ebp-8]
.dbx	equ [ebp-12]
.dby	equ [ebp-16]
.dex	equ [ebp-20]
.dey	equ [ebp-24]
.dz	equ [ebp-28]
.dtx	equ [ebp-32]
.dty	equ [ebp-36]

.cbx	equ [ebp-40]
.cby	equ [ebp-44]
.cex	equ [ebp-48]
.cey	equ [ebp-52]
.cz	equ [ebp-56]
.czbuff equ [ebp-60]
.ctx	equ [ebp-64]
.cty	equ [ebp-68]
.c_scr	equ [ebp-72]

.temp1	equ	   ebp-80
.temp2	equ	   ebp-88
.temp3	equ	   ebp-76
.temp4	equ	   ebp-84
.temp5	equ	   ebp-92

	mov	ebp,esp

	mov	ecx,.y
	or	ecx,ecx
	jl	.bl_end
	movzx	edx,word[size_y_var]
	cmp	ecx,edx  ;SIZE_Y
	jge	.bl_end

	cmp	eax,ebx
	jl	.bl_ok
	je	.bl_end


if Ext=NON
	mov	edx,.bx1
	xchg	edx,.bx2
	mov	.bx1,edx
	mov	edx,.by1
	xchg	edx,.by2
	mov	.by1,edx

	mov	edx,.ex1
	xchg	edx,.ex2
	mov	.ex1,edx
	mov	edx,.ey1
	xchg	edx,.ey2
	mov	.ey1,edx

	mov	edx,.tx1
	xchg	edx,.tx2
	mov	.tx1,edx
	mov	edx,.ty1
	xchg	edx,.ty2
	mov	.ty1,edx
end if
if Ext = MMX
	movq	mm0,.bx1
	movq	mm1,.bx2
	movq	mm2,.ex1
	movq	mm3,.ex2
	movq	mm4,.tx1
	movq	mm5,.tx2
	movq	.bx2,mm0
	movq	.bx1,mm1
	movq	.ex1,mm3
	movq	.ex2,mm2
	movq	.tx1,mm5
	movq	.tx2,mm4
end if
if Ext>=SSE
	movups xmm0,.bx1
	movups xmm1,.bx2
	movups .bx1,xmm1
	movups .bx2,xmm0
	movq	mm0,.tx1
	movq	mm1,.tx2
	movq	.tx1,mm1
	movq	.tx2,mm0
end if
;if Ext>=SSE2
;        movaps  xmm4,xmm0
;        movaps  xmm0,xmm2
;        movaps  xmm2,xmm4
;        movaps  xmm5,xmm1
;        movaps  xmm1,xmm3
;        movaps  xmm3,xmm5
;else

	xchg	eax,ebx
	mov	edx,.z1
	xchg	edx,.z2
	mov	.z1,edx
;end if
  .bl_ok:
;if Ext >= SSE2
;        shufps  xmm0,xmm0,11100001b
;        shufps  xmm2,xmm2,11100001b
;        movlps  .bx1,xmm0
;        movlps  .bx2,xmm2


;        shufps  xmm0,xmm0,00011011b
;        shufps  xmm2,xmm2,00011011b
;        movd    eax,xmm0
;        movd    ebx,xmm2
;        shufps  xmm0,xmm0,11000110b
;        shufps  xmm2,xmm2,11000110b
;        movd    .z1,xmm0
;        movd    .z2,xmm2
;        shufps  xmm1,xmm1,10110001b
;        shufps  xmm3,xmm3,10110001b
;        movlps  .ex1,xmm1
;        movlps  .ex2,xmm2
;        movhps  .tx1,xmm1
;        movhps  .tx2,xmm2

;        xchg    eax,ebx
;        mov     edx,.z1
;        xchg    edx,.z2
;        mov     .z1,edx


;end if

	push	eax
	push	ebx	      ;store x1, x2
	movzx	ebx,word[size_x_var]
    ;    mov     eax,.x1
	cmp	dword .x1,ebx  ;dword .x1,SIZE_X
	jge	.bl_end
	cmp	dword .x2,0
	jle	.bl_end

	mov	ebx,.x2
	sub	ebx,.x1

if Ext>=SSE

       sub	 esp,28
       cvtsi2ss  xmm3,ebx	     ;rcps
       shufps	 xmm3,xmm3,0
; float using SSE variant  ::-->
;       movups    xmm0,.bx1  ; new
;       movups    xmm1,.bx2  ; new

       cvtpi2ps  xmm0,.bx1 ;mm0    ; variant fixed point
       movlhps	 xmm0,xmm0
       cvtpi2ps  xmm0,.ex1 ;mm2
       cvtpi2ps  xmm1,.bx2 ;mm1
       movlhps	 xmm1,xmm1
       cvtpi2ps  xmm1,.ex2 ;mm3
       subps	 xmm1,xmm0

       divps	 xmm1,xmm3

       shufps	 xmm1,xmm1,10110001b
;       movups    .dey,xmm1  ; new
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1
       movq	 .dey,mm0
       movq	 .dby,mm1

       movd	 mm2,.z1
       movd	 mm3,.z2

       cvtpi2ps  xmm0,.tx1 ;mm0
       movlhps	 xmm0,xmm0
       cvtpi2ps  xmm0,mm2
       cvtpi2ps  xmm1,.tx2 ;mm1
       movlhps	 xmm1,xmm1
       cvtpi2ps  xmm1,mm3
;       movups    xmm0,,z1  ; new
;       movups    xmm1,.z2  ; new
       subps	 xmm1,xmm0

       divps	 xmm1,xmm3

;       movups    .dz,xmm1  ;new

       shufps	 xmm1,xmm1,10110100b
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
       movhlps	 xmm1,xmm1
       cvtps2pi  mm1,xmm1
       movd	 .dz,mm0
       movq	 .dty,mm1

else

	mov	eax,.bx2       ; calc .dbx
	sub	eax,.bx1
	cdq
	idiv	ebx
	push	eax

	mov	eax,.by2       ; calc .dby
	sub	eax,.by1
	cdq
	idiv	ebx
	push	eax

	mov	eax,.ex2       ; calc .dex
	sub	eax,.ex1
	cdq
	idiv	ebx
	push	eax

	mov	eax,.ey2       ; calc .dey
	sub	eax,.ey1
	cdq
	idiv	ebx
	push	eax


	mov	eax,.z2        ; calc .dz
	sub	eax,.z1
	cdq
	idiv	ebx
	push	eax

	mov	eax,.tx2       ; calc .dtx
	sub	eax,.tx1
	cdq
	idiv	ebx
	push	eax

	mov	eax,.ty2       ; calc .dty
	sub	eax,.ty1
	cdq
	idiv	ebx
	push	eax

end if
	; --- Left-edge clipping -------------------------------------------
	; If the span starts left of the screen (.x1 < 0, signed compare),
	; every interpolated start value below is advanced by
	; delta * (-.x1) and .x1 is reset to 0.
	cmp	dword .x1,0	    ; does the span start left of column 0?
	jge	@f	      ; no -> skip the left-edge clipping
			      ; yes -> cut off the part that lies outside the screen
	mov	ebx,.x1
	neg	ebx		; ebx = -.x1 = number of columns being skipped

;if Ext >= SSE

;        cvtsi2ss xmm0,ebx
;        shufps   xmm0,xmm0,0
;        movups   xmm1,.dey
;        mulps    xmm1,xmm0
;        shufps   xmm1,xmm1,00011011b
;        movups   xmm2,.bx1
;        addps    xmm2,xmm1
;        movups   .bx1,xmm2

	mov	eax,.dz
	imul	ebx	      ; edx:eax = .dz * (-.x1); only eax (low dword) is used
	add	.z1,eax
	mov	dword .x1,0	; span now starts at column 0

	mov	eax,.dbx	; same advance for bump-map x ...
	imul	ebx
	add    .bx1,eax

	mov	eax,.dby	; ... bump-map y
	imul	ebx
	add	.by1,eax

	mov	eax,.dex	; ... env-map x
	imul	ebx
	add	.ex1,eax

	mov	eax,.dey	; ... env-map y
	imul	ebx
	add	.ey1,eax

	mov	eax,.dtx	; ... texture x
	imul	ebx
	add	.tx1,eax

	mov	eax,.dty	; ... texture y
	imul	ebx
	add	.ty1,eax

      @@:
	; --- Right-edge clipping: clamp .x2 to the screen width -----------
   ;     mov     ebx,.x2
	movzx	eax,word[size_x_var]	; eax = screen width (word, zero-extended)
       ; cmp     dword .x2,SIZE_X
	cmp	dword .x2,eax  ; signed compare of .x2 with the width
	jl	@f
	mov	dword .x2,eax  ; .x2 >= width -> .x2 = width
      @@:
	; --- Start addresses in the Z-buffer and the screen buffer --------
	movzx	eax,word[size_x_var]  ; eax = screen width
	mul	.y			; edx:eax = width * .y
	add	eax,.x1 		; eax = pixel index of the first pixel of the span
	lea	esi,[4*eax]
	add	esi,.z_buff	  ; esi = Z-buffer address (one dword per pixel)
	lea	eax,[eax*3]
	add	edi,eax 	; edi += 3 bytes per pixel; edi presumably holds the
				; screen buffer base here (set outside this view - confirm)


	mov	ecx,.x2
	sub	ecx,.x1 	; ecx = span length in pixels (loop counter of .draw)
	; init current variables
	; The pushes below create the running ("current") values of the span.
	; Their order fixes the stack layout: a value pushed later lies at the
	; lower address, directly below the one pushed before it.
	push	dword .bx1   ; .cbx - current bump x, shifted left by ROUND
	push	dword .by1   ; .cby - current bump y
	push	dword .ex1   ; .cex - current env x
	push	dword .ey1   ; .cey - current env y

	push	dword .z1    ; .cz - current z, shifted left by CATMULL_SHIFT
	push	esi	     ; .czbuff - current Z-buffer address

	push	dword .tx1	; .ctx - current texture x
	push	dword .ty1	; .cty - current texture y
	push	edi	  ; .c_scr - current screen address
if Ext = SSE2
	; Loop constants for the SSE2 bump-map lookup in .draw:
	;   xmm1 = TEXTURE_SIZE in all four lanes (index mask)
	;   xmm2 = { -1, +1, -TEX_X, +TEX_X }, lane 0 first (neighbour offsets)
	;   xmm3 = .bmap in all four lanes (bump map base)
	mov    eax,TEXTURE_SIZE
	movd   xmm1,eax
	shufps xmm1,xmm1,0
	push   dword  TEX_X	; the four pushes build the offset vector on the
	push   dword  -TEX_X	; stack; the last one pushed (-1) is at [esp] and
	push   dword  1 	; therefore becomes lane 0
	push   dword  -1
	movups xmm2,[esp]
	movd   xmm3,.bmap
	shufps xmm3,xmm3,0
end if

if Ext>=MMX
	; Keep the running coordinates in MMX registers, one qword per pair.
	; Going by the push order above, the low dword is the y value and the
	; high dword the x value.
	movq	mm7,.cty	; mm7 = texture coordinates
	movq	mm6,.cby	; mm6 = bump-map coordinates
	movq	mm5,.cey	; mm5 = env-map coordinates
;        movq    mm4,.dtyq
;        movq    mm3,.dbyq
end if

     ; ------------------------------------------------------------------
     ; .draw - inner loop of the span, one iteration per screen pixel.
     ;   ecx = number of pixels left (decremented at the bottom of the loop).
     ; Per pixel:
     ;   1. Z test against the Z-buffer entry at .czbuff.
     ;   2. Bump-map differences: eax = b[i-1]-b[i+1], edx = b[i-TEX_X]-b[i+TEX_X].
     ;   3. Env-map texel fetched at the env coordinates displaced by (eax,edx);
     ;      coordinates outside the map produce a black pixel.
     ;   4. Texture texel fetched; each colour byte = (env * tex) shr 8.
     ;   5. The pixel is stored and the Z-buffer entry is updated.
     ; Every pixel store writes exactly 3 bytes, the same step by which
     ; .c_scr advances, so a neighbouring pixel that is rejected by the
     ; Z test is never modified and nothing is written past the last pixel.
     ; The string instructions used here rely on the direction flag being
     ; clear (not set anywhere in this view).
     ; ------------------------------------------------------------------
     .draw:
    ; if TEX = SHIFTING   ;bump drawing only in shifting mode
	mov	esi,.czbuff	 ; esi = address of the current Z-buffer entry
	mov	ebx,.cz 	 ; ebx = current interpolated Z
	cmp	ebx,dword[esi]
	jge	.skip		 ; current Z >= stored Z (signed) -> pixel not drawn
if Ext=NON
	mov	eax,.cby	 ; eax = bump y, esi = bump x (fixed point -> integer)
	shr	eax,ROUND
	mov	esi,.cbx
	shr	esi,ROUND
else
	movq	mm1,mm6 	 ; mm6 = running bump coordinates (low: y, high: x)
	psrld	mm1,ROUND
	movd	eax,mm1 	 ; eax = bump y
	psrlq	mm1,32
	movd	esi,mm1 	 ; esi = bump x
end if

	shl	eax,TEX_SHIFT
	add	esi,eax 	 ; esi = current bump map index (y shl TEX_SHIFT + x)

if Ext = SSE2
	; xmm2 = {-1,+1,-TEX_X,+TEX_X}, xmm1 = TEXTURE_SIZE mask and
	; xmm3 = .bmap base are loaded before the loop; all four neighbour
	; addresses are formed at once.
	movd	xmm0,esi
	shufps	xmm0,xmm0,0
	paddd	xmm0,xmm2	 ; index + neighbour offset
	pand	xmm0,xmm1	 ; wrap inside the bump map
	paddd	xmm0,xmm3	 ; + base address

	movd	ebx,xmm0
	movzx	eax,byte[ebx]	 ; eax = bump[i-1]
;
;        shufps  xmm0,xmm0,11100001b
	psrldq	xmm0,4
	movd	ebx,xmm0
	movzx	ebx,byte[ebx]	 ; ebx = bump[i+1]
	sub	eax,ebx
;
;        shufps  xmm0,xmm0,11111110b
	psrldq	xmm0,4
	movd	ebx,xmm0
	movzx	edx, byte [ebx]  ; edx = bump[i-TEX_X]
;
;        shufps  xmm0,xmm0,11111111b
	psrldq	xmm0,4
	movd	ebx,xmm0
	movzx	ebx, byte [ebx]  ; ebx = bump[i+TEX_X]
	sub	edx,ebx
;
else
	lea	ebx,[esi-1]
	and	ebx,TEXTURE_SIZE ; wrap inside the bump map
	add	ebx,.bmap
	movzx	eax,byte [ebx]	 ; eax = bump[i-1]

	lea	ebx,[esi+1]
	and	ebx,TEXTURE_SIZE
	add	ebx,.bmap
	movzx	ebx,byte [ebx]	 ; ebx = bump[i+1]
	sub	eax,ebx

	lea	ebx,[esi-TEX_X]
	and	ebx,TEXTURE_SIZE
	add	ebx,.bmap
	movzx	edx,byte [ebx]	 ; edx = bump[i-TEX_X]

	lea	ebx,[esi+TEX_X]
	and	ebx,TEXTURE_SIZE
	add	ebx,.bmap
	movzx	ebx,byte [ebx]	 ; ebx = bump[i+TEX_X]
	sub	edx,ebx
end if

     ;  eax - horizontal difference, becomes the displaced env x coordinate
     ;  edx - vertical   difference, becomes the displaced env y coordinate
if Ext=NON
	mov	ebx,.cex	 ; .cex - current env map x
	shr	ebx,ROUND
	add	eax,ebx


	mov	ebx,.cey	 ; .cey - current env map y
	shr	ebx,ROUND
	add	edx,ebx

else
	movq	mm1,mm5 	 ; mm5 = running env coordinates
	psrld	mm1,ROUND
	movd	ebx,mm1
	psrlq	mm1,32
	add	eax,ebx
	movd	ebx,mm1
	add	edx,ebx
end if

	; Valid env coordinates are 0..TEX_X-1 and 0..TEX_Y-1.  With the
	; (y shl TEX_SHIFT) + x addressing below, x = TEX_X would select the
	; first texel of the following row and y = TEX_Y a row past a
	; TEX_Y-row map, hence jge rather than jg.
	; NOTE(review): assumes the env map is TEX_X by TEX_Y texels - confirm.
	test	eax,eax
	jl	.black
	cmp	eax,TEX_X
	jge	.black
	test	edx,edx
	jl	.black
	cmp	edx,TEX_Y
	jge	.black

	; Original author's note (translated from Polish): a perturbation in
	; the env map equals a perturbation in the texture; suggestion: draw
	; the unperturbed pixel instead of a black one.
	shl	edx,TEX_SHIFT
	add	edx,eax
	lea	esi,[edx*3]	 ; 3 bytes per env-map texel
	add	esi,.emap
	lodsd			 ; eax = env texel (low 3 bytes are used)

if Ext=NON
	mov	edx,.cty
	shr	edx,ROUND  ; sar

	mov	edi,.ctx
	shr	edi,ROUND  ; sar
else
	movq	mm1,mm7 	 ; mm7 = running texture coordinates
	psrld	mm1,ROUND
	movd	edx,mm1 	 ; edx = texture y
	psrlq	mm1,32
	movd	edi,mm1 	 ; edi = texture x

end if

	shl	edx,TEX_SHIFT
	add	edi,edx
	and	edi,TEXTURE_SIZE ; wrap inside the texture
	lea	esi,[edi*3]	 ; 3 bytes per texture texel
	add	esi,.tex_map

if Ext=NON
	mov	edx,eax 	 ; edx = env texel
	lodsd			 ; eax = texture texel
	push	ax		 ; keep texture bytes 0 and 1
	mul	dl		 ; ax = tex byte0 * env byte0
	mov	dl,ah		 ; dl = result byte 0
	pop	ax
	shr	ax,8		 ; al = tex byte1
	mul	dh		 ; ax = tex byte1 * env byte1, ah = result byte 1
	mov	al,dl		 ; ax = result bytes 0 and 1
	mov	edi,.c_scr
	stosw
	shr	edx,16		 ; dl = env byte2
	shr	eax,16		 ; al = tex byte2 (upper half of eax is untouched
				 ; by the 8-bit multiplies above)
	mul	dl
	shr	ax,8		 ; al = result byte 2
	stosb
else
	movd	   mm0,eax	 ; env texel
	pxor	   mm1,mm1
	punpcklbw  mm0,mm1	 ; bytes -> words
	movd	   mm2,[esi]	 ; texture texel
	punpcklbw  mm2,mm1
	pmullw	   mm0,mm2	 ; per-channel product
	psrlw	   mm0,8
	packuswb   mm0,mm1	 ; words -> bytes
	movd	   eax,mm0	 ; byte 3 is not colour data of this pixel
	mov	   edi,.c_scr
	stosw			 ; store only the 3 colour bytes
	shr	   eax,16
	stosb

end if

	jmp	.actual_zbuff	; update the Z-buffer
     @@:
     .black:
	xor	eax,eax
	mov	edi,.c_scr
	stosw			; black pixel: exactly 3 zero bytes
	stosb
     .actual_zbuff:
	mov	eax,.cz
	mov	edi,.czbuff
	stosd			; Z-buffer entry = current Z

      .skip:
	; advance all running values to the next pixel
	add	dword .czbuff,4 ; one dword per Z-buffer entry
	add	dword .c_scr,3	; three bytes per screen pixel

if Ext=NON
	mov	eax,.dbx
	add	.cbx,eax
	mov	ebx,.dby
	add	.cby,ebx

	mov	edx,.dex
	add	.cex,edx
	mov	eax,.dey
	add	.cey,eax

	mov	ebx,.dtx
	add	.ctx,ebx
	mov	edx,.dty
	add	.cty,edx

else
	paddd	mm7,.dty	; texture coordinates += deltas (qword pair)
	paddd	mm6,.dby	; bump coordinates    += deltas
	paddd	mm5,.dey	; env coordinates     += deltas
end if
	mov	eax,.dz
	add	.cz,eax

	dec	ecx
	jnz	.draw

  .bl_end:
	mov	esp,ebp 	; drop everything pushed below ebp
ret 76				; return and remove 76 bytes of arguments
;Ext = MMX

;     else
;        movq    mm5, qword[.temp1]  ;-
;        paddd   mm5, qword[.temp5]  ; .temp5 == low dword = TEX_X, high dword = -TEX_X
;        pand    mm5, qword[.temp3]  ; .temp3 == low = high dword = TEX_SIZE
;        paddd   mm5, qword[.temp4]  ; .temp4 == low = high dword = offset .bmap
;        movd    ebx,mm5
;        psrlq   mm5,32
;     end if