1818 lines
38 KiB
PHP
Raw Normal View History

;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;ROUND equ 8
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for his absolutely ----
;------- great DOS mode 13h demos --------------------------------------
;------- This procedure draws a bump-mapped, textured triangle using ---
;------- the Catmull Z-buffer algorithm (Z coordinate interpolation) ---
;------- Each output pixel is computed as: col1*col2/256 ---------------
bump_tex_triangle_z:
; Draws one bump-mapped, textured triangle with a per-pixel Z-buffer test.
; The triangle is split into scanlines; each scanline is handed to
; bump_tex_line_z through the local helper .call_line.
; The procedure removes its own 50 bytes of stack arguments (ret 50).
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map-------
;---------------------- esi - pointer to env map--------
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer to Z buffer-----
;---------------------- pointer to texture------
;---------------------- texture coordinates-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
;
; Frame layout. ebp is set to esp at entry without pushing the old ebp,
; so [ebp] is the return address and the first stack argument is at ebp+4.
; Names defined as a bare "ebp+N" are address expressions and are used as
; word[.name] / dword[.name]; names defined with brackets already include
; the memory operand.
.b_x1 equ ebp+4 ; procedure does not save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - bump map coords
.b_x3 equ ebp+12 ; e - env map coords
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.tex_ptr equ dword[ebp+38] ; ptr to texture
.t_x1 equ ebp+42 ; texture coords
.t_y1 equ ebp+44
.t_x2 equ ebp+46
.t_y2 equ ebp+48
.t_x3 equ ebp+50
.t_y3 equ ebp+52
; Locals below the return address; they are created by pushes in the
; prologue (edx, esi) and at .sort2 (eax, ebx, ecx).
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to env map
.x1 equ word[ebp-10] ; high word of the pushed eax
.y1 equ word[ebp-12] ; low word of the pushed eax
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
; Per-scanline deltas for the three edges (12, 13, 23), eight dwords each.
; The "if 0" branch is a disabled variant that addressed the delta-12 slots
; through edi; only the "else" branch is assembled.
if 0 ;Ext <= SSE2
.dx12 equ dword[edi-4]
.dz12 equ [edi-8]
.dbx12 equ dword[edi-12]
.dby12 equ [edi-16]
.dex12 equ dword[edi-20]
.dey12 equ [edi-24]
.dtx12 equ dword[edi-28]
.dty12 equ [edi-32]
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
else
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]
.dtx12 equ dword[ebp-48]
.dty12 equ [ebp-52]
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
end if
; Current (interpolated) values along the two active edges, 16 dwords.
; Two layouts exist: below SSE the edge-1 / edge-2 values are interleaved
; per attribute; from SSE upwards all edge-1 values come first and all
; edge-2 values follow, so the 16-byte loads used by the SSE2 code pick up
; four related values at once.
if Ext < SSE
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cx2 equ dword[ebp-(52+4*19)]
.cz2 equ [ebp-(52+4*20)]
.cbx1 equ dword[ebp-(52+4*21)]
.cby1 equ [ebp-(52+4*22)]
.cbx2 equ dword[ebp-(52+4*23)]
.cby2 equ [ebp-(52+4*24)]
.cex1 equ dword[ebp-(52+4*25)]
.cey1 equ [ebp-(52+4*26)]
.cex2 equ dword[ebp-(52+4*27)]
.cey2 equ [ebp-(52+4*28)]
.ctx1 equ dword[ebp-(52+4*29)]
.cty1 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
else
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cbx1 equ dword[ebp-(52+4*19)]
.cby1 equ [ebp-(52+4*20)]
.cex1 equ dword[ebp-(52+4*21)]
.cey1 equ [ebp-(52+4*22)]
.ctx1 equ dword[ebp-(52+4*23)]
.cty1 equ [ebp-(52+4*24)]
.cx2 equ dword[ebp-(52+4*25)]
.cz2 equ [ebp-(52+4*26)]
.cbx2 equ dword[ebp-(52+4*27)]
.cby2 equ [ebp-(52+4*28)]
.cex2 equ dword[ebp-(52+4*29)]
.cey2 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
end if
; Prologue: set up the frame and keep the two map pointers as locals.
cld ; string instructions must run forward
mov ebp,esp ; [ebp] = return address, arguments start at ebp+4
push edx ; store bump map
push esi ; store e. map
; sub esp,120
; Sort the three vertices by ascending Y. The Y coordinate is the low word
; of eax/ebx/ecx, so the 16-bit compares below compare Y only. Whenever two
; vertices are exchanged, their bump, env and texture coordinate pairs
; (x and y words swapped together as one dword) and their Z words are
; exchanged as well.
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx ; vertex 1 <-> vertex 2
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov edx,dword[.t_x1]
xchg edx,dword[.t_x2]
mov dword[.t_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx ; vertex 2 <-> vertex 3
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov edx,dword[.t_x2]
xchg edx,dword[.t_x3]
mov dword[.t_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3 ; re-check the first pair after a swap
; Vertices are sorted. Store them as locals (.x1/.y1 ... .x3/.y3), reject
; triangles lying entirely on the negative side of an axis, then start the
; delta computation for edge 1->2.
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx
; Bit 31 is the sign of X and bit 15 the sign of Y in each packed vertex.
; A bit survives the three ANDs only if it is set in all three vertices,
; so the jump is taken when all X are negative or all Y are negative.
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
; y1 = y2: edge 1->2 is horizontal. Fill the eight delta-12 slots
; (ebp-24 ... ebp-52) with zeros by pushing them.
if 0 ;Ext >= SSE2
pxor xmm0,xmm0
movups .dty12,xmm0
movups .dey12,xmm0
sub esp,16
else
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
end if
jmp .bt_dx12_done
; Edge 1->2 is not horizontal: compute the per-scanline deltas of x, z and
; the bump / env / texture coordinates along it and store them in the
; eight delta-12 slots. On entry bx = y2 - y1.
.bt_dx12_make:
movsx ebx,bx
if Ext>=SSE
; SSE variant: the slots are reserved in one step and filled through their
; ebp-relative names. xmm3 holds (y2-y1) divided by the integer stored at
; i255d (defined outside this view), broadcast to all four lanes; every
; difference is divided by it.
; NOTE(review): dz is scaled by the same factor as the other deltas here,
; whereas the integer path uses CATMULL_SHIFT for dz and ROUND for the
; rest - confirm that both scales agree.
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
if 0 ;Ext >= SSE2
mov edi,ebp
sub edi,512
or edi,0x0000000f
end if
divss xmm3,xmm4
shufps xmm3,xmm3,0
movd mm0,[.b_x1] ; each movd loads an x/y word pair
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
pxor mm4,mm4
punpcklwd mm0,mm4 ; widen the words to dwords (zero extension)
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0 ; bump difference (vertex 2 - vertex 1)
psubd mm3,mm2 ; env difference
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey12,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
;-------------
; pxor mm0,mm0
; pxor mm1,mm1
;/ pinsrw mm0,.z1,1
;/ pinsrw mm0,.x1,0
;/ pinsrw mm1,.z2,1
;/ pinsrw mm1,.x2,0
mov ax,.z2 ; eax = z2 - z1 (sign extended)
sub ax,.z1
cwde
mov dx,.x2 ; edx = x2 - x1 (sign extended)
sub dx,.x1
movsx edx,dx
;/ movd mm1,eax
;/ punpcklwd mm0,mm4
;/ punpcklwd mm1,mm4
; cvtpi2ps xmm1,mm1
; cvtpi2ps xmm2,mm0
; subps xmm1,xmm2
;/ psubd mm1,mm0
movd mm2,[.t_x1]
movd mm3,[.t_x2]
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2 ; texture difference
;/ cvtpi2ps xmm1,mm1
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
; movss xmm1,xmm4
shufps xmm1,xmm1,00101111b ; move dz and dx into the two upper lanes
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty12,xmm1
;1 movhps .dz12,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty12,mm0
movq .dz12,mm1
;----
; mov ax,.z2
; sub ax,.z1
; cwde
; mov bx,.x2
; sub bx,.x1
; movsx ebx,bx
; movd mm1,eax
; psllq mm1,32
; movd mm1,ebx
;; push ebx
;; push eax
;; movq mm1,[esp]
;; add esp,8
;;; mov ax,.z1
;;; mov bx,.z2
;;; shl eax,16
;;; shl ebx,16
;;; mov ax,.x1
;;; mov bx,.x2
; movd mm2,[.t_x1]
; movd mm3,[.t_x2]
;; movd mm0,eax
;; movd mm1,ebx
; pxor mm4,mm4
;; punpcklwd mm0,mm4
;; punpcklwd mm1,mm4
; punpcklwd mm2,mm4
; punpcklwd mm3,mm4
;; psubd mm1,mm0
; psubd mm3,mm2
; cvtpi2ps xmm1,mm1
; movlhps xmm1,xmm1
; cvtpi2ps xmm1,mm3
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx |
; shufps xmm1,xmm1,10110001b
; xmm1--> | dx | dz | dtx | dty |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
; movhlps xmm1,xmm1
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
; movq .dty12,mm0
; movq .dz12,mm1
else
; Integer variant: each delta is (difference shl shift) / (y2-y1), pushed
; in slot order dx, dz, dbx, dby, dex, dey, dtx, dty. The push order is
; what places every value in its named slot.
mov ax,.x2
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT ; Z uses its own fixed-point shift
cdq
idiv ebx
push eax ; .dz12
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
mov ax,word[.t_x2]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx12,eax
push eax
mov ax,word[.t_y2]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty12,eax
push eax
end if
.bt_dx12_done:
; Edge 1->3: bx = y3 - y1. A non-zero height needs real deltas.
mov bx,.y3
sub bx,.y1
jnz .bt_dx13_make
; y1 = y3: push eight zero dwords so the delta-13 slots exist and are zero.
xor edx,edx
mov ecx,8
@@:
push edx
dec ecx
jnz @b
jmp .bt_dx13_done
; Edge 1->3 is not horizontal: compute its eight per-scanline deltas and
; store them in the delta-13 slots. On entry bx = y3 - y1. The structure
; mirrors the edge 1->2 code, with vertex 3 in place of vertex 2.
.bt_dx13_make:
movsx ebx,bx
if Ext>=SSE
; SSE variant: reserve the eight slots, then fill them by name.
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4 ; xmm3 = (y3-y1) / [i255d]
shufps xmm3,xmm3,0 ; broadcast the divisor to all lanes
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4 ; widen the x/y words to dwords
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0 ; bump difference (vertex 3 - vertex 1)
psubd mm3,mm2 ; env difference
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey13,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
mov ax,.z3 ; eax = z3 - z1
sub ax,.z1
cwde
mov dx,.x3 ; edx = x3 - x1
sub dx,.x1
movsx edx,dx
movd mm2,[.t_x1]
movd mm3,[.t_x3]
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2 ; texture difference
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b ; move dz and dx into the two upper lanes
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty13,xmm1
;1 movhps .dz13,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty13,mm0
movq .dz13,mm1
else
; Integer variant: (difference shl shift) / (y3-y1), pushed in slot order
; dx, dz, dbx, dby, dex, dey, dtx, dty.
mov ax,.x3
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT ; Z uses its own fixed-point shift
cdq
idiv ebx
; mov .dz13,eax
push eax
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
mov ax,word[.t_x3]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx13,eax
push eax
mov ax,word[.t_y3]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty13,eax
push eax
end if
.bt_dx13_done:
; Edge 2->3: bx = y3 - y2. A non-zero height needs real deltas.
mov bx,.y3
sub bx,.y2
jnz .bt_dx23_make
; y2 = y3: push eight zero dwords so the delta-23 slots exist and are zero.
xor edx,edx
mov ecx,8
@@:
push edx
dec ecx
jnz @b
jmp .bt_dx23_done
; Edge 2->3 is not horizontal: compute its eight per-scanline deltas and
; store them in the delta-23 slots. On entry bx = y3 - y2. The structure
; mirrors the other two edges, going from vertex 2 to vertex 3.
.bt_dx23_make:
movsx ebx,bx
if Ext>=SSE
; SSE variant: reserve the eight slots, then fill them by name.
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4 ; xmm3 = (y3-y2) / [i255d]
shufps xmm3,xmm3,0 ; broadcast the divisor to all lanes
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4 ; widen the x/y words to dwords
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0 ; bump difference (vertex 3 - vertex 2)
psubd mm3,mm2 ; env difference
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey23,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
mov ax,.z3 ; eax = z3 - z2
sub ax,.z2
cwde
mov dx,.x3 ; edx = x3 - x2
sub dx,.x2
movsx edx,dx
movd mm2,[.t_x2]
movd mm3,[.t_x3]
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2 ; texture difference
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b ; move dz and dx into the two upper lanes
cvtpi2ps xmm1,mm3
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
; movlps .dty23,xmm1
; movhps .dz23,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
movq .dty23,mm0
movq .dz23,mm1
else
; Integer variant: (difference shl shift) / (y3-y2), pushed in slot order
; dx, dz, dbx, dby, dex, dey, dtx, dty.
mov ax,.x3
sub ax,.x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT ; Z uses its own fixed-point shift
cdq
idiv ebx
; mov .dz23,eax
push eax
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
mov ax,word[.t_x3]
sub ax,word[.t_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx23,eax
push eax
mov ax,word[.t_y3]
sub ax,word[.t_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty23,eax
push eax
end if
; sub esp,40
; All three delta sets are on the stack. Reserve the 16 "current value"
; slots (.cx1 ... .cty2) and initialise both edge walkers with the
; fixed-point attributes of vertex 1, because edges 1->3 and 1->2 both
; start there.
.bt_dx23_done:
sub esp,64
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax
movsx ebx,word[.b_x1]
shl ebx,ROUND
mov .cbx1,ebx
mov .cbx2,ebx
; push ebx
; push ebx
movsx ecx,word[.b_y1]
shl ecx,ROUND
mov .cby1,ecx
mov .cby2,ecx
; push ecx
; push ecx
movsx edx,word[.e_x1]
shl edx,ROUND
mov .cex1,edx
mov .cex2,edx
; push edx
; push edx
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax
movsx ebx,.z1
shl ebx,CATMULL_SHIFT ; Z uses its own fixed-point shift
mov .cz1,ebx
mov .cz2,ebx
; push ebx
; push ebx
; sub esp,16
movsx ecx,word[.t_x1]
shl ecx,ROUND
mov .ctx1,ecx
mov .ctx2,ecx
;push ecx
;push ecx
movsx edx,word[.t_y1]
shl edx,ROUND
mov .cty1,edx
mov .cty2,edx
; push edx
; push edx
if Ext >= SSE2
; SSE2: keep the current values in xmm0-xmm3 (consumed by .call_line) and
; copy the edge-13 / edge-12 deltas into sse_repository so that paddd can
; take them as memory operands. The stores use movaps, which requires
; sse_repository (defined outside this view) to be 16-byte aligned.
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
; Slots inside sse_repository, addressed through edi.
.scby1 equ [edi]
.scty1 equ [edi+16]
.scby2 equ [edi+32]
.scty2 equ [edi+48]
.sdby13 equ [edi+64]
.sdty13 equ [edi+80]
.sdby12 equ [edi+96]
.sdty12 equ [edi+128]
push edi ; edi is the screen pointer - preserve it
mov edi,sse_repository
movaps .scby1,xmm0
movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
movaps .sdby13,xmm4
movaps .sdty13,xmm5
movaps .sdby12,xmm6
movaps .sdty12,xmm7
pop edi
end if
; Upper half of the triangle: scanlines y1 ... y2-1. Skipped when y1 >= y2.
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
; Upper half: draw scanline ecx between edge 1->3 (values with suffix 1)
; and edge 1->2 (values with suffix 2), then step both edges by their
; deltas. Exactly one of the three stepping variants below is assembled,
; depending on Ext.
.loop12:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
if Ext >= SSE2
; fxrstor [sse_repository]
; Reload the current values, add the deltas kept in sse_repository and
; store them back; xmm0-xmm3 stay loaded for the next .call_line.
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby12
; movups xmm7,.dty12
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby12
paddd xmm3,.sdty12
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
end if
if (Ext = MMX) | (Ext = SSE)
; Each movq / paddd handles two adjacent dwords (a y/x pair or z/x).
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty12
paddd mm6,.dz13
paddd mm7,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
if Ext = NON
; Plain integer stepping, one dword at a time.
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx12
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty12
add .cty2,eax
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx ; next scanline
cmp cx,.y2
jl .loop12
; Lower half of the triangle: scanlines y2 ... y3-1. Edge 1->3 keeps
; running; the second walker is restarted at vertex 2 and from now on
; follows edge 2->3.
; All vertex-2 coordinates are sign-extended (movsx), exactly like the
; vertex-1 coordinates at .bt_dx23_done and like the signed deltas
; (cwde / idiv). Zero-extending them here would make a negative bump, env
; or texture coordinate jump by 65536 at the edge switch.
.loop12_done:
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done ; nothing to draw below vertex 2
movsx eax,.z2
shl eax,CATMULL_SHIFT ; Z uses its own fixed-point shift
mov .cz2,eax
movsx ebx,.x2
shl ebx,ROUND
mov .cx2,ebx
movsx edx,word[.b_x2]
shl edx,ROUND
mov .cbx2,edx
movsx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
movsx ebx,word[.e_x2]
shl ebx,ROUND
mov .cex2,ebx
movsx edx,word[.e_y2]
shl edx,ROUND
mov .cey2,edx
movsx eax,word[.t_x2]
shl eax,ROUND
mov .ctx2,eax
movsx ebx,word[.t_y2]
shl ebx,ROUND
mov .cty2,ebx
if Ext >= SSE2
; SSE2: refresh xmm2/xmm3 with the restarted edge-2 values and place the
; edge-23 deltas in sse_repository for the paddd in .loop23. xmm0/xmm1
; (edge 1->3) are left as they are.
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
; .scby1 equ [edi]
; .scty1 equ [edi+16]
; .scby2 equ [edi+32]
; .scty2 equ [edi+48]
; .sdby13 equ [edi+64]
; .sdty13 equ [edi+80]
.sdby23 equ [edi+160]
.sdty23 equ [edi+192]
push edi ; edi is the screen pointer - preserve it
mov edi,sse_repository
; movaps .scby1,xmm0
; movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
; movaps .sdby13,xmm4
; movaps .sdty13,xmm5
movaps .sdby23,xmm6
movaps .sdty23,xmm7
pop edi
end if
; Lower half: draw scanline ecx between edge 1->3 (suffix 1) and edge
; 2->3 (suffix 2), then step both edges. Same three stepping variants as
; in .loop12, with the delta-23 set replacing the delta-12 set.
.loop23:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
if Ext >= SSE2
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby23
paddd xmm3,.sdty23
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
; fxrstor [sse_repository]
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
; movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby23
; movups xmm7,.dty23
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;
end if
if (Ext = MMX) | (Ext = SSE)
; Each movq / paddd handles two adjacent dwords (a y/x pair or z/x).
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty23
paddd mm6,.dz13
paddd mm7,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
If Ext = NON
; Plain integer stepping, one dword at a time.
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx23
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty23
add .cty2,eax
end if
inc ecx ; next scanline
cmp cx,.y3
jl .loop23
; Common exit (also used by the early rejects): drop every local by
; restoring esp from ebp, then return and pop the 50 bytes of stack
; arguments (ebp+4 ... ebp+53).
.loop23_done:
mov esp,ebp
ret 50
; Local helper: draws the scanline whose number is in ecx.
; It builds the 19-dword (76-byte) argument block expected by
; bump_tex_line_z from the current edge values, passes x1 / x2 (converted
; from fixed point) in eax / ebx and leaves edi (screen pointer) untouched.
; All general registers are preserved with pushad / popad. ebp is not
; changed by the call into this helper, so the ebp-relative names of the
; triangle frame remain valid here.
.call_line:
pushad
; xmm0= cby1,cbx1,cz1,cx1
; xmm1= cty1,ctx1,cey1,cex1
if Ext >= SSE2
; SSE2: the values are taken from xmm0-xmm3; the shuffles swap the two
; dwords of each pair so that the stored order matches the pushes of the
; integer variant.
sub esp,8
shufps xmm1,xmm1,10110001b
shufps xmm3,xmm3,10110001b
movlps [esp],xmm1
else
push dword .cty1
push .ctx1
end if
push dword .cz1
if Ext>=SSE2
sub esp,8
movlps [esp],xmm3
else
push dword .cty2
push .ctx2
end if
push dword .cz2
if Ext>=SSE2
sub esp,32
movhps [esp+24],xmm3
shufps xmm2,xmm2,10110001b
movlps [esp+16],xmm2
movhps [esp+8],xmm1
shufps xmm0,xmm0,10110001b
movlps [esp],xmm0 ;================================
else
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
end if
push .tex_ptr
push .z_buff
push .t_emap
push .t_bmap
push ecx ; scanline number (y)
mov eax,.cx1
sar eax,ROUND ; fixed point -> integer x1
mov ebx,.cx2
sar ebx,ROUND ; fixed point -> integer x2
call bump_tex_line_z ; pops its 76 bytes of arguments itself
popad
;end if
ret
bump_tex_line_z:
; Draws one horizontal span of a bump-mapped, textured triangle with a
; per-pixel Z-buffer test. The procedure removes its own 76 bytes of
; stack arguments (ret 76) and does not preserve registers.
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - another parameters :
; ebp is set to esp at entry without pushing the old ebp, so [ebp] is the
; return address and the first argument is at ebp+4.
.y equ dword [ebp+4]
.bmap equ dword [ebp+8] ; bump map pointer
.emap equ dword [ebp+12] ; env map pointer
.z_buff equ dword [ebp+16] ; z buffer
.tex_map equ dword [ebp+20] ; texture pointer
.bx1 equ [ebp+24] ; ---
.by1 equ [ebp+28] ; |
.ex1 equ [ebp+32] ; |
.ey1 equ [ebp+36] ; |
.bx2 equ [ebp+40] ; |
.by2 equ [ebp+44] ; |> b. map and e. map coords
.ex2 equ [ebp+48] ; |> shifted shl ROUND
.ey2 equ [ebp+52] ; ---
.z2 equ [ebp+56]
.tx2 equ [ebp+60]
.ty2 equ [ebp+64]
.z1 equ [ebp+68]
.tx1 equ [ebp+72]
.ty1 equ [ebp+76]
; Locals. They are created by pushes / sub esp further down, in exactly
; this order, so the code that fills them must not be reordered.
.x1 equ [ebp-4]
.x2 equ [ebp-8]
.dbx equ [ebp-12]
.dby equ [ebp-16]
.dex equ [ebp-20]
.dey equ [ebp-24]
.dz equ [ebp-28]
.dtx equ [ebp-32]
.dty equ [ebp-36]
.cbx equ [ebp-40]
.cby equ [ebp-44]
.cex equ [ebp-48]
.cey equ [ebp-52]
.cz equ [ebp-56]
.czbuff equ [ebp-60]
.ctx equ [ebp-64]
.cty equ [ebp-68]
.c_scr equ [ebp-72]
.temp1 equ ebp-80
.temp2 equ ebp-88
.temp3 equ ebp-76
.temp4 equ ebp-84
.temp5 equ ebp-92
mov ebp,esp
; Reject scanlines outside 0 <= y < [size_y_var].
mov ecx,.y
or ecx,ecx
jl .bl_end
movzx edx,word[size_y_var]
cmp ecx,edx ;SIZE_Y
jge .bl_end
; Empty span: nothing to draw. Reversed span: exchange the two endpoints
; together with all of their attributes so that x1 < x2 afterwards.
cmp eax,ebx
jl .bl_ok
je .bl_end
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx
mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
mov edx,.tx1
xchg edx,.tx2
mov .tx1,edx
mov edx,.ty1
xchg edx,.ty2
mov .ty1,edx
end if
if Ext = MMX
; Each movq moves an adjacent x/y pair.
movq mm0,.bx1
movq mm1,.bx2
movq mm2,.ex1
movq mm3,.ex2
movq mm4,.tx1
movq mm5,.tx2
movq .bx2,mm0
movq .bx1,mm1
movq .ex1,mm3
movq .ex2,mm2
movq .tx1,mm5
movq .tx2,mm4
end if
if Ext>=SSE
; One 16-byte move covers bx, by, ex and ey of an endpoint.
movups xmm0,.bx1
movups xmm1,.bx2
movups .bx1,xmm1
movups .bx2,xmm0
movq mm0,.tx1
movq mm1,.tx2
movq .tx1,mm1
movq .tx2,mm0
end if
;if Ext>=SSE2
; movaps xmm4,xmm0
; movaps xmm0,xmm2
; movaps xmm2,xmm4
; movaps xmm5,xmm1
; movaps xmm1,xmm3
; movaps xmm3,xmm5
;else
xchg eax,ebx ; exchange x1 and x2
mov edx,.z1 ; exchange z1 and z2
xchg edx,.z2
mov .z1,edx
;end if
; Here eax = x1 < ebx = x2. Store them as locals, reject spans entirely
; off screen, compute the per-pixel deltas, clip the span to
; 0 ... [size_x_var], compute the start addresses in the Z-buffer and the
; screen buffer, and set up the running values for the pixel loop.
.bl_ok:
;if Ext >= SSE2
; shufps xmm0,xmm0,11100001b
; shufps xmm2,xmm2,11100001b
; movlps .bx1,xmm0
; movlps .bx2,xmm2
; shufps xmm0,xmm0,00011011b
; shufps xmm2,xmm2,00011011b
; movd eax,xmm0
; movd ebx,xmm2
; shufps xmm0,xmm0,11000110b
; shufps xmm2,xmm2,11000110b
; movd .z1,xmm0
; movd .z2,xmm2
; shufps xmm1,xmm1,10110001b
; shufps xmm3,xmm3,10110001b
; movlps .ex1,xmm1
; movlps .ex2,xmm2
; movhps .tx1,xmm1
; movhps .tx2,xmm2
; xchg eax,ebx
; mov edx,.z1
; xchg edx,.z2
; mov .z1,edx
;end if
push eax ; .x1
push ebx ;store x1, x2
movzx ebx,word[size_x_var]
; mov eax,.x1
cmp dword .x1,ebx ;dword .x1,SIZE_X
jge .bl_end ; span starts right of the screen
cmp dword .x2,0
jle .bl_end ; span ends left of the screen
mov ebx,.x2
sub ebx,.x1 ; ebx = span length in pixels (> 0)
if Ext>=SSE
; SSE variant: reserve the seven delta slots (.dbx ... .dty), then compute
; (end - start) / length for all attributes in two packed divisions.
sub esp,28
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0
; float using SSE variant ::-->
; movups xmm0,.bx1 ; new
; movups xmm1,.bx2 ; new
cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b ; swap within pairs to match the slot order
; movups .dey,xmm1 ; new
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0 ; fills .dey and .dex
movq .dby,mm1 ; fills .dby and .dbx
movd mm2,.z1
movd mm3,.z2
cvtpi2ps xmm0,.tx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,.tx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; movups xmm0,,z1 ; new
; movups xmm1,.z2 ; new
subps xmm1,xmm0
divps xmm1,xmm3
; movups .dz,xmm1 ;new
shufps xmm1,xmm1,10110100b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movd .dz,mm0
movq .dty,mm1 ; fills .dty and .dtx
else
; Integer variant: the push order places every delta in its named slot.
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax
mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax
mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax
mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax
mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax
mov eax,.tx2 ; calc .dtx
sub eax,.tx1
cdq
idiv ebx
push eax
mov eax,.ty2 ; calc .dty
sub eax,.ty1
cdq
idiv ebx
push eax
end if
; Left clipping: when x1 < 0, advance every start value by -x1 steps and
; set x1 to 0.
cmp dword .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
mov ebx,.x1
neg ebx ; ebx = number of pixels cut off on the left
;if Ext >= SSE
; cvtsi2ss xmm0,ebx
; shufps xmm0,xmm0,0
; movups xmm1,.dey
; mulps xmm1,xmm0
; shufps xmm1,xmm1,00011011b
; movups xmm2,.bx1
; addps xmm2,xmm1
; movups .bx1,xmm2
mov eax,.dz
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov dword .x1,0
mov eax,.dbx
imul ebx
add .bx1,eax
mov eax,.dby
imul ebx
add .by1,eax
mov eax,.dex
imul ebx
add .ex1,eax
mov eax,.dey
imul ebx
add .ey1,eax
mov eax,.dtx
imul ebx
add .tx1,eax
mov eax,.dty
imul ebx
add .ty1,eax
@@:
; Right clipping: x2 = min(x2, [size_x_var]).
; mov ebx,.x2
movzx eax,word[size_x_var]
; cmp dword .x2,SIZE_X
cmp dword .x2,eax ; eax,ebx
jl @f
mov dword .x2,eax ;SIZE_X
@@:
; Pixel index = y * [size_x_var] + x1. The Z-buffer has 4 bytes per pixel,
; the screen buffer 3 bytes per pixel.
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers
mul .y
add eax,.x1
lea esi,[4*eax]
add esi,.z_buff ; z-buffer filled with dd variables
lea eax,[eax*3]
add edi,eax
mov ecx,.x2
sub ecx,.x1 ; ecx = number of pixels to draw
; init current variables
push dword .bx1 ; current b, e and t shifted shl ROUND .cbx
push dword .by1 ; .cby
push dword .ex1 ; .cex
push dword .ey1 ; .cey
push dword .z1 ; current z shl CATMULL_SHIFT ; .cz
push esi ; .czbuff
push dword .tx1 ; .ctx
push dword .ty1 ; .cty
push edi ; .c_scr
if Ext = SSE2
; SSE2: constants for fetching the four bump-map neighbours at once:
; xmm1 = index mask, xmm2 = offsets (-1, +1, -TEX_X, +TEX_X),
; xmm3 = bump map base address, each broadcast / packed per lane.
mov eax,TEXTURE_SIZE
movd xmm1,eax
shufps xmm1,xmm1,0
push dword TEX_X
push dword -TEX_X
push dword 1
push dword -1
movups xmm2,[esp]
movd xmm3,.bmap
shufps xmm3,xmm3,0
end if
if Ext>=MMX
; MMX and above: keep the running coordinate pairs in registers.
movq mm7,.cty ; texture (y, x)
movq mm6,.cby ; bump map (y, x)
movq mm5,.cey ; env map (y, x)
; movq mm4,.dtyq
; movq mm3,.dbyq
end if
; Pixel loop, ecx = pixels left. For every pixel:
; 1. Z test against the Z-buffer; farther or equal pixels are skipped.
; 2. Take the horizontal and vertical differences of the bump map around
; the current bump coordinate and add them to the env map coordinate.
; 3. If the displaced env coordinate is inside the env map, multiply the
; env texel with the texture texel (col1*col2/256); otherwise draw black.
; 4. Store the pixel (exactly 3 bytes) and the new Z value, then step all
; running values.
; Changes against the previous version:
; - the env map bounds tests use jge instead of jg, so x = TEX_X and
; y = TEX_Y are rejected (they address texels outside the map);
; - the black pixel and the MMX result are stored as 3 bytes instead of 4,
; so the first byte of the following pixel is no longer overwritten.
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
jge .skip ; not nearer than the stored Z
if Ext=NON
mov eax,.cby
shr eax,ROUND
mov esi,.cbx
shr esi,ROUND
else
movq mm1,mm6 ; mm6 - current bump coords (y, x)
psrld mm1,ROUND
movd eax,mm1
psrlq mm1,32
movd esi,mm1
end if
shl eax,TEX_SHIFT
add esi,eax ;- ; esi - current bump map index
if Ext = SSE2
; All four neighbour addresses are computed at once:
; (index + offset) and mask + base; then read lane by lane.
movd xmm0,esi
shufps xmm0,xmm0,0
paddd xmm0,xmm2
pand xmm0,xmm1
paddd xmm0,xmm3
movd ebx,xmm0
movzx eax,byte[ebx]
;
; shufps xmm0,xmm0,11100001b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx,byte[ebx]
sub eax,ebx
;
; shufps xmm0,xmm0,11111110b
psrldq xmm0,4
movd ebx,xmm0
movzx edx, byte [ebx]
;
; shufps xmm0,xmm0,11111111b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx, byte [ebx]
sub edx,ebx
;
else
; eax = bump[index-1] - bump[index+1]
; mov ebx,esi
; dec ebx
lea ebx,[esi-1]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx eax,byte [ebx]
; mov ebx,esi
; inc ebx
lea ebx,[esi+1]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub eax,ebx
; edx = bump[index-TEX_X] - bump[index+TEX_X]
; mov ebx,esi
; sub ebx,TEX_X
lea ebx,[esi-TEX_X]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx edx,byte [ebx]
; mov ebx,esi
; add ebx,TEX_X
lea ebx,[esi+TEX_X]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub edx,ebx
end if
; eax - horizontal sub modificated x coord
; edx - vertical sub modificated y coord
if Ext=NON
mov ebx,.cex ;.cex - current env map X
shr ebx,ROUND
add eax,ebx
mov ebx,.cey ;.cey - current env map y
shr ebx,ROUND
add edx,ebx
else
movq mm1,mm5 ; mm5 - copy of cur env coords
psrld mm1,ROUND
movd ebx,mm1
psrlq mm1,32
add eax,ebx
movd ebx,mm1
add edx,ebx
; movq qword[.temp1],mm3
; add eax,dword [.temp1]
; add edx,dword [.temp1+4]
end if
; Valid env map coordinates are 0 ... TEX_X-1 and 0 ... TEX_Y-1.
or eax,eax
jl .black
cmp eax,TEX_X
jge .black
or edx,edx
jl .black
cmp edx,TEX_Y
jge .black
; A distortion in the env map equals a distortion in the texture.
; Suggestion: draw the undistorted pixel here instead of a black one.
shl edx,TEX_SHIFT
add edx,eax
lea esi,[edx*3] ; 3 bytes per env map texel
add esi,.emap ;
lodsd ; eax = env texel; NOTE: reads 4 bytes, the top byte belongs to the next texel
if Ext=NON
mov edx,.cty
shr edx,ROUND ; sar
mov edi,.ctx
shr edi,ROUND ; sar
else
movq mm1,mm7 ; mm7 - current texture coords (y, x)
psrld mm1,ROUND
movd edx,mm1
psrlq mm1,32
movd edi,mm1
end if
shl edx,TEX_SHIFT
add edi,edx
and edi,TEXTURE_SIZE ; wrap the texture index
lea esi,[edi*3] ; 3 bytes per texture texel
add esi,.tex_map
if Ext=NON
; Per-channel product env*tex/256, written as word + byte (3 bytes).
mov edx,eax
lodsd
push ax
mul dl
mov dl,ah
pop ax
shr ax,8
mul dh
mov al,dl
mov edi,.c_scr
stosw
shr edx,16
shr eax,16
mul dl
shr ax,8
stosb
else
; Packed per-channel product env*tex/256.
movd mm0,eax
pxor mm1,mm1
punpcklbw mm0,mm1
movd mm2,[esi]
punpcklbw mm2,mm1
pmullw mm0,mm2
psrlw mm0,8
packuswb mm0,mm1
; Store only the three colour bytes; a 4-byte store would overwrite the
; first byte of the next pixel.
movd eax,mm0
mov edi,.c_scr
stosw
shr eax,16
stosb
end if
jmp .actual_zbuff ; actualize z buffer
@@:
.black:
; Displaced env coordinate is outside the env map: write a black pixel
; (three zero bytes).
xor eax,eax
mov edi,.c_scr
stosw
stosb
.actual_zbuff:
mov eax,.cz
mov edi,.czbuff
stosd ; store the new Z value
.skip:
; Advance to the next pixel: 4 bytes in the Z-buffer, 3 on screen.
add dword .czbuff,4
add dword .c_scr,3
if Ext=NON
mov eax,.dbx
add .cbx,eax
mov ebx,.dby
add .cby,ebx
mov edx,.dex
add .cex,edx
mov eax,.dey
add .cey,eax
mov ebx,.dtx
add .ctx,ebx
mov edx,.dty
add .cty,edx
else
paddd mm7,.dty ; steps the (y, x) pair in one instruction
paddd mm6,.dby
paddd mm5,.dey
end if
mov eax,.dz
add .cz,eax
dec ecx
jnz .draw
; Common exit (also used by the early rejects): drop every local by
; restoring esp from ebp, then return and pop the 76 bytes of stack
; arguments (ebp+4 ... ebp+79).
.bl_end:
mov esp,ebp
ret 76
;Ext = MMX
; else
; movq mm5, qword[.temp1] ;-
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap
; movd ebx,mm5
; psrlq mm5,32
; end if