Kirill Lipatov (Leency) b654a4e928 View3DS by macgub: bugfixes and new rendering model - ray casted shadows. Check Readme for more info.
git-svn-id: svn://kolibrios.org@9237 a494cfbc-eb01-0410-851d-a64ba20cac60
2021-11-03 16:39:08 +00:00

1133 lines
21 KiB
PHP

;SIZE_X equ 350
;SIZE_Y equ 350
ROUND equ 8
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
;TEX_SHIFT EQU 9
CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws bump triangle using Catmull Z-buffer algorithm-
;------- (Z coordinate interpolation)-----------------------------------
bump_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map ------
;---------------------- esi - pointer to environment map
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer io Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10
.b_x3 equ ebp+12
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to e. map
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]
.dx13 equ dword[ebp-48]
.dz13 equ [ebp-52]
.dbx13 equ dword[ebp-56]
.dby13 equ [ebp-60]
.dex13 equ dword[ebp-64]
.dey13 equ [ebp-68]
.dx23 equ dword[ebp-72]
.dz23 equ [ebp-76]
.dbx23 equ dword[ebp-80]
.dby23 equ [ebp-84]
.dex23 equ dword[ebp-88]
.dey23 equ [ebp-92]
.cx1 equ dword[ebp-96] ; current variables
.cz1 equ [ebp-100]
.cx2 equ dword[ebp-104]
.cz2 equ [ebp-108]
.cbx1 equ dword[ebp-112]
.cby1 equ [ebp-116]
.cex1 equ dword[ebp-120]
.cey1 equ [ebp-124]
.cbx2 equ dword[ebp-128]
.cby2 equ [ebp-132]
.cex2 equ dword[ebp-136]
.cey2 equ [ebp-140]
mov ebp,esp
push edx ; store bump map
push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
if Ext>=SSE
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0
psubd mm3,mm2
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
else
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
end if
.bt_dx12_done:
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax
if Ext>=SSE
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0
psubd mm3,mm2
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
else
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
end if
.bt_dx13_done:
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
; sub esp,40
if Ext>=SSE
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm1,mm0
psubd mm3,mm2
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
else
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
end if
.bt_dx23_done:
sub esp,48
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
; push eax
; push eax
movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
; push eax
; push eax
movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
; push eax
; push eax
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax
movsx eax,.z1
shl eax,CATMULL_SHIFT
mov .cz1,eax
mov .cz2,eax
; push eax
; push eax
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey12
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz12
movq .cz1,mm4
movq .cz2,mm5
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
movsx eax,.x2
shl eax,ROUND
mov .cx2,eax
movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax
movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax
.loop23:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey23
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz23
movq .cz1,mm4
movq .cz2,mm5
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
end if
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
mov esp,ebp
ret 34
.call_bump_line:
; push ebp
; push ecx
pushad
push dword .cz1
push dword .cz2
push .z_buff
push .t_emap
push .t_bmap
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
push ecx
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
call bump_line_z
popad
ret
bump_line_z:
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - another parameters :
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ dword [ebp+12] ; |
.ex1 equ [ebp+16] ; |
.ey1 equ dword [ebp+20] ; |> bump and env coords
.bx2 equ [ebp+24] ; |> shifted shl ROUND
.by2 equ dword [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ dword [ebp+36] ; ---
.bmap equ dword [ebp+40]
.emap equ dword [ebp+44]
.z_buff equ dword [ebp+48]
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ dword [ebp-12]
.dby equ [ebp-16]
.dex equ dword [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ dword [ebp-32]
.cby equ [ebp-36]
.cex equ dword [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.czbuff equ dword [ebp-52]
.temp1 equ ebp-60
.temp2 equ ebp-68
.temp3 equ ebp-76
.temp4 equ ebp-84
.temp5 equ ebp-92
mov ebp,esp
mov ecx,.y
or ecx,ecx
jl .bl_end
; mov dx,[size_x_var]
; dec dx
cmp cx,[size_y_var] ;SIZE_Y
jge .bl_end
cmp eax,ebx
jl .bl_ok
je .bl_end
xchg eax,ebx
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx
mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
end if
if Ext = MMX
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
end if
if Ext >= SSE
movups xmm0,.bx1
movups xmm1,.bx2
movups .bx2,xmm0
movups .bx1,xmm1
end if
mov edx,.z1
xchg edx,.z2
mov .z1,edx
.bl_ok:
push eax
push ebx ;store x1, x2
movzx edx,word[size_x_var]
dec edx
cmp .x1,edx ;SIZE_X
jge .bl_end
cmp .x2,0
jle .bl_end
mov ebx,.x2
sub ebx,.x1
if Ext >= SSE
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0
cvtpi2ps xmm0,.bx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0
movq .dby,mm1
else
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax
mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax
mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax
mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax
end if
mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax
cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0
mov eax,.dbx
imul ebx
add .bx1,eax
mov eax,.dby
imul ebx
add .by1,eax
mov eax,.dex
imul ebx
add .ex1,eax
mov eax,.dey
imul ebx
add .ey1,eax
@@:
movzx edx,word[size_x_var]
dec edx
cmp .x2,edx ;SIZE_X
jl @f
mov .x2,edx ;SIZE_X
@@:
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers
mov ebx,.y
mul ebx
mov ebx,.x1
add eax,ebx
mov ebx,eax
lea eax,[eax*3]
add edi,eax
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2
add esi,ebx
mov ecx,.x2
sub ecx,.x1
; init current variables
push dword .bx1
push .by1
push dword .ex1
push .ey1
push .z1 ; current z shl CATMULL_SHIFT
push esi
;if Ext = SSE2
; movups xmm1,.dey
;end if
if Ext>=MMX
movq mm0,.cby
movq mm1,.cey
movq mm2,.dby
movq mm3,.dey
end if
if Ext >= SSE2
mov eax,TEXTURE_SIZE
movd xmm1,eax
shufps xmm1,xmm1,0
push dword TEX_X
push dword -TEX_X
push dword 1
push dword -1
movups xmm2,[esp]
movd xmm3,.bmap
shufps xmm3,xmm3,0
end if
;align 16
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
jge .skip
if Ext>=MMX
movq mm6,mm0
psrld mm6,ROUND
movd eax,mm6
psrlq mm6,32
movd esi,mm6
else
mov eax,.cby
sar eax,ROUND
mov esi,.cbx
sar esi,ROUND
end if
shl eax,TEX_SHIFT ;-
add esi,eax ;- ; esi - current bump map index
if Ext = SSE2
movd xmm0,esi
shufps xmm0,xmm0,0
paddd xmm0,xmm2
pand xmm0,xmm1
paddd xmm0,xmm3
movd ebx,xmm0
movzx eax,byte[ebx]
;
; shufps xmm0,xmm0,11100001b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx,byte[ebx]
sub eax,ebx
;
; shufps xmm0,xmm0,11111110b
psrldq xmm0,4
movd ebx,xmm0
movzx edx, byte [ebx]
;
; shufps xmm0,xmm0,11111111b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx, byte [ebx]
sub edx,ebx
;
else
mov ebx,esi
dec ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx eax,byte [ebx]
mov ebx,esi
inc ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub eax,ebx
mov ebx,esi
sub ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx edx,byte [ebx]
mov ebx,esi
add ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub edx,ebx
end if
; eax - horizontal sub
; edx - vertical sub
if Ext = NON
mov ebx,.cex ;.cex - current env map X
sar ebx,ROUND
add eax,ebx ; eax - modified x coord
mov ebx,.cey ;.cey - current env map y
sar ebx,ROUND
add edx,ebx ; edx - modified y coord
else
movq mm6,mm1 ; mm5 - copy of cur env coords
psrld mm6,ROUND
movd ebx,mm6
psrlq mm6,32
add eax,ebx
movd ebx,mm6
add edx,ebx
end if
or eax,eax
jl .black
cmp eax,TEX_X
jg .black
or edx,edx
jl .black
cmp edx,TEX_Y
jg .black
shl edx,TEX_SHIFT
add edx,eax
lea esi,[edx*3]
add esi,.emap
lodsd
jmp .put_pixel
.black:
xor eax,eax
.put_pixel:
stosd
dec edi
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx
jmp .no_skip
.skip:
add edi,3
.no_skip:
add .czbuff,4
;if Ext = SSE2
; movups xmm0,.cey
; paddd xmm0,xmm1
; movups .cey,xmm0
;
;end if
if Ext >= MMX
paddd mm0,mm2
paddd mm1,mm3
end if
if Ext=NON
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
end if
mov eax,.dz
add .cz,eax
dec ecx
jnz .draw
; end if
.bl_end:
mov esp,ebp
ret 56