View3DS by macgub: bugfixes and new rendering model - ray casted shadows. Check Readme for more info.

git-svn-id: svn://kolibrios.org@9237 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Kirill Lipatov (Leency) 2021-11-03 16:39:08 +00:00
parent ae85867e03
commit b654a4e928
19 changed files with 10536 additions and 9530 deletions

View File

@ -4,13 +4,108 @@ z3d equ 4
vec_x equ 0 vec_x equ 0
vec_y equ 4 vec_y equ 4
vec_z equ 8 vec_z equ 8
; 3d point - triple integer word coordinate
; vector - triple float dword coordinate if 0 ; Ext >= SSE3
;----------------------in: -------------------------------- calc_bounding_box:
;------------------------ esi - pointer to 1st 3d point --- ; in:
;------------------------ edi - pointer to 2nd 3d point --- ; xmm0 - normal vector of ray
;------------------------ ebx - pointer to result vector -- ; xmm1 - light origin
;---------------------- out : none ------------------------ ; out:
; eax - axis aligned bounding boxes bit mask
.rmx equ [ebp-36]
.nray equ [ebp-64]
.origin equ [ebp-80]
.dirfrac equ [ebp-96]
.nrayr equ [ebp-112]
.originr equ [ebp-128]
.tmin equ [ebp-132]
.tmax equ [ebp-136]
push ebp
mov ebp,esp
and ebp,-16
sub esp,160
movss xmm5,[rsscale]
shufps xmm5,xmm1,0
movd xmm2,[vect_x]
punpcklwd xmm2,[the_zero]
cvtdq2ps xmm2,xmm2
subps xmm1,xmm2
movaps .origin,xmm1
mulps xmm0,xmm5
movaps .nray,xmm0
mov esi,matrix
lea edi,.rmx
call reverse_mx_3x3
; in: esi - ptr to points(normals], each point(normal) coeficient as dword
; edi - ptr to rotated points(normals)
; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
; ecx - number of points(normals)
; reverse transform
lea esi,.nray
lea edi,.nrayr
lea ebx,.rmx
mov ecx,1
call rotary
lea esi,.origin
lea edi,.originr
lea ebx,.rmx
mov ecx,1
call rotary
xor ecx,ecx
mov ebx,aabb1
xor eax,eax
rcpps xmm7,.nrayr
movaps .dirfrac,xmm7
.nx_aabb:
movaps xmm5,[ebx]
movaps xmm6,[ebx]
minps xmm5,[the_zero]
maxps xmm6,[the_zero]
; xmm5 - lb corner of AABB with minimal coordinates
; xmm6 - rt cor. of AABB wit maximum coords
subps xmm5,.originr
subps xmm6,.originr
mulps xmm5,.dirfrac ; xmm5 - tx1, ty1
mulps xmm6,.dirfrac ; xmm6 - tx2, ty2
movaps xmm1,xmm6
movaps xmm2,xmm6
minps xmm1,xmm5
maxps xmm2,xmm5
movaps xmm5,xmm1
movaps xmm6,xmm2
shufps xmm5,xmm5,11100001b
shufps xmm6,xmm6,11100001b
maxss xmm1,xmm5 ;t min
minss xmm2,xmm6 ;t max
comiss xmm2,xmm1
jb .no_inter
.yes:
bts eax,ecx
.no_inter:
add ebx,16
inc ecx
cmp ecx,8
jne .nx_aabb
; out: eax - bit mask
add esp,160
pop ebp
ret
end if
reverse_mx_3x3: reverse_mx_3x3:
; esi - source matrix ; esi - source matrix
; edi - desired reversed matrix ; edi - desired reversed matrix
@ -141,6 +236,13 @@ reverse_mx_3x3:
mov esp,ebp mov esp,ebp
pop ebp pop ebp
ret ret
; 3d point - triple integer word coordinate
; vector - triple float dword coordinate
;----------------------in: --------------------------------
;------------------------ esi - pointer to 1st 3d point ---
;------------------------ edi - pointer to 2nd 3d point ---
;------------------------ ebx - pointer to result vector --
;---------------------- out : none ------------------------
make_vector_r: make_vector_r:
if Ext < SSE2 if Ext < SSE2
@ -194,17 +296,37 @@ cross_product:
fsubp ;st1 ,st fsubp ;st1 ,st
fstp dword [ebx+vec_z] fstp dword [ebx+vec_z]
ret ret
cross_aligned:
;-----------------------------------------------------------------------
; Cross product of two 16-byte aligned float vectors.
; in:    esi - pointer to vector a (x, y, z, w as dword floats)
;        edi - pointer to vector b (same layout)
;        ebx - pointer to the 16-byte aligned result
; out:   [ebx] = (ay*bz - az*by, az*bx - ax*bz, ax*by - ay*bx, ax*bx - ax*bx)
; clobb: xmm0, xmm1, xmm2, xmm3
; All three pointers are accessed with movaps, so they must be
; 16-byte aligned.
;-----------------------------------------------------------------------
        movaps  xmm0,[esi]              ; a
        movaps  xmm2,[edi]              ; b
        movaps  xmm1,xmm0               ; second copy of a
        movaps  xmm3,xmm2               ; second copy of b
        shufps  xmm0,xmm0,00001001b     ; (ay, az, ax, ax)
        shufps  xmm2,xmm2,00010010b     ; (bz, bx, by, bx)
        mulps   xmm0,xmm2               ; (ay*bz, az*bx, ax*by, ax*bx)
        shufps  xmm1,xmm1,00010010b     ; (az, ax, ay, ax)
        shufps  xmm3,xmm3,00001001b     ; (by, bz, bx, bx)
        mulps   xmm1,xmm3               ; (az*by, ax*bz, ay*bx, ax*bx)
        subps   xmm0,xmm1               ; a x b in lanes 0..2
        movaps  [ebx],xmm0
        ret
;----------------------- in: ------------------------------ ;----------------------- in: ------------------------------
;---------------------------- edi - pointer to vector ----- ;---------------------------- edi - pointer to vector -----
;----------------------- out : none ;----------------------- out : none
normalize_vector: normalize_vector:
if Ext >= SSE3 if Ext >= SSE2
movups xmm0,[edi] movups xmm0,[edi]
andps xmm0,[zero_hgst_dd] andps xmm0,[zero_hgst_dd]
movups xmm1,xmm0 movups xmm1,xmm0
mulps xmm0,xmm0 mulps xmm0,xmm0
haddps xmm0,xmm0 movhlps xmm2,xmm0
haddps xmm0,xmm0 addps xmm0,xmm2
movaps xmm2,xmm0
shufps xmm2,xmm2,11100101b
addps xmm0,xmm2
shufps xmm0,xmm0,0
; haddps xmm0,xmm0
; haddps xmm0,xmm0
rsqrtps xmm0,xmm0 rsqrtps xmm0,xmm0
mulps xmm0,xmm1 mulps xmm0,xmm1
movlps [edi],xmm0 movlps [edi],xmm0
@ -559,7 +681,7 @@ translate_points: ; just convert into integer; z coord still needed
; packsdw xmm0,xmm0 ; packsdw xmm0,xmm0
; movq [edi] ; movq [edi]
fld dword[esi] fld dword[esi]
fiadd [vect_x] fiadd word[vect_x]
fistp word[edi] fistp word[edi]
fld dword[esi+4] fld dword[esi+4]
fiadd [vect_y] fiadd [vect_y]

View File

@ -1,5 +1,5 @@
; Glass like rendering triangle by Maciej Guba. ; Glass like rendering triangle by Maciej Guba.
; http://macgub.hekko.pl, macgub3@wp.pl ; http://macgub.co.pl, macgub3@wp.pl
ROUND2 equ 10 ROUND2 equ 10
glass_tri: glass_tri:

View File

@ -1,7 +1,7 @@
; Bilinear filtering, real Phongs shading and glass like parallel. ; Bilinear filtering, real Phongs shading and glass like parallel.
; Thanks to authors of 3dica tutorial. ; Thanks to authors of 3dica tutorial.
; Implemented in FASM by Maciej Guba. ; Implemented in FASM by Maciej Guba.
; http://macgub.j.pl ; http://macgub.co.pl
ROUND2 equ 10 ROUND2 equ 10

View File

@ -1,6 +1,6 @@
; Real Phong's shading implemented if flat assembler ; Real Phong's shading implemented if flat assembler
; by Maciej Guba. ; by Maciej Guba.
; http://macgub.vxm.pl ; http://macgub.co.pl
ROUND2 equ 10 ROUND2 equ 10
real_phong_tri_z: real_phong_tri_z:

View File

@ -0,0 +1,688 @@
; Ray casted shadows
; by Maciej Guba.
; http://macgub.co.pl
ROUND2 equ 10
ray_shad:
;--- Procedure renders a triangle with the ray casted shadow ---
;--- effect. The triangle is split into scanlines and every ---
;--- scanline is passed to ray_shd_l, which tests each pixel ---
;--- against all triangles. It is not a real time process, ---
;--- especially when many triangles are computed. -----------
;------in - eax - x1 shl 16 + y1 ------------------------
;---------- ebx - x2 shl 16 + y2 ------------------------
;---------- ecx - x3 shl 16 + y3 ------------------------
;---------- edx - not read here; it is overwritten by cdq
;-------------- while the edge deltas are computed ------
;---------- esi - pointer to stencil / Z-buffer, filled -
;-------------- with dword float variables, it holds ---
;-------------- the 'Z' position (coord) of every front -
;-------------- pixel. --------------------------------
;---------- edi - pointer to screen buffer --------------
;---------- xmm0 - 1st normal vector --------------------
;---------- xmm1 - 2nd normal vector --------------------
;---------- xmm2 - 3rd normal vector --------------------
;---------- xmm3 - not used (the store to .l_v is -------
;--------------- commented out) -----------------------
;---------- xmm4 - lo -> hi z1, z2, z3 coords -----------
;--------------- as dwords floats ---------------------
;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max --
;--------------- as dword integers --------------------
;-----------mm7 - current triangle index ---------------
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
; NOTE(review): mm7 is accessed with movd and no emms is executed in
; this procedure - presumably the caller takes care of that before any
; x87 code runs; confirm.
push ebp
mov ebp,esp
sub esp,1024
; ebp is moved down and rounded to a 16-byte boundary so that the
; locals accessed with movaps/movdqa are aligned. From here on ebp no
; longer points at the saved ebp; the epilogue restores the stack with
; 'add esp,1024' / 'pop ebp'.
sub ebp,16
and ebp,0xfffffff0
; vertex normals (16 bytes each)
.1_nv equ [ebp-16]
.2_nv equ [ebp-32]
.3_nv equ [ebp-48]
.l_v equ [ebp-64]
; z1, z2, z3 are written with a single movaps to .z1
.z3 equ [ebp-72]
.z2 equ [ebp-76]
.z1 equ [ebp-80]
; each vertex is stored as one dword: y in the low word, x in the high
.x1 equ [ebp-82]
.y1 equ [ebp-84]
.x2 equ [ebp-86]
.y2 equ [ebp-88]
.x3 equ [ebp-90]
.y3 equ [ebp-92]
.Zbuf equ [ebp-96]
; clip rectangle, written with a single movdqa to .y_min
.x_max equ [ebp-100]
.x_min equ [ebp-104]
.y_max equ [ebp-108]
.y_min equ [ebp-112]
.screen equ [ebp-116]
; per-scanline steps along the edges 1-2, 1-3 and 2-3:
; dx - x step as fixed point with ROUND2 fractional bits
; dn - normal vector step, dz - z step
.dx12 equ [ebp-120]
.dx13 equ [ebp-124]
.dx23 equ [ebp-128]
.dn12 equ [ebp-144]
.dn13 equ [ebp-160]
.dn23 equ [ebp-176]
.dz12 equ [ebp-180]
.dz13 equ [ebp-184]
.dz23 equ [ebp-188]
.cnv1 equ [ebp-208] ; current normal vectors
.cnv2 equ [ebp-240]
; current z on both edges; .cz1/.cz2 are loaded together with movlps
.cz2 equ [ebp-244]
.cz1 equ [ebp-248]
.tri_no equ [ebp-252]
.sort3: ; sort triangle vertices by ascending y (low word of eax/ebx/ecx)
cmp ax,bx
jle .sort1
xchg eax,ebx
; keep z values and normals in step with the swapped vertices
shufps xmm4,xmm4,11100001b
movaps xmm6,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm6
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
shufps xmm4,xmm4,11011000b
movaps xmm6,xmm1
movaps xmm1,xmm2
movaps xmm2,xmm6
jmp .sort3
.sort2:
movaps .z1,xmm4
mov .y1,eax
mov .y2,ebx
mov .y3,ecx
movdqa .y_min,xmm5
if 1 ; check whether at least a fragment
packssdw xmm5,xmm5 ; of the triangle is in the visible area
pshuflw xmm5,xmm5,11011000b
; 16 bytes starting at .y3: y3,x3,y2,x2,y1,x1 as words, followed by
; the 4 bytes of .z1
movdqu xmm7,.y3
movdqa xmm6,xmm5
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
movdqa xmm4,xmm7
; (coord > min) xor (coord > max) is set for min < coord <= max
pcmpgtw xmm7,xmm5
pcmpgtw xmm4,xmm6
pxor xmm7,xmm4
pmovmskb eax,xmm7
; NOTE(review): pmovmskb yields 16 bits, so this mask also keeps bits
; 13 and 15, which come from the bytes of .z1 rather than from a
; coordinate - the early exit may therefore be taken less often than
; intended; confirm whether 0x0aaa was meant.
and eax,0x00aaaaaa
or eax,eax
jz .rpt_loop2_end
end if
movd .tri_no,mm7
movaps .1_nv,xmm0
movaps .2_nv,xmm1
movaps .3_nv,xmm2
; movaps .l_v,xmm3
mov .Zbuf,esi
mov .screen,edi
; ---- steps along edge 1-2 (all zero when y1 = y2) ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .rpt_dx12_make
xorps xmm7,xmm7
mov dword .dx12,0
mov dword .dz12,0
movaps .dn12,xmm7
jmp .rpt_dx12_done
.rpt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1)
shl eax,ROUND2
cdq
idiv ebx
mov .dx12,eax
; xmm6 = approximate 1 / (y2 - y1), used for the z and normal steps
cvtsi2ss xmm6,ebx
movss xmm5,.z2
rcpss xmm6,xmm6
subss xmm5,.z1
mulss xmm5,xmm6
movss .dz12,xmm5
shufps xmm6,xmm6,0
movaps xmm0,.2_nv
subps xmm0,.1_nv
mulps xmm0,xmm6
movaps .dn12,xmm0
; subps xmm3,xmm0
; mulps xmm3,xmm6
.rpt_dx12_done:
; ---- steps along edge 1-3 (all zero when y1 = y3) ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .rpt_dx13_make
xorps xmm7,xmm7
mov dword .dx13,0
mov dword .dz13,0
movaps .dn13,xmm7
jmp .rpt_dx13_done
.rpt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx13,eax
cvtsi2ss xmm6,ebx
movss xmm5,.z3
rcpss xmm6,xmm6
subss xmm5,.z1
mulss xmm5,xmm6
movss .dz13,xmm5
movaps xmm0,.3_nv
subps xmm0,.1_nv
shufps xmm6,xmm6,0
mulps xmm0,xmm6
movaps .dn13,xmm0
; mulps xmm0,xmm6
.rpt_dx13_done:
; ---- steps along edge 2-3 (all zero when y2 = y3) ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .rpt_dx23_make
xorps xmm7,xmm7
mov dword .dx23,0
mov dword .dz23,0
movaps .dn23,xmm7
jmp .rpt_dx23_done
.rpt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx23,eax
cvtsi2ss xmm6,ebx
movss xmm5,.z3
rcpss xmm6,xmm6
subss xmm5,.z2
mulss xmm5,xmm6
movss .dz23,xmm5
movaps xmm0,.3_nv
subps xmm0,.2_nv
shufps xmm6,xmm6,0
mulps xmm0,xmm6
movaps .dn23,xmm0
; mulps xmm0,xmm6
.rpt_dx23_done:
; ---- upper part: scanlines y1 .. y2-1 ----
; eax - current x on edge 1-3, ebx - current x on edge 1-2, both as
; fixed point with ROUND2 fractional bits; both start at vertex 1
movsx eax,word .x1
shl eax,ROUND2
mov ebx,eax
mov ecx,.z1
mov .cz1,ecx
mov .cz2,ecx
movaps xmm0,.1_nv
movaps .cnv1,xmm0
movaps .cnv2,xmm0
mov edi,.screen
mov esi,.Zbuf
movsx ecx,word .y1
cmp cx,.y2
jge .rpt_loop1_end
.rpt_loop1:
; pushad/popad keep eax, ebx, ecx, esi and edi intact across the call
pushad
movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1
; movaps xmm4,.l_v
sar ebx,ROUND2
sar eax,ROUND2
movd mm7,.tri_no
call ray_shd_l
popad
; advance normals, z and x by one scanline:
; edge 1-3 -> .cnv1 / .cz1 / eax, edge 1-2 -> .cnv2 / .cz2 / ebx
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
; NOTE(review): xmm4 is shuffled here and below but its value is not
; stored or read afterwards in this procedure - looks like a leftover.
shufps xmm4,xmm4,01001110b
addps xmm0,.dn13
addps xmm1,.dn12
addss xmm2,.dz13
addss xmm3,.dz12
add eax,.dx13
add ebx,.dx12
shufps xmm4,xmm4,01001110b
movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3
add ecx,1
cmp cx,.y2
jl .rpt_loop1
.rpt_loop1_end:
; ---- lower part: scanlines y2 .. y3-1 ----
; edge 1-3 simply continues (eax, .cnv1, .cz1 are kept); the second
; edge restarts at vertex 2 and follows edge 2-3
movsx ecx,word .y2
cmp cx,.y3
jge .rpt_loop2_end
movsx ebx,word .x2 ; eax - cur x1
shl ebx,ROUND2 ; ebx - cur x2
push dword .z2
pop dword .cz2
movaps xmm0,.2_nv
movaps .cnv2,xmm0
mov edi,.screen
mov esi,.Zbuf
.rpt_loop2:
pushad
movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1
; movaps xmm4,.l_v
sar ebx,ROUND2
sar eax,ROUND2
movd mm7,.tri_no
call ray_shd_l
popad
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
addps xmm0,.dn13
addps xmm1,.dn23
addss xmm2,.dz13
addss xmm3,.dz23
add eax,.dx13
add ebx,.dx23
; NOTE(review): the sum in xmm4 is not stored or read afterwards in
; this procedure - looks like a leftover.
addps xmm4,xmm6
movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3
add ecx,1
cmp cx,.y3
jl .rpt_loop2
.rpt_loop2_end:
; release the frame; ebp itself is restored from the stack
add esp,1024
pop ebp
ret
align 16
ray_shd_l:
; Draws one horizontal span of a triangle with ray casted shadows.
; For every pixel of the span whose interpolated z matches the
; Z-buffer, a ray from the light position to the pixel is tested
; against all triangles except the current one; the pixel is lit only
; when no triangle blocks the ray.
; in:
; xmm0 - normal vector 1
; xmm1 - normal vect 2
; xmm3 - lo -> hi z1, z2 coords as dwords floats
; xmm2 - lo -> hi y_min, y_max, x_min, x_max
; as dword integers
; xmm4 - not used (overwritten on entry)
; mm7 - current triangle index
; eax - x1
; ebx - x2
; ecx - y
; edx - not used (overwritten before it is read)
; edi - screen buffer
; esi - z buffer / stencil buffer filled with dd floats
; out: none; the general purpose and xmm registers used below are not
; preserved
push ebp
mov ebp,esp
sub esp,320
; ebp is moved down and rounded to a 16-byte boundary so that the
; locals accessed with movaps are aligned; the epilogue restores the
; stack with 'add esp,320' / 'pop ebp'
sub ebp,16
and ebp,0xfffffff0
; interpolated normals at the left and right end of the span
.n1 equ [ebp-16]
.n2 equ [ebp-32]
.lv equ [ebp-48]
; span bounds: .lx1 is the current x, .lx2 the (exclusive) end
.lx1 equ [ebp-52]
.lx2 equ [ebp-56]
.z2 equ [ebp-60]
.z1 equ [ebp-64]
.screen equ [ebp-68]
.zbuff equ [ebp-72]
; clip rectangle packed to signed words, written with one movq
.x_max equ [ebp-74]
.x_min equ [ebp-76]
.y_max equ [ebp-78]
.y_min equ [ebp-80]
; per-pixel steps of the normal and of z
.dn equ [ebp-96]
.dz equ [ebp-100]
.y equ [ebp-104]
; .cur_tri equ [ebp-108]
; normalized normal of the current pixel
.cnv equ [ebp-128]
; distance from the light to the current pixel
.Rlen equ [ebp-128-16]
; ray origin (the light position)
.r1 equ [ebp-128-32]
; translation added to the rotated points
.vect_t equ [ebp-128-48]
.cur_tri equ [ebp-128-64]
; .p3t equ [ebp-128-80]
; normalized ray direction
.nray equ [ebp-128-96]
.final_col equ [ebp-128-112]
.aabb_mask equ dword[ebp-128-112-4]
mov .y,ecx
movdqa xmm4,xmm2
packssdw xmm2,xmm2
movq .y_min,xmm2
; reject scanlines outside y_min <= y < y_max
cmp cx,.y_min
jl .end_rp_line
cmp cx,.y_max
jge .end_rp_line ;
; make eax the left and ebx the right end; an empty span is skipped
cmp eax,ebx
je .end_rp_line
jl @f
xchg eax,ebx
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7
shufps xmm3,xmm3,11100001b
@@:
movd .cur_tri,mm7
; reject spans entirely right or left of the clip rectangle
cmp ax,.x_max
jge .end_rp_line
cmp bx,.x_min
jle .end_rp_line
; movaps .lv,xmm4
; zero_hgst_dd is defined outside this view - presumably a mask that
; clears the highest dword of the vector
andps xmm0,[zero_hgst_dd]
andps xmm1,[zero_hgst_dd]
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
mov .lx2,ebx
movlps .z1,xmm3
; xmm7 = approximate 1 / (x2 - x1), broadcast to all lanes
sub ebx,eax
cvtsi2ss xmm7,ebx
rcpss xmm7,xmm7
shufps xmm7,xmm7,0
; .dn = (n2 - n1) / (x2 - x1)
subps xmm1,xmm0
mulps xmm1,xmm7
movaps .dn,xmm1
; .dz = (z2 - z1) / (x2 - x1); the shuffle moves z2 into the low lane
shufps xmm3,xmm3,11111001b
subss xmm3,.z1
mulss xmm3,xmm7
movss .dz,xmm3
; NOTE(review): xmm5/xmm6 are not inputs of this procedure and the
; values computed from them here and in the clipping code below are
; never stored - looks like a leftover.
subps xmm6,xmm5
mulps xmm6,xmm7
mov ebx,.lx1
cmp bx,.x_min ; clip the left end of the span to x_min
jge @f
; advance z1 and n1 by (x_min - lx1) steps and start at x_min
movzx eax,word .x_min
sub eax,ebx
cvtsi2ss xmm7,eax
shufps xmm7,xmm7,0
mulss xmm3,xmm7
mulps xmm1,xmm7
mulps xmm6,xmm7
addss xmm3,.z1
addps xmm1,.n1
addps xmm6,xmm5
movsx eax,word .x_min
movss .z1,xmm3
movaps .n1,xmm1
mov dword .lx1,eax
@@:
; clip the right end of the span to x_max
movzx eax,word .x_max
cmp .lx2,eax
jl @f
mov .lx2,eax
@@:
; byte offset of the first pixel: (xres_var * y + lx1) * 4, applied
; to both the screen and the Z-buffer pointer
movzx eax,word[xres_var]
mul dword .y
add eax,.lx1
mov .zbuff,esi
mov .screen,edi
shl eax,2
add edi,eax
add esi,eax
; ecx = number of pixels in the span
mov ecx,.lx2
sub ecx,.lx1
; .vect_t = the two words at vect_x (vect_x, vect_y) widened with
; the_zero and converted to floats - assumes the_zero holds zeros, so
; the upper two lanes become 0.0
movd xmm0,[vect_x]
punpcklwd xmm0,[the_zero]
cvtdq2ps xmm0,xmm0
movaps .vect_t,xmm0
.ddraw:
; ---- per pixel ----
xorps xmm0,xmm0
movss xmm2,.z1
movss xmm5,.z1
movaps .final_col,xmm0
; the pixel is processed only when z1 - 1.0 < [esi] <= z1 + 1.0,
; i.e. the Z-buffer entry matches the interpolated z of this span
addss xmm2,[f1]
subss xmm5,[f1]
cmpnltss xmm2,dword[esi]
cmpnltss xmm5,dword[esi]
pxor xmm2,xmm5
movd eax,xmm2
or eax,eax
jz .skips
movaps xmm7,.n1
andps xmm7,[zero_hgst_dd]
mulps xmm7,xmm7 ; normalize
haddps xmm7,xmm7
haddps xmm7,xmm7
rsqrtps xmm7,xmm7
mulps xmm7,.n1
movaps .cnv,xmm7
; ebx - light position, edx - light data read at [edx+16] and [edx+48]
mov ebx,point_light_coords
mov edx,lights_aligned
xor eax,eax
.nx_light:
; pushad keeps edi (screen), esi (Z-buffer), ecx (pixel counter),
; ebx and edx; both exits of the triangle loop end with popad
pushad
; xmm0 = (x, y, z) of the current pixel
cvtsi2ss xmm0,.lx1
cvtsi2ss xmm1,.y
movss xmm2,.z1
movlhps xmm0,xmm1
shufps xmm0,xmm2,11001000b
subps xmm0,[ebx] ; xmm0 - ray end, -> current vertex
movaps xmm3,[ebx]
andps xmm0,[zero_hgst_dd]
movaps xmm1,xmm0
; .Rlen = length of (pixel - light)
mulps xmm0,xmm0
haddps xmm0,xmm0
haddps xmm0,xmm0
sqrtps xmm0,xmm0
movss .Rlen,xmm0
rcpps xmm0,xmm0
mulps xmm0,xmm1 ; xmm0 - normalized ray vector
andps xmm0,[zero_hgst_dd]
movaps .nray,xmm0
movaps .r1,xmm3 ; ray origin
if 0
movaps xmm1,xmm3
call calc_bounding_box
mov .aabb_mask,eax
end if
; edi is recomputed for every triangle inside the loop
mov edi,[triangles_ptr]
xor ecx,ecx
.nx_tri: ; next triangle
cmp ecx,.cur_tri ; prevent self shadowing
je .skipp
if 0
mov edi,ecx
imul edi,[i12]
add edi,[triangles_ptr]
mov eax,[edi]
mov ebx,[edi+4]
mov edx,[edi+8]
imul eax,[i12]
imul ebx,[i12]
imul edx,[i12]
add eax,[points_ptr]
add ebx,[points_ptr]
add edx,[points_ptr]
movups xmm2,[eax]
movups xmm3,[ebx]
movups xmm4,[edx]
andps xmm2,[sign_mask]
andps xmm3,[sign_mask]
andps xmm4,[sign_mask]
movmskps ebx,xmm4
cmpeqps xmm2,xmm3
cmpeqps xmm3,xmm4
andps xmm2,xmm3
movmskps eax,xmm2
and eax,111b
and ebx,111b
cmp eax,111b
jne @f
bt .aabb_mask,ebx
jnc .skipp
@@:
end if
; edi = address of triangle number ecx ([i12] bytes per triangle);
; its three dwords are vertex indices, scaled by [i12] into the
; rotated points array
mov edi,ecx
imul edi,[i12]
add edi,[triangles_ptr]
mov eax,[edi]
mov ebx,[edi+4]
mov edx,[edi+8]
imul eax,[i12]
imul ebx,[i12]
imul edx,[i12]
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add edx,[points_rotated_ptr]
; 16 bytes are loaded per vertex, translated by .vect_t
movups xmm2,[eax]
movups xmm3,[ebx]
movups xmm4,[edx]
addps xmm2,.vect_t
addps xmm3,.vect_t
addps xmm4,.vect_t
;intersect_tri: procs header
; in:
; xmm0 - ray direction ; should be normalized
; xmm1 - ray origin
; xmm2 - tri vert1
; xmm3 - tri vert2
; xmm4 - tri vert3
; if eax = 1 - intersection with edge
; xmm6 - edge length
; if eax = 0 - intersect with ray (classic)
; out:
; eax = 1 - intersection occurred
; xmm0 - float lo -> hi = t, v, u, ...
; edge mode is used: the edge length is .Rlen minus [the_one]
movss xmm6,.Rlen
movaps xmm0,.nray
movaps xmm1,.r1
subss xmm6,[the_one]
mov eax,1
push ecx
call intersect_tri
pop ecx
; the first blocking triangle ends the search: the pixel is in shadow
cmp eax,1
je .inter
.skipp:
.skp:
inc ecx
cmp ecx,[triangles_count_var]
jnz .nx_tri
; jz .do_process
; comiss xmm0,.Rlen
; jl .inter
popad
.do_process:
; no triangle blocks the ray - compute the light contribution:
; d = |dot(nray, cnv)|, colour = d * [edx+16] + d^16 * [edx+48]
; (presumably the diffuse and specular colours of the light), limited
; by mask_255f and merged into .final_col with maxps
movaps xmm5,.nray ;[edx]
andps xmm5,[zero_hgst_dd] ; global
mulps xmm5,.cnv ;.lv ; last dword should be zeroed
; andps xmm5,[sign_z] ; global
haddps xmm5,xmm5
haddps xmm5,xmm5
andps xmm5,[abs_mask] ; global
movaps xmm7,xmm5
mulps xmm7,xmm7
mulps xmm7,xmm7
mulps xmm5,[edx+16]
mulps xmm7,xmm7
mulps xmm7,xmm7
mulps xmm7,[edx+48]
addps xmm5,xmm7
minps xmm5,[mask_255f] ; global
maxps xmm5,.final_col ; addps maxps
movaps .final_col,xmm5
jmp .nx_loop
.inter:
; the ray is blocked - .final_col keeps its zero value
popad
.nx_loop:
; add edx,64 ; uncomment to achieve 3 lights
; add ebx,16
; cmp edx,lights_aligned_end ; global
; jnz .nx_light
; convert the float colour to packed bytes and write 4 bytes to the
; screen buffer
movaps xmm1,.final_col
cvtps2dq xmm1,xmm1
packssdw xmm1,xmm1
packuswb xmm1,xmm1
movd [edi],xmm1
.skips:
; step to the next pixel: pointers, x, normal and z
movaps xmm0,.n1
movss xmm2,.z1
add edi,4
add esi,4
add dword .lx1,1
addps xmm0,.dn
addss xmm2,.dz
movaps .n1,xmm0
movss .z1,xmm2
dec ecx
jnz .ddraw
.end_rp_line:
add esp,320
pop ebp
ret

View File

@ -1,3 +1,200 @@
if Ext > SSE2
;--------------------------------------------------------------------
init_point_lights:
;-----------------------------------------------------------------------
; Writes a fresh position into each of the three 16-byte entries of
; point_light_coords, as floats (x, y, z, 0).
;   x, y - value returned by 'random' for ecx = 0, edx = [size_x_var]
;   z    - value returned by 'random' for ecx = -1900, edx = -600
; 'random' is defined outside this view; presumably ecx/edx are the
; bounds of the requested range and eax is the result.
; NOTE(review): y is drawn with size_x_var as well - confirm that
; size_y_var was not intended.
; in:    none
; out:   none
; clobb: eax, ecx, edx, edi, xmm0 and whatever 'random' clobbers
; (An earlier variant that derived the positions from lights_aligned
; used to be kept here as commented-out code.)
;-----------------------------------------------------------------------
        mov     edi,point_light_coords
        mov     ecx,3                   ; three lights
  .next_light:
        push    ecx                     ; ecx is reused as an argument of 'random'

        xor     ecx,ecx                 ; x coordinate
        movzx   edx,word[size_x_var]
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi],xmm0

        xor     ecx,ecx                 ; y coordinate
        movzx   edx,word[size_x_var]
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi+4],xmm0

        mov     ecx,-1900               ; z coordinate
        mov     edx,-600
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi+8],xmm0

        mov     dword [edi+12],0        ; highest dword of the entry stays zero

        add     edi,16                  ; next entry
        pop     ecx
        loop    .next_light
        ret
;------------------------------------------------------------------
intersect_tri:                          ; Moeller-Trumbore method
;-----------------------------------------------------------------------
; Ray / triangle intersection test.
; in:
;     xmm0 - ray direction ; should be normalized
;     xmm1 - ray origin
;     xmm2 - tri vert1
;     xmm3 - tri vert2
;     xmm4 - tri vert3
;     eax = 1 - intersection with an edge (ray segment):
;               xmm6 - edge length; a hit needs eps < t <= edge length
;     eax = 0 - intersection with the whole ray (classic): a hit needs
;               t >= eps
; out:
;     eax = 1 - intersection occurred, 0 otherwise
;     xmm0 - float lo -> hi = t, v, u, edge length; t, v and u are only
;            meaningful when eax = 1
; clobb: ebx, esi, edi, xmm1..xmm4, flags
; The locals live in a 16-byte aligned frame below ebp, because they
; are accessed with movaps.
;-----------------------------------------------------------------------
        push    ebp
        mov     ebp,esp
        and     ebp,-16
        sub     esp,220

  .dir    equ [ebp-16]
  .origin equ [ebp-32]
  .ta     equ [ebp-48]
  .tb     equ [ebp-64]
  .tc     equ [ebp-80]
  .tvec   equ [ebp-96]
  .pvec   equ [ebp-112]
  .qvec   equ [ebp-128]
  .e1     equ [ebp-128-16]
  .ift    equ dword[ebp-152]
  .invdet equ [ebp-156]
  .det    equ [ebp-160]
  .ed_l   equ [ebp-164]
  .u      equ [ebp-168]
  .v      equ [ebp-172]
  .t      equ [ebp-176]
  .e2     equ [ebp-192]

        movaps  .dir,xmm0
        movaps  .origin,xmm1
        movaps  .ta,xmm2
        movaps  .tb,xmm3
        movaps  .tc,xmm4
        mov     .ift,eax
        movss   .ed_l,xmm6

        ; e1 = vert2 - vert1, e2 = vert3 - vert1
        subps   xmm3,xmm2
        subps   xmm4,xmm2
        andps   xmm3,[zero_hgst_dd]
        andps   xmm4,[zero_hgst_dd]
        movaps  .e1,xmm3
        movaps  .e2,xmm4

        ; pvec = dir x e2
        lea     esi,.dir
        lea     edi,.e2
        lea     ebx,.pvec
        call    cross_aligned

        ; det = e1 . pvec
        movaps  xmm0,.e1
        mulps   xmm0,.pvec
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        movss   .det,xmm0
        ; NOTE(review): comiss clears SF and OF, so this 'jl' never
        ; branches and triangles are tested from both sides. It is left
        ; as it was: switching to 'jb' would enable back-face culling
        ; and change which triangles cast shadows - confirm the winding
        ; convention of the models before changing it.
        comiss  xmm0,[eps]
        jl      .no_hit

        rcpss   xmm0,.det
        movss   .invdet,xmm0

        ; tvec = origin - vert1, u = (tvec . pvec) * invdet
        movaps  xmm0,.origin
        subps   xmm0,.ta
        andps   xmm0,[zero_hgst_dd]
        movaps  .tvec,xmm0
        mulps   xmm0,.pvec
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        mulss   xmm0,.invdet
        movss   xmm1,xmm0
        movss   .u,xmm0
        ; (u > epsone) xor (u > epsminus) is non-zero only for
        ; epsminus < u <= epsone
        cmpnless xmm1,[epsone]
        cmpnless xmm0,[epsminus]
        pxor    xmm1,xmm0
        movd    eax,xmm1
        or      eax,eax
        jz      .no_hit

        ; qvec = tvec x e1, v = (dir . qvec) * invdet
        lea     esi,.tvec
        lea     edi,.e1
        lea     ebx,.qvec
        call    cross_aligned

        movaps  xmm0,.dir
        mulps   xmm0,.qvec
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        mulss   xmm0,.invdet
        movss   .v,xmm0
        movss   xmm1,xmm0
        addss   xmm1,.u
        ; (u + v > epsone) xor (v > epsminus): accepts epsminus < v
        ; together with u + v <= epsone
        cmpnless xmm1,[epsone]
        cmpnless xmm0,[epsminus]
        pxor    xmm1,xmm0
        movd    eax,xmm1
        or      eax,eax
        jz      .no_hit

        ; t = (e2 . qvec) * invdet
        movaps  xmm1,.e2
        mulps   xmm1,.qvec
        haddps  xmm1,xmm1
        haddps  xmm1,xmm1
        mulss   xmm1,.invdet
        movss   .t,xmm1
        ; Reject hits behind the ray origin. comiss reports "below"
        ; (and unordered) through CF, so the unsigned condition 'jb'
        ; must be used; the former 'jl' could never branch because
        ; comiss clears SF and OF, which let classic mode report hits
        ; with negative or NaN t.
        comiss  xmm1,[eps]
        jb      .no_hit

        mov     eax,1
        cmp     .ift,0
        je      .end                    ; ok intersect occurred, no edge cause

        ; edge mode: (t > eps) xor (t > edge length) is non-zero only
        ; when the hit lies on the segment; xmm1 still holds t
        movss   xmm0,.t
        cmpnless xmm0,[eps]
        cmpnless xmm1,.ed_l
        xorps   xmm0,xmm1
        movd    ebx,xmm0
        or      ebx,ebx
        jz      .no_hit
        jmp     .end

  .no_hit:
        xor     eax,eax
  .end:
        movaps  xmm0,.t                 ; t, v, u, edge length
        add     esp,220
        pop     ebp
        ret
end if
;=============================================================== ;===============================================================
do_edges_list: do_edges_list:
push ebp push ebp
@ -223,13 +420,18 @@ ret
do_sinus: do_sinus:
;in - ax - render mode
.x equ [ebp-8] .x equ [ebp-8]
.y equ [ebp-12] .y equ [ebp-12]
.new_y equ [ebp-16] .new_y equ [ebp-16]
.temp equ [ebp-20] .temp equ [ebp-20]
.dr_f equ word[ebp-22]
push ebp push ebp
mov ebp,esp mov ebp,esp
sub esp,64 sub esp,30
mov .dr_f,ax
mov dword .x,0 mov dword .x,0
mov dword .y,0 mov dword .y,0
mov esi,[screen_ptr] mov esi,[screen_ptr]
@ -243,53 +445,20 @@ do_sinus:
cld cld
rep stosd rep stosd
pop edi pop edi
; movzx eax,[sinus_flag]
; mov edx,10
; mul edx
; mov [sin_amplitude],eax
; mov [sin_frq],eax
fninit fninit
;if Ext = SSE2
; movups xmm1,[const0123] ; xmm1 - init values
; mov eax,0x000000ff
; movd xmm2,eax
; shufps xmm2,xmm2,0 ; xmm2 - mask value
; mov eax,4
; movd xmm3,eax
; shufps xmm3,xmm3,0
.again: .again:
if 0
fild dword .x
fidiv [sin_frq]
fsin
fimul [sin_amplitude]
fiadd dword .y
fistp dword .new_y
else
fild dword .x fild dword .x
fmul [sin_frq] fmul [sin_frq]
fistp dword .temp fistp dword .temp
mov eax, .temp mov eax, .temp
; mov bx, [angle_x]
; add bx, [angle_y]
; movzx ebx,bx
; shr ebx,1 ; change phase
; add eax,ebx
and eax, 0x000000ff and eax, 0x000000ff
; cdq
; mul [sin_frq]
; and eax,0x000000ff
; and ax,0x00ff
; cwde
fld dword [sin_tab+eax*4] fld dword [sin_tab+eax*4]
fimul dword [sin_amplitude] fimul dword [sin_amplitude]
fiadd dword .y fiadd dword .y
fistp dword .new_y fistp dword .new_y
end if
mov eax,.new_y mov eax,.new_y
or eax,eax or eax,eax
jl .skip jl .skip
@ -298,20 +467,19 @@ end if
jg .skip jg .skip
movzx edx,word[size_x_var] movzx edx,word[size_x_var]
mul edx mul edx
; shl eax,9
add eax,dword .x add eax,dword .x
lea ebx,[eax*3] lea ebx,[eax*3]
cmp [dr_flag],12 ; 32 bit col cause cmp .dr_f,12 ; 32 bit col cause
jl @f jb @f
add ebx,eax add ebx,eax
@@: @@:
mov eax,[esi] mov eax,[esi]
mov [edi+ebx],eax mov [edi+ebx],eax
.skip: .skip:
add esi,3 add esi,3
cmp [dr_flag],12 cmp .dr_f,12
jl @f jb @f
inc esi inc esi
@@: @@:
inc dword .x inc dword .x
@ -330,8 +498,8 @@ end if
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
movzx eax,word[size_y_var] movzx eax,word[size_y_var]
imul ecx,eax imul ecx,eax
cmp [dr_flag],12 cmp .dr_f,12
jge @f jae @f
lea ecx,[ecx*3] lea ecx,[ecx*3]
shr ecx,2 shr ecx,2
; mov ecx,SIZE_X*SIZE_Y*3/4 ; mov ecx,SIZE_X*SIZE_Y*3/4
@ -377,7 +545,19 @@ draw_dots:
ret ret
do_emboss: ; sse2 version only do_emboss: ; sse2 version only
; in ax - render model
push ebp
mov ebp,esp
sub esp,4
.dr_mod equ word[ebp-2]
mov .dr_mod,ax
if Ext >= SSE2 if Ext >= SSE2
movzx ecx,[bumps_deep_flag] movzx ecx,[bumps_deep_flag]
inc ecx inc ecx
call blur_screen ;blur n times call blur_screen ;blur n times
@ -392,20 +572,20 @@ if Ext >= SSE2
sub ecx,ebx sub ecx,ebx
mov esi,[screen_ptr] mov esi,[screen_ptr]
mov edi,[Zbuffer_ptr] mov edi,[Zbuffer_ptr]
cmp [dr_flag],12 cmp .dr_mod,11
jge @f jge @f
lea ebx,[ebx*3] lea ebx,[ebx*3]
jmp .f jmp .gf
@@: @@:
shl ebx,2 shl ebx,2
.f: .gf:
mov edx,esi mov edx,esi
add esi,ebx add esi,ebx
lea ebx,[ebx+esi] lea ebx,[ebx+esi]
pxor xmm0,xmm0 pxor xmm0,xmm0
push eax push eax
.emb: .emb:
cmp [dr_flag],12 cmp .dr_mod ,11
jge @f jge @f
movlps xmm1,[esi+3] movlps xmm1,[esi+3]
movhps xmm1,[esi+6] movhps xmm1,[esi+6]
@ -442,14 +622,7 @@ if Ext >= SSE2
pmaxsw xmm1,xmm7 pmaxsw xmm1,xmm7
pmaxsw xmm1,xmm6 pmaxsw xmm1,xmm6
if 0
movaps xmm7,xmm3
movaps xmm6,xmm3
psrlq xmm7,2*8
psrlq xmm6,4*8
pmaxsw xmm3,xmm7
pmaxsw xmm3,xmm6
end if
pmaxsw xmm1,xmm3 pmaxsw xmm1,xmm3
movd eax,xmm1 movd eax,xmm1
@ -469,7 +642,7 @@ end if
mov eax,[eax] mov eax,[eax]
mov [edi+4],eax mov [edi+4],eax
cmp [dr_flag],12 cmp .dr_mod,11
jl @f jl @f
add esi,2 add esi,2
add ebx,2 add ebx,2
@ -487,7 +660,7 @@ end if
pop ecx ;,eax pop ecx ;,eax
mov edi,[screen_ptr] mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr] mov esi,[Zbuffer_ptr]
cmp [dr_flag],12 cmp .dr_mod,11
jge .e jge .e
@@: @@:
movsd movsd
@ -498,6 +671,11 @@ end if
end if end if
mov esp,ebp
pop ebp
ret ret
;align 16 ;align 16

View File

@ -737,22 +737,33 @@ ret
blur_screen: ;blur n times ; blur or fire blur_screen: ;blur n times ; blur or fire
;in - ecx times count ;in - ecx times count
;.counter equ dword[esp-4] ; ax - render mode
.counter1 equ dword[esp-8]
.val equ dword[ebp-4]
.dr_model equ word[ebp-6]
.fire equ dword[ebp-10]
if Ext>=SSE2 if Ext>=SSE2
push ebp push ebp
mov ebp,esp mov ebp,esp
push dword 0x01010101 sub esp,10
movss xmm5,[esp] ; xorps xmm5,xmm5
; or edx,edx
; jz @f
mov .val,0x01010101
movss xmm5,.val
shufps xmm5,xmm5,0 shufps xmm5,xmm5,0
@@:
mov .dr_model,ax
.again_blur: .again_blur:
push ecx push ecx
mov edi,[screen_ptr] mov edi,[screen_ptr]
movzx ecx,word[size_x_var] ;SIZE_X*3/4 movzx ecx,word[size_x_var] ;SIZE_X*3/4
cmp .dr_model,11
cmp [dr_flag],12
jge @f jge @f
lea ecx,[ecx*3+1] lea ecx,[ecx*3+3]
shr ecx,2 shr ecx,2
@@: @@:
@ -763,11 +774,11 @@ if Ext>=SSE2
movzx ecx,word[size_y_var] movzx ecx,word[size_y_var]
sub ecx,3 sub ecx,3
imul ecx,ebx imul ecx,ebx
cmp [dr_flag],12 ; 32 bit per pix cause cmp .dr_model,11 ; 32 bit per pix cause
jge @f jge @f
lea ecx,[ecx*3] lea ecx,[ecx*3]
shr ecx,4 shr ecx,4
lea ebx,[ebx *3] lea ebx,[ebx*3]
jmp .blr jmp .blr
@@: @@:
@ -781,7 +792,7 @@ if Ext>=SSE2
mov ecx,edi mov ecx,edi
sub ecx,ebx sub ecx,ebx
movups xmm1,[ecx] movups xmm1,[ecx]
cmp [dr_flag],12 cmp .dr_model,12
jge @f jge @f
movups xmm2,[edi-3] movups xmm2,[edi-3]
movups xmm3,[edi+3] movups xmm3,[edi+3]
@ -802,9 +813,9 @@ if Ext>=SSE2
end if end if
xor eax,eax xor eax,eax
movzx ecx,word[size_x_var] movzx ecx,word[size_x_var]
cmp [dr_flag],12 cmp .dr_model,11
jge @f jge @f
lea ecx,[ecx*3] lea ecx,[ecx*3+3]
shr ecx,2 shr ecx,2
@@: @@:
; mov ecx,SIZE_X*3/4 ; mov ecx,SIZE_X*3/4

View File

@ -1,11 +1,14 @@
; DATA AREA ************************************ ; DATA AREA ************************************
if Ext > SSE2
isSSE3 db 1
end if
i3 dw 3 i3 dw 3
i6 dd 6 i6 dd 6
i12 dd 12 i12 dd 12
i36 dd 36 i36 dd 36
i256 dw 256 i256 dw 256
i255d dd 255 i255d dd 255
f1:
dot_max dd 1.0 ; dot product max and min dot_max dd 1.0 ; dot product max and min
dot_min dd 0.0 dot_min dd 0.0
env_const dd 1.05 env_const dd 1.05
@ -25,7 +28,7 @@
y_offset dw SIZE_Y / 2 y_offset dw SIZE_Y / 2
z_offset dw 0 z_offset dw 0
rsscale dd 175.0 ; next real scale rsscale dd 175.0 ; next real scale
vect_x dw SIZE_X / 2 vect_x: dw SIZE_X / 2
vect_y dw SIZE_Y / 2 vect_y dw SIZE_Y / 2
vect_z dw 0 vect_z dw 0
size_y_var: size_y_var:
@ -110,9 +113,9 @@
dd ? dd ?
db 7 db 7
db 'catmull ' db 'ray shadow'
db 2 db 2
catmull_flag db 1 ray_shd_flag db 0
dd onoff_f dd onoff_f
db 8 db 8
@ -165,7 +168,7 @@ emboss_flag db 0
db 16 db 16
db 'fire ' db 'fire '
db 3 db 2
fire_flag db 0 fire_flag db 0
dd blur_f dd blur_f
@ -350,7 +353,7 @@ base_vector:
if Ext=SSE3 if Ext=SSE3
db ' (SSE3)' db ' (SSE3)'
end if end if
db ' 0.073',0 db ' 0.074',0
labellen: labellen:
STRdata db '-1 ' STRdata db '-1 '
lab_vert: lab_vert:
@ -425,8 +428,43 @@ lightsend:
;if Ext >= SSE3
align 16 align 16
point_light_coords:
dd 50.0
dd 50.0
dd -215.0
dd 0.0
dd 815.0
dd 815.0
dd -215.0
dd 0.0
dd 1500.0
dd 1500.0
dd -215.0
dd 0.0
if 0
aabb1:
.0 dd 1.0,1.0,1.0,0
.1 dd -1.0,1.0,1.0,0
.2 dd 1.0,-1.0,1.0,0
.3 dd -1.0,-1.0,1.0,0
.4 dd 1.0,1.0,-1.0,0
.5 dd -1.0,1.0,-1.0,0
.6 dd 1.0,-1.0,-1.0,0
.7 dd -1.0,-1.0,-1.0,0
end if
sign_mask:
times 4 dd 0x80000000
f05xz: dd 0, 0, - 1.0 ,0
sign_z:
dd -1,-1,0x7fffffff,0
abs_mask: abs_mask:
dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
emboss_bias: emboss_bias:
@ -442,7 +480,11 @@ align 16
times 4 dd 510.0 times 4 dd 510.0
the_one: the_one:
times 4 dd 1.0 times 4 dd 1.0
aprox dd 0.0001
eps: times 4 dd 0.00000
epsone dd 1.0001
aprox dd 0.0001
epsminus dd -0.0001
file_info: file_info:
@ -463,22 +505,13 @@ SourceFile:
workarea rb 180 workarea rb 180
EndFile dd ? EndFile dd ?
align 8 align 8
sinbeta dd ?;+32 sinbeta dd ?;
cosbeta dd ? cosbeta dd ?
xsub dw ? xsub dw ?
zsub dw ?;+40 zsub dw ?
ysub dw ? ysub dw ?
xx1 dw ?
yy1 dw ?
zz1 dw ?;+48 xx1 + 4
xx2 dw ?
yy2 dw ?
zz2 dw ? ; xx1 + 10
xx3 dw ?;+56
yy3 dw ?
zz3 dw ? ; xx1 + 16
col1 dd ? col1 dd ?
col2 dd ? col2 dd ?
col3 dd ? col3 dd ?
@ -487,13 +520,9 @@ align 8
points_count_var dd ? ; points_count_var dd ? ;
triangles_count_var dd ? ; dont change order triangles_count_var dd ? ; dont change order
edges_count dd ? ; edges_count dd ? ;
tex_points_ptr dd ?
point_index1 dd ? ;-\
point_index2 dd ? ; } don't change order
point_index3 dd ? ;-/
temp_col dw ? temp_col dw ?
temp1 dd ? ; > dont change
temp2 dd ? ; > order
high dd ? high dd ?
rand_seed dw ? rand_seed dw ?
align 8 align 8
@ -510,18 +539,14 @@ align 8
matrix rb 36 matrix rb 36
cos_tab rd 360 cos_tab rd 360
sin_tab rd 360 sin_tab rd 360
align 16 align 16
lights_aligned:
lights_aligned_end = $ + 16 * 12
rb 16 * 12
points_count = 180000/6*3
triangles_count = 180000 / 6 ;($-triangles)/6
align 16
label trizdd dword
label trizdq qword
triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position
align 16 align 16
vectors rb 24 vectors rb 24
align 16 align 16
bumpmap rb TEXTURE_SIZE + 1 bumpmap rb TEXTURE_SIZE + 1
align 16 align 16
@ -535,25 +560,19 @@ align 16
align 16 align 16
color_map rb (TEXTURE_SIZE +100) * 3 color_map rb (TEXTURE_SIZE +100) * 3
align 16 align 16
tex_points rb points_count * 4 ; bump_map and texture coords ; tex_points rb points_count * 4 ; bump_map and texture coords
; each point word x, word y ; ; each point word x, word y
align 16 ;align 16
lights_aligned: ; lights_aligned:
lights_aligned_end = $ + 16 * 12 ; lights_aligned_end = $ + 16 * 12
rb 16 * 12 ; rb 16 * 12
if Ext >= SSE2 if Ext >= SSE2
sse_repository rb 1024 sse_repository rb 1024
end if end if
; SourceFile: ; source file temporally in screen area
; workarea dd ?
; screen rb SIZE_X * SIZE_Y * 3 ; screen buffer
;align 16
; Z_buffer rb SIZE_X * SIZE_Y * 4
procinfo: procinfo:
rb 1024 ; process info rb 2048 ; process info
I_Param rb 256 I_Param rb 256
memStack: memStack:
rb 2000 rb 2000

View File

@ -1,3 +1,8 @@
View3ds 0.073 - may 2021
1. I introduced procedure for searching nonredundand edges.
2. Writing some info about object: vertices, triangles unique edges
count.
-----------------------------------------------------------------------------------
View3ds 0.072 - march 2021 View3ds 0.072 - march 2021
1. New displaying model - texturing with bilinear filtering and transparency 1. New displaying model - texturing with bilinear filtering and transparency

View File

@ -1,20 +1,16 @@
View3ds 0.073 - tiny viewer to .3ds and .asc files with several graphics View3ds 0.074 - tiny viewer to .3ds and .asc files with several graphics
effects implementation. effects implementation.
What's new? What's new?
1. I introduced procedure for searching nonredundand edges. 1. Fixed emboss bug in grd lines displaying model.
2. Writing some info about object: vertices, triangles unique edges 2. Grd line exceedes screen problem fix.
count. 3. New rendering model - ray casted shadows and appropiate button to
set this option 'on'. Note that this is a non-real-time model, especially when
a complex object is computed. I made an effort to introduce an accelerating
structure - AABB (Axis Aligned Bounding Boxes) - but it is disabled
for now, as it seems to work incorrectly (slowly).
1. New displaying model - texturing with bilinear filtering and transparency
simultanusly. Note that filtering is done only inside polygon. To better
quality of image there is a need to use floats coordinates of texture to pass
as arguments to single triangle rendering proc.
2. Optimizations.
3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and
transparented texturing with filtering rendering models are disabled.
Buttons description: Buttons description:
1. rotary: choosing rotary axle: x, y, x+y. 1. rotary: choosing rotary axle: x, y, x+y.
2. shd. model: choosing shading model: flat, grd (smooth), env (spherical 2. shd. model: choosing shading model: flat, grd (smooth), env (spherical
@ -26,7 +22,7 @@ Buttons description:
ptex (real Phong + texturing + transparency). ptex (real Phong + texturing + transparency).
3. speed: idle, full. 3. speed: idle, full.
4,5. zoom in, out: no comment. 4,5. zoom in, out: no comment.
6. catmull: disabled 6. ray shadow: calc ray casted shadows.
7. culling: backface culling on/ off. 7. culling: backface culling on/ off.
8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). 8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination).
9. Blur: blur N times; N=0,1,2,3,4,5 9. Blur: blur N times; N=0,1,2,3,4,5
@ -50,4 +46,4 @@ Buttons description:
decrease whole handlers count by enable culling (using appropriate button) - some decrease whole handlers count by enable culling (using appropriate button) - some
back handlers become hidden. back handlers become hidden.
Maciej Guba V 2021 Maciej Guba IX 2021

File diff suppressed because it is too large Load Diff