diff --git a/programs/demos/3DS/3DMATH.INC b/programs/demos/3DS/3DMATH.INC index 5b26dc288f..028df61912 100644 --- a/programs/demos/3DS/3DMATH.INC +++ b/programs/demos/3DS/3DMATH.INC @@ -11,23 +11,6 @@ vec_z equ 8 ;------------------------ edi - pointer to 2nd 3d point --- ;------------------------ ebx - pointer to result vector -- ;---------------------- out : none ------------------------ -if 0 -make_vector: - fninit - fild word[edi+x3d] ;edi+x3d - fisub word[esi+x3d] ;esi+x3d - fstp dword[ebx+vec_x] - - fild word[edi+y3d] - fisub word[esi+y3d] - fstp dword[ebx+vec_y] - - fild word[edi+z3d] - fisub word[esi+z3d] - fstp dword[ebx+vec_z] - -ret -end if reverse_mx_3x3: ; esi - source matrix ; edi - desired reversed matrix @@ -215,6 +198,20 @@ ret ;---------------------------- edi - pointer to vector ----- ;----------------------- out : none normalize_vector: +if Ext >= SSE3 + movups xmm0,[edi] + andps xmm0,[zero_hgst_dd] + movups xmm1,xmm0 + mulps xmm0,xmm0 + haddps xmm0,xmm0 + haddps xmm0,xmm0 + rsqrtps xmm0,xmm0 + mulps xmm0,xmm1 + movlps [edi],xmm0 + movhlps xmm0,xmm0 + movss [edi+8],xmm0 +else + fninit fld dword [edi+vec_x] fmul st, st @@ -244,6 +241,7 @@ normalize_vector: fstp dword [edi+vec_y] fdivr dword [edi+vec_z] fstp dword [edi+vec_z] +end if ret ;------------------in: ------------------------- ;------------------ esi - pointer to 1st vector @@ -252,6 +250,16 @@ ret ;------------------ st0 - dot-product dot_product: fninit +;if Ext >=SSE3 +; movups xmm0,[esi] +; movups xmm1,[edi] +; andps xmm0,[zero_hgst_dd] +; mulps xmm0,xmm1 +; haddps xmm0,xmm0 +; haddps xmm0,xmm0 +; movss [esp-4],xmm0 +; fld dword[esp-4] +;else fld dword [esi+vec_x] fmul dword [edi+vec_x] fld dword [esi+vec_y] @@ -260,6 +268,7 @@ dot_product: fmul dword [edi+vec_z] faddp faddp +;end if ret ; DOS version Coded by Mikolaj Felix aka Majuma diff --git a/programs/demos/3DS/3STENCIL.INC b/programs/demos/3DS/3STENCIL.INC new file mode 100644 index 0000000000..b9e3ab55a7 --- /dev/null +++ 
b/programs/demos/3DS/3STENCIL.INC @@ -0,0 +1,329 @@ +ROUND2 equ 10 + +stencil_tri: +; procedure calculate triangle in stencil buffer +; ----------------in - eax - x1 shl 16 + y1 ---------------------- +; -------------------- ebx - x2 shl 16 + y2 ---------------------- +; -------------------- ecx - x3 shl 16 + y3 ---------------------- +; -------------------- esi - pointer to s-buffer ----------------- +; -------------------- xmm0 - lo -> hi z1, z2, z3 as dword float + +.x1 equ [ebp-2] +.y1 equ [ebp-4] +.x2 equ [ebp-6] +.y2 equ [ebp-8] +.x3 equ [ebp-10] +.y3 equ [ebp-12] + +.dx12 equ dword[ebp-20] +.dx13 equ dword[ebp-24] +.dx23 equ dword[ebp-28] +.dz12 equ dword[ebp-32] +.dz13 equ dword[ebp-36] +.dz23 equ dword[ebp-40] +.zz2 equ [ebp-44] +.zz1 equ [ebp-48] +.z3 equ [ebp-56] +.z2 equ [ebp-60] +.z1 equ [ebp-64] +.s_buff equ [ebp-68] + + push ebp + mov ebp,esp + sub esp,128 + and ebp,0xfffffff0 + .sort2: + cmp ax,bx + jle .sort1 + xchg eax,ebx + shufps xmm0,xmm0,11100001b + .sort1: + cmp bx,cx + jle .sort3 + xchg ebx,ecx + shufps xmm0,xmm0,11011000b + jmp .sort2 + .sort3: + mov .y1,eax ; store triangle coordinates in user friendly variables + mov .y2,ebx + mov .y3,ecx + + ; mov edx,100.11 + ; movd xmm0,edx + ; shufps xmm0,xmm0,11100000b + + movaps .z1,xmm0 + ; mov dword .z1,edx + ; mov .z2,edx + ; mov .z3,edx + mov .s_buff,esi + + mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that + and edx,ebx ; if *all* of them are negative a sign flag is raised + and edx,ecx + and edx,eax + test edx,80008000h ; Check both X&Y at once + jne .loop2_end + + mov bx,.y2 ; calc delta 12 + sub bx,.y1 + jnz .dx12_make + mov .dx12,0 + mov .dz12,0 + jmp .dx12_done + .dx12_make: + mov ax,.x2 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx12,eax + + movss xmm1,.z2 + cvtsi2ss xmm2,ebx + subss xmm1,.z1 + divss xmm1,xmm2 + movss .dz12,xmm1 + ; mov .dz12, dword 0.11 + + .dx12_done: + mov bx,.y3 ; calc delta 13 + sub bx,.y1 + jnz .dx13_make + 
mov .dx13,0 + mov .dz13,0 + jmp .dx13_done + .dx13_make: + mov ax,.x3 + sub ax,.x1 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx13,eax + + movss xmm1,.z3 + cvtsi2ss xmm2,ebx + subss xmm1,.z1 + divss xmm1,xmm2 + movss .dz13,xmm1 + ; mov .dz13, dword 0.11 + + .dx13_done: + mov bx,.y3 ; calc delta 23 + sub bx,.y2 + jnz .dx23_make + mov .dx23,0 + mov .dz23,0 + jmp .dx23_done + .dx23_make: + mov ax,.x3 + sub ax,.x2 + cwde + movsx ebx,bx + shl eax,ROUND2 + cdq + idiv ebx + mov .dx23,eax + + movss xmm1,.z3 + cvtsi2ss xmm2,ebx + subss xmm1,.z2 + divss xmm1,xmm2 + movss .dz23,xmm1 + + .dx23_done: + mov edx,.z1 + + mov .zz1,edx + mov .zz2,edx + movsx eax,word .x1 + shl eax,ROUND2 ; eax - cur x1 + mov ebx,eax ; ebx - cur x2 + + mov cx,.y1 + cmp cx,.y2 + jge .loop1_end + .loop1: + + pushad + sar ebx,ROUND2 + sar eax,ROUND2 + ; movq xmm0,.zz1 + movlps xmm0,.zz1 + ; mov edx,0.11 + ; movd xmm0,edx + ; shufps xmm0,xmm0,11100000b + mov esi,.s_buff + + call stencil_line + + popad + add eax,.dx13 + add ebx,.dx12 + + movss xmm1,.zz1 + movss xmm2,.zz2 + addss xmm1,.dz13 + addss xmm2,.dz12 + movss .zz1,xmm1 + movss .zz2,xmm2 + + add cx,1 + cmp cx,.y2 + jl .loop1 + + .loop1_end: + + mov edx,.z2 + mov .zz2,edx + movsx ebx,word .x2 + shl ebx,ROUND2 + + mov cx,.y2 + cmp cx,.y3 + jge .loop2_end + .loop2: + pushad + + sar ebx,ROUND2 + sar eax,ROUND2 + movlps xmm0,.zz1 + mov esi,.s_buff + + + call stencil_line + + popad + + add eax,.dx13 + add ebx,.dx23 + + movss xmm1,.zz1 + movss xmm2,.zz2 + addss xmm1,.dz13 + addss xmm2,.dz23 + movss .zz1,xmm1 + movss .zz2,xmm2 + + + add cx,1 + cmp cx,.y3 + jl .loop2 + .loop2_end: + + add esp,128 + pop ebp + +ret + +stencil_line: +;---------------------------------------------------- +;-------------in xmm0 - lo -> hi z1, z2 +;--------------- eax - x1 --------------------------- +;--------------- ebx - x2 --------------------------- +;--------------- ecx - y----------------------------- +;--------------- esi - pointer to s-buffer + + +.dz 
equ [ebp-4] +.z2 equ [ebp-8] +.z1 equ [ebp-12] +.x2 equ [ebp-16] +.x1 equ [ebp-20] +.s_buf equ [ebp-24] +; cmp eax,ebx +; je @f +; int3 +; @@: + push ebp + mov ebp,esp + sub esp,64 + ; cmp eax,0 + ; jg @f + ; + ; @@: + or cx,cx + jle .l_quit + + cmp cx,[size_y_var] + jge .l_quit + + movzx ecx,cx + mov .s_buf,esi + cmp eax,ebx + je .l_quit + jl .l_ok + + xchg eax,ebx + shufps xmm0,xmm0,11100001b + .l_ok: + + cmp ax,[size_x_var] + jge .l_quit + cmp bx,0 + jle .l_quit + + movlps .z1,xmm0 + mov .x1,eax + mov .x2,ebx + + sub ebx,eax + movss xmm0,.z2 + cvtsi2ss xmm1,ebx + subss xmm0,.z1 + divss xmm0,xmm1 + movss .dz,xmm0 + + movzx edx,word[size_x_var] + cmp eax,1 + jge @f + mov eax,.x1 + neg eax + cvtsi2ss xmm2,eax + mulss xmm2,.dz + addss xmm2,.z1 + movss .z1,xmm2 + mov dword .x1,0 + movzx edx,word[size_x_var] + sub edx,1 + @@: + cmp .x2,edx + jl @f + mov .x2,edx + + @@: + ; mov eax,.x1 + ; cmp .x2,eax + ; je .l_quit + movzx edx,word[size_x_var] + mov esi,.s_buf + mov eax,ecx ; y + mul edx + add eax,.x1 + + shl eax,2 + add esi,eax + + mov ecx,.x2 + sub ecx,.x1 + movss xmm2,.z1 ; cz + .ccalc: + movss xmm1,xmm2 + cmpltss xmm1,dword[esi] + movd eax,xmm1 + cmp eax,-1 + jnz @f + movss dword[esi],xmm2 + @@: + add esi,4 + addss xmm2,.dz + sub ecx,1 + jnz .ccalc + .l_quit: + mov esp,ebp + pop ebp +ret diff --git a/programs/demos/3DS/A_PROCS.INC b/programs/demos/3DS/A_PROCS.INC index fa69da6bed..466c1c9331 100644 --- a/programs/demos/3DS/A_PROCS.INC +++ b/programs/demos/3DS/A_PROCS.INC @@ -79,7 +79,7 @@ end if lea ebx,[eax*3] cmp [dr_flag],12 ; 32 bit col cause - jne @f + jl @f add ebx,eax @@: mov eax,[esi] @@ -87,7 +87,7 @@ end if .skip: add esi,3 cmp [dr_flag],12 - jne @f + jl @f inc esi @@: inc dword .x @@ -106,8 +106,8 @@ end if movzx ecx,word[size_x_var] movzx eax,word[size_y_var] imul ecx,eax - cmp [dr_flag],12 - je @f + cmp [dr_flag],12 + jge @f lea ecx,[ecx*3] shr ecx,2 ; mov ecx,SIZE_X*SIZE_Y*3/4 @@ -169,7 +169,7 @@ if Ext >= SSE2 mov esi,[screen_ptr] mov 
edi,[Zbuffer_ptr] cmp [dr_flag],12 - je @f + jge @f lea ebx,[ebx*3] jmp .f @@: @@ -182,7 +182,7 @@ if Ext >= SSE2 push eax .emb: cmp [dr_flag],12 - je @f + jge @f movlps xmm1,[esi+3] movhps xmm1,[esi+6] movlps xmm2,[esi-3] @@ -246,7 +246,7 @@ end if mov [edi+4],eax cmp [dr_flag],12 - jne @f + jl @f add esi,2 add ebx,2 add edx,2 @@ -264,7 +264,7 @@ end if mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] cmp [dr_flag],12 - je .e + jge .e @@: movsd dec edi @@ -562,8 +562,6 @@ generate_object2: ; torus pop ebp ret - - generate_object3: ; heart ;locals ; counter dw ? @@ -755,3 +753,5 @@ generate_object3: ; heart ret + + diff --git a/programs/demos/3DS/B_PROCS.INC b/programs/demos/3DS/B_PROCS.INC index e6150107e1..8ed566eea1 100644 --- a/programs/demos/3DS/B_PROCS.INC +++ b/programs/demos/3DS/B_PROCS.INC @@ -751,7 +751,7 @@ if Ext>=SSE2 movzx ecx,word[size_x_var] ;SIZE_X*3/4 cmp [dr_flag],12 - je @f + jge @f lea ecx,[ecx*3+1] shr ecx,2 @@: @@ -764,7 +764,7 @@ if Ext>=SSE2 sub ecx,3 imul ecx,ebx cmp [dr_flag],12 ; 32 bit per pix cause - je @f + jge @f lea ecx,[ecx*3] shr ecx,4 lea ebx,[ebx *3] @@ -782,7 +782,7 @@ if Ext>=SSE2 sub ecx,ebx movups xmm1,[ecx] cmp [dr_flag],12 - je @f + jge @f movups xmm2,[edi-3] movups xmm3,[edi+3] jmp .f @@ -803,7 +803,7 @@ if Ext>=SSE2 xor eax,eax movzx ecx,word[size_x_var] cmp [dr_flag],12 - je @f + jge @f lea ecx,[ecx*3] shr ecx,2 @@: diff --git a/programs/demos/3DS/DATA.INC b/programs/demos/3DS/DATA.INC index 05e00a36c0..145c6cb43c 100644 --- a/programs/demos/3DS/DATA.INC +++ b/programs/demos/3DS/DATA.INC @@ -13,8 +13,6 @@ xobs dw 0 ;SIZE_X / 2 ;200 ;observer = camera yobs dw 0 ;SIZE_Y / 2 ;200 ;coordinates zobs dw -1000 - ; size_x dw SIZE_X - ; size_y dw SIZE_Y re_alloc_flag db 0 angle_counter dw 0 @@ -56,7 +54,6 @@ vertices_index_ptr dd 0 - ; draw_win_at_first db 1 vertex_edit_no dw 0 edit_start_x: dw 0 @@ -76,7 +73,7 @@ db 3 db 'shd. 
model' if Ext >= SSE3 - db 13 + db 14 else db 12 end if @@ -269,6 +266,7 @@ flags: ; flags description db 'cenv' db 'grdl' db 'rphg' + db 'glas' spd_f: db 'idle' db 'full' @@ -279,15 +277,6 @@ flags: ; flags description onoff_f: db 'off ' db 'on ' -; light_component_f: -; db 'norm ' ; diffuse | -; db 'min' ; specular | or sth. like this -; db 'max ' ; emmisive | - -;; color_component_f: -;; db ' r ' -;; db ' g ' -;; db ' b ' blur_f: ; blur, fire db 'off ' @@ -334,14 +323,6 @@ base_vector: db 'y +' labelyplusend: -;navigation_size = $ - labelvector -; db 'set color ' -; db 'r -' -; db 'g +' -; db 'b -' -; db 'b +' -; db 'g -' -; db 'r +' labelt: db 'DEUS CARITAS EST' @@ -357,13 +338,12 @@ base_vector: if Ext=SSE3 db ' (SSE3)' end if - db ' 0.070',0 + db ' 0.071',0 labellen: STRdata db '-1 ' all_lights_size dw lightsend-lights -if USE_LFN file_info: dd 0 @@ -373,20 +353,7 @@ if USE_LFN fptr dd 0 ;workarea file_name: db '/rd/1/3d/house.3ds',0 - -else - - file_info: - dd 0 - dd 0 - fsize dd 1 - dd workarea - dd hash_table - file_name: - db '/rd/1/teapot.3ds',0 -end if - - ;I_END: + ; db '/tmp0/1/ant.3ds',0 rb 256 @@ -448,14 +415,8 @@ align 16 times 4 dd 0.0 I_END: -if USE_LFN = 0 -hash_table rb 4096 -SourceFile: -workarea rb 180000 -else SourceFile: workarea rb 180 -end if EndFile dd ? align 8 sinbeta dd ?;+32 @@ -478,29 +439,16 @@ align 8 col2 dd ? col3 dd ? scale dd ? ; help scale variable - edges_counter dd ? + ;== triangles_count_var dd ? points_count_var dd ? - ; triangles_ptr dd ? - ; triangles_w_z_ptr dd ? - ; triangles_normals_ptr dd ? - ; points_normals_ptr dd ? - ; points_normals_rot_ptr dd ? - ; points_ptr dd ? - ; points_rotated_ptr dd ? - ; points_translated_ptr dd ? - ; screen_ptr dd ? - ; Zbuffer_ptr dd ? - ; vertices_index_ptr dd ? - ; edit_start_x: - dw ? ; don't change order - ; edit_start_y dw ? - ; edges_ptr dd ? + size_y_var: - dw ? + yres_var dw ? + size_x_var: - dw ? + xres_var dw ? x_start: dw ? 
y_start: @@ -512,6 +460,8 @@ align 8 point_index2 dd ? ; } don't change order point_index3 dd ? ;-/ temp_col dw ? + temp1 dd ? + temp2 dd ? high dd ? rand_seed dw ? align 8 @@ -531,41 +481,15 @@ align 8 align 16 -if USE_LFN = 0 - points: - rw (EndFile-SourceFile)/12*3 - points_count = ($-points)/6 - triangles: - rw (EndFile-SourceFile)/12*3 - triangles_count = ($-triangles)/6 -align 16 - real_points rd points_count*3 + 1 -align 16 - rotated_points_r rd points_count*3 + 1 -align 16 - points_rotated rw points_count*3 + 2 ;means translated -align 16 - triangles_normals rb triangles_count * 12 ; -align 16 - point_normals rb points_count * 12 ;one 3dvector - triple float dword x,y,z -align 16 - point_normals_rotated rb points_count * 12 -align 16 - triangles_normals_rotated rb triangles_count * 12 -else points_count = 180000/6*3 triangles_count = 180000 / 6 ;($-triangles)/6 -end if align 16 label trizdd dword label trizdq qword triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position align 16 vectors rb 24 -;align 16 -; points_color rb 6*points_count ; each color as word -; sorted_triangles rw triangles_count*3 + 2 align 16 bumpmap rb TEXTURE_SIZE + 1 align 16 @@ -577,7 +501,7 @@ align 16 align 16 texmap rb (TEXTURE_SIZE +1) * 3 align 16 - color_map rb (TEXTURE_SIZE +1) * 3 + color_map rb (TEXTURE_SIZE +100) * 3 align 16 tex_points rb points_count * 4 ; bump_map and texture coords ; each point word x, word y diff --git a/programs/demos/3DS/History.txt b/programs/demos/3DS/History.txt index e662d28904..e3c10375a9 100644 --- a/programs/demos/3DS/History.txt +++ b/programs/demos/3DS/History.txt @@ -1,3 +1,12 @@ + +View3ds 0.070 - VII 2020 + +1. Some keys support by Leency. +2. New displaying model - real Phong - real not fake normal vector interpolation, + normalising it and calculating dot product (one for each light). + It requires SSE3. 
(by me, Maciej Guba) +----------------------------------------------------------------------------------- + View3ds 0.069 - May 2020 1. KPacked files support by Leency. 2. 32bit vertices indexes and ability to load whole RAM limited objects. @@ -5,7 +14,6 @@ View3ds 0.069 - May 2020 3. I switch off painters algotithm mode (depth sorting). In app impelementetion it has limited vertices count and produce less quality image than Z buffer Catmull algo. In addition this switch off reduces app size, (by me). - ----------------------------------------------------------------------------------- View3ds 0.068 - XI 2016 diff --git a/programs/demos/3DS/README.TXT b/programs/demos/3DS/README.TXT index 1a51874f12..6dc8273faf 100644 --- a/programs/demos/3DS/README.TXT +++ b/programs/demos/3DS/README.TXT @@ -1,11 +1,12 @@ -View3ds 0.070 - tiny viewer to .3ds and .asc files with several graphics +View3ds 0.071 - tiny viewer to .3ds and .asc files with several graphics effects implementation. What's new? -1. Some keys support by Leency. -2. New displaying model - real Phong - real not fake normal vector interpolation, normalising it and calculating - dot product (one for each light). It requires SSE3. (by me) +1. New displaying model - glass - a two-pass rendering. The first pass calculates + the Z position of all front pixels; the second renders the image, adding the reflective + component of light only for front pixels. The transparency effect is obtained by addition with saturation. +2. I removed a bug that caused object generation to be performed after choosing the 'emboss' option. Buttons description: 1. rotary: choosing rotary axle: x, y, x+y. @@ -39,4 +40,4 @@ Buttons description: is released apply current position. You may also decrease whole handlers count by enable culling (using appropriate button) - some back handlers become hidden.
- Maciej Guba VII 2020 + Maciej Guba VIII 2020 diff --git a/programs/demos/3DS/VIEW3DS.ASM b/programs/demos/3DS/VIEW3DS.ASM index ee27c5bb1f..05fd85e0cd 100644 --- a/programs/demos/3DS/VIEW3DS.ASM +++ b/programs/demos/3DS/VIEW3DS.ASM @@ -1,5 +1,5 @@ -; application : View3ds ver. 0.070 - tiny .3ds and .asc files viewer +; application : View3ds ver. 0.071 - tiny .3ds and .asc files viewer ; with a few graphics effects demonstration. ; compiler : FASM ; system : KolibriOS @@ -16,7 +16,7 @@ ; I tried optimizing it a bit, but don't know if it was successful. The objects ; can be: ; 1) Read from a file (*.3DS standard) -; 2) Written in manually (at the end of the code) +; 2) Written in manually (at the end of the code) ; this option no longer exists SIZE_X equ 512 @@ -37,10 +37,10 @@ MMX = 1 SSE = 2 SSE2 = 3 SSE3 = 4 -Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } +Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } ; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) -USE_LFN = 1 +USE_LFN = 1 ; App is Kolibri only now.
use32 org 0x0 @@ -63,6 +63,7 @@ START: ; start of execution shr ax,2 movzx ebx,ax + movzx ebx,ax lea ebx,[ebx*3] push ebx fninit @@ -75,8 +76,8 @@ START: ; start of execution shr ax,1 mov [vect_x],ax - mov eax, 20 shl 16 + 20 - mov [x_start],eax + ; mov eax, 20 shl 16 + 20 + mov [x_start],dword 20 shl 16 + 20 ;eax call alloc_buffer_mem @@ -131,6 +132,8 @@ START: ; start of execution ;mov eax,40 ; set events mask ;mov ebx,1100000000000000000000000100111b ;int 0x40 + + still: cmp [edit_flag],1 jne @f @@ -316,7 +319,8 @@ still: cmp ah,15 jne @f cmp [emboss_flag],1 - call init_envmap2 + ; call init_envmap2 + call do_emboss @@: ; cmp ah,17 ; jne .next_m @@ -332,8 +336,8 @@ still: jne .next_m2 mov [re_alloc_flag],1 ; reallocate memory - mov [triangles_count_var],1000 - mov [points_count_var],1000 + mov [triangles_count_var],20000 + mov [points_count_var],20000 call alloc_mem_for_tp mov [re_alloc_flag],0 @@ -347,7 +351,7 @@ still: @@: cmp bl,4 jg @f - movzx ax,bl ; ax < - object number + movzx eax,bl ; eax < - object number call generate_object2 jmp .calc_norm @@: @@ -610,7 +614,8 @@ still: movzx ecx,word[size_y_var] movzx eax,word[size_x_var] mul ecx - lea ecx,[eax*3] + lea ecx,[eax*4] + if (Ext = MMX)|(Ext = SSE) mov bh,bl push bx @@ -668,7 +673,7 @@ end if movzx eax,word[size_x_var] movzx ecx,word[size_y_var] mul ecx - lea ecx,[eax*3] + lea ecx,[eax*4] if (Ext = MMX)|(Ext = SSE) mov bh,bl push bx @@ -736,7 +741,7 @@ end if ; mov ecx,SIZE_X shl 16 + SIZE_Y mov edx,[offset_y] ;5 shl 16 + 25 cmp [dr_flag],12 - je .ff + jge .ff int 0x40 jmp .f .ff: @@ -775,15 +780,11 @@ end if ;-------------------------------------------------------------------------------- ;-------------------------PROCEDURES--------------------------------------------- ;-------------------------------------------------------------------------------- -;include "TEX3.INC" include "FLAT_CAT.INC" include "TEX_CAT.INC" include "BUMP_CAT.INC" include "3DMATH.INC" include "GRD_LINE.INC" -;include "GRD3.INC" 
-;include "FLAT3.INC" -;include "BUMP3.INC" include "B_PROCS.INC" include "A_PROCS.INC" include "GRD_CAT.INC" @@ -793,6 +794,8 @@ include "TWO_TEX.INC" include "ASC.INC" if Ext >= SSE3 include "3R_PHG.INC" +include '3STENCIL.INC' +include '3GLASS.INC' end if clear_vertices_index: mov edi,[vertices_index_ptr] @@ -827,7 +830,7 @@ edit: ; mmx required, edit mesh by vertex lea ebx,[ebx*3] cmp [dr_flag],12 - jne @f + jl @f add ebx,[esp] @@: add esp,4 @@ -946,9 +949,13 @@ edit: ; mmx required, edit mesh by vertex shl edi,2 add edi,[points_ptr] lea esi,[.points_rotated] - mov ecx,3 cld - rep movsd + movsd + movsd + movsd + ; mov ecx,3 + ; cld + ; rep movsd mov dword[edit_end_x],0 @@ -982,7 +989,6 @@ alloc_buffer_mem: mov ebx,20 mov edx,[screen_ptr] int 0x40 - ; and eax,0xfffffff0 mov [screen_ptr],eax mov ecx,[.temp] @@ -992,7 +998,6 @@ alloc_buffer_mem: mov ebx,20 mov edx,[Zbuffer_ptr] int 0x40 - ; and eax,0xfffffff0 mov [Zbuffer_ptr],eax @@ -1003,7 +1008,6 @@ alloc_buffer_mem: mov ebx,20 mov edx,[vertices_index_ptr] int 0x40 - ; and eax,0xfffffff0 mov [vertices_index_ptr],eax mov esp,ebp @@ -1417,7 +1421,78 @@ do_color_buffer: ; do color buffer for Gouraud, flat shading mov esp,ebp pop ebp ret +if Ext >= SSE3 +init_point_normals: +.z equ dword [ebp-8] +.y equ dword [ebp-12] +.x equ [ebp-16] +.point_number equ dword [ebp-28] +.hit_faces equ dword [ebp-32] + fninit + push ebp + mov ebp,esp + sub esp,64 + and ebp,-16 + mov edi,[points_normals_ptr] + mov .point_number,0 + .ipn_loop: + movd xmm0,.point_number + pshufd xmm0,xmm0,0 + mov .hit_faces,0 + mov .x,dword 0 + mov .y,0 + mov .z,0 + mov esi,[triangles_ptr] + xor ecx,ecx ; ecx - triangle number + .ipn_check_face: + movdqu xmm1,[esi] + pcmpeqd xmm1,xmm0 + pmovmskb eax,xmm1 + and eax,0xfff + or eax,eax + jz .ipn_next_face + push esi + mov esi,ecx + lea esi,[esi*3] + shl esi,2 + add esi,[triangles_normals_ptr] + movups xmm7,[esi] + addps xmm7,.x + movaps .x,xmm7 + pop esi + inc .hit_faces + + .ipn_next_face: + add esi,12 + 
inc ecx + cmp ecx,[triangles_count_var] + jne .ipn_check_face + cvtsi2ss xmm6,.hit_faces + movaps xmm7,.x + rcpss xmm6,xmm6 + shufps xmm6,xmm6,11000000b + mulps xmm7,xmm6 + movaps xmm6,xmm7 + mulps xmm6,xmm6 + andps xmm6,[zero_hgst_dd] + haddps xmm6,xmm6 + haddps xmm6,xmm6 + rsqrtps xmm6,xmm6 + mulps xmm7,xmm6 + movlps [edi],xmm7 + movhlps xmm7,xmm7 + movss [edi+8],xmm7 + add edi,12 + inc .point_number + mov edx,.point_number + cmp edx,[points_count_var] + jne .ipn_loop + + add esp,64 + pop ebp +ret +else init_point_normals: .x equ dword [ebp-4] .y equ dword [ebp-8] @@ -1491,7 +1566,7 @@ init_point_normals: mov esp,ebp ret ;=============================================================== - +end if init_triangles_normals2: mov ebx,[triangles_normals_ptr] mov ebp,[triangles_ptr] @@ -1626,12 +1701,69 @@ draw_triangles: movd mm1,dword[eax] paddw mm1,mm0 movd dword[eax],mm1 - @@: + @@: + if Ext >= SSE3 + cmp [dr_flag],13 + jne .no_stencil + mov esi,[triangles_ptr] + mov ecx,[triangles_count_var] + @@: + push esi + push ecx + + mov eax,[esi] + mov ebx,[esi+4] + mov ecx,[esi+8] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add ecx,[points_rotated_ptr] + push dword[ecx+8] + push dword[ebx+8] + push dword[eax+8] + movups xmm0,[esp] + add esp,12 + andps xmm0,[zero_hgst_dd] + + + mov eax,[esi] + mov ebx,[esi+4] + mov ecx,[esi+8] + shl eax,1 + shl ebx,1 + shl ecx,1 + lea eax,[eax*3] + lea ebx,[ebx*3] + lea ecx,[ecx*3] + add eax,[points_translated_ptr] + add ebx,[points_translated_ptr] + add ecx,[points_translated_ptr] + mov eax,[eax] + mov ebx,[ebx] + mov ecx,[ecx] + ror eax,16 + ror ebx,16 + ror ecx,16 + + + mov esi,[Zbuffer_ptr] + + call stencil_tri + + pop ecx + pop esi + add esi,12 + dec ecx + jnz @b + + .no_stencil: + end if mov esi,[triangles_ptr] - mov [edges_counter],0 mov ecx,[triangles_count_var] .again_dts: push ecx @@ -1744,6 +1876,7 @@ draw_triangles: .no_edit: end if + push esi ; fninit ; DO 
culling AT FIRST cmp [culling_flag],1 ; (if culling_flag = 1) @@ -1790,6 +1923,8 @@ end if if Ext >= SSE3 cmp [dr_flag],12 je .r_phg + cmp [dr_flag],13 + je .glass end if ; **************** mov esi,point_index3 ; do Gouraud shading mov ecx,3 @@ -1802,15 +1937,16 @@ end if fld dword[eax] ; x cooficient of normal vector fimul [correct_tex] fiadd [correct_tex] - fistp word[esp-2] + fistp [temp1] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient fimul [correct_tex] fiadd [correct_tex] - fistp word[esp-4] + fistp [temp2] - movzx eax,word[esp-4] - movzx ebx,word[esp-2] + mov eax,[temp2] + mov ebx,[temp1] + and ebx,0xfffffff shl eax,TEX_SHIFT add eax,ebx lea eax,[eax*3+color_map] @@ -1873,8 +2009,8 @@ end if ; push [temp_col] ; push [temp_col] .rainbow: - cmp [catmull_flag],1 ; put on stack z coordinate if necessary - jne @f + ; cmp [catmull_flag],1 ; put on stack z coordinate if necessary + ; jne @f push [zz3] @@: mov eax,dword[yy3] @@ -1883,19 +2019,15 @@ end if push eax neg al push ax - ; cmp [catmull_flag],1 - ; jne @f push [zz2] - ; @@: + mov eax,dword[yy2] and eax,ebx push eax neg al push ax - ; cmp [catmull_flag],1 - ; jne @f push [zz1] - ; @@: + mov eax,dword[yy1] and eax,ebx push eax @@ -1909,17 +2041,12 @@ end if mov ecx,dword[xx3] ror ecx,16 mov edi,[screen_ptr] - ; cmp [catmull_flag],0 - ; je @f mov esi,[Zbuffer_ptr] call gouraud_triangle_z - ; jmp .end_draw - ; @@: - ; call gouraud_triangle jmp .end_draw .flat_draw: ;************************** - ; FLAT DRAWING + fninit ; FLAT DRAWING mov eax,[point_index1] mov ebx,[point_index2] mov ecx,[point_index3] @@ -1938,17 +2065,20 @@ end if fidiv [i3] fimul [correct_tex] fiadd [correct_tex] - fistp dword[esp-4] ; x temp variables + fistp [temp1] ;dword[esp-4] ; x temp variables fld dword[eax+4] ; y cooficient of normal vector fadd dword[ebx+4] fadd dword[ecx+4] fidiv [i3] fimul [correct_tex] fiadd [correct_tex] - fistp dword[esp-8] ; y - mov edx,dword[esp-8] + fistp [temp2] 
;dword[esp-8] ; y + mov edx,[temp2] ;dword[esp-8] + and edx,0xfffffff + and [temp1],0xfffffff shl edx,TEX_SHIFT - add edx,dword[esp-4] + add edx,[temp1] ;dword[esp-4] + lea eax,[3*edx] add eax,color_map mov edx,dword[eax] @@ -1976,25 +2106,19 @@ end if mov ecx,dword[xx3] ror ecx,16 mov edi,[screen_ptr] - ; cmp [catmull_flag],0 - ; je @f + mov esi,[Zbuffer_ptr] push word[zz3] push word[zz2] push word[zz1] call flat_triangle_z jmp .end_draw - ; @@: - ; call draw_triangle - ; jmp .end_draw + .env_mapping: - ; fninit - ; cmp [catmull_flag],0 - ; je @f push [zz3] push [zz2] push [zz1] - ; @@: + mov esi,point_index1 sub esp,12 mov edi,esp @@ -2004,19 +2128,6 @@ end if lea eax,[eax*3] shl eax,2 add eax,[points_normals_rot_ptr] ;point_normals_rotated -; # -; fld dword[eax] -; fmul dword[eax+4] -; fld1 -; fld1 -; faddp -; fmulp -; fimul [correct_tex] -; fiadd [correct_tex] -; fistp word[edi] -; mov word[edi+2],0 -;; fistp word[edi+2] -; # last change ; texture x=(rotated point normal -> x * 255)+255 fld dword[eax] fimul [correct_tex] @@ -2027,7 +2138,7 @@ end if fimul [correct_tex] fiadd [correct_tex] fistp word[edi+2] -; # end of last ch. 
+ add edi,4 add esi,4 loop @b @@ -2040,24 +2151,17 @@ end if ror ecx,16 mov edi,[screen_ptr] mov esi,envmap - ; cmp [catmull_flag],0 - ; je @f mov edx,[Zbuffer_ptr] call tex_triangle_z - ; jmp .end_draw - ; @@: - ; call tex_triangle + jmp .end_draw ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .cubic_env_mapping: - ; fninit - ; cmp [catmull_flag],0 - ; je @f push [zz3] push [zz2] push [zz1] - ; @@: + mov esi,point_index1 sub esp,12 mov edi,esp @@ -2066,8 +2170,8 @@ end if mov eax,dword[esi] lea eax,[eax*3] shl eax,2 - add eax,[points_normals_rot_ptr] ;point_normals_rotated -; # + add eax,[points_normals_rot_ptr] + fld dword[eax] fmul dword[eax+4] fld1 @@ -2103,27 +2207,21 @@ end if ror ecx,16 mov edi,[screen_ptr] mov esi,envmap_cub - ; cmp [catmull_flag],0 - ; je @f mov edx,[Zbuffer_ptr] + call tex_triangle_z - ; jmp .end_draw - ; @@: - ; call tex_triangle + jmp .end_draw ;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ .bump_mapping: - ; fninit - ; cmp [catmull_flag],0 - ; je @f push [Zbuffer_ptr] push [zz3] push [zz2] push [zz1] -; @@: + mov esi,point_index1 sub esp,12 mov edi,esp @@ -2177,19 +2275,12 @@ end if mov esi,envmap mov edx,bumpmap ;BUMP_MAPPING - ; cmp [catmull_flag],0 - ; je @f call bump_triangle_z - ; jmp .end_draw - ; @@: - ; call bump_triangle + jmp .end_draw .tex_mapping: - ; fninit - ; cmp [catmull_flag],0 - ; je @f push [zz3] push [zz2] push [zz1] @@ -2215,13 +2306,10 @@ end if ror ecx,16 mov edi,[screen_ptr] mov esi,texmap - ; cmp [catmull_flag],0 - ; je @f mov edx,[Zbuffer_ptr] + call tex_triangle_z -; jmp .end_draw -; @@: -; call tex_triangle + jmp .end_draw ; .ray: ; grd_triangle according to points index @@ -2283,7 +2371,7 @@ end if lea edx,[ecx*3] push word[edx*2+xx1-2] ; zz1 ,2 ,3 - + fninit mov eax,dword[esi] shl eax,2 lea eax,[eax*3] ;+point_normals_rotated] @@ -2292,18 +2380,20 @@ end if fld dword[eax] ; x cooficient of normal vector fimul [correct_tex] fiadd [correct_tex] - fistp 
word[ebp-2] + fistp [temp1] ;word[ebp-2] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient fimul [correct_tex] fiadd [correct_tex] - fistp word[ebp-4] + fistp [temp2] ;word[ebp-4] - movzx eax,word[ebp-4] - movzx ebx,word[ebp-2] + mov eax,[temp2] ;word[ebp-4] + mov ebx,[temp1] ;word[ebp-2] + and ebx,0xfffffff ; some objects need this 'and' shl eax,TEX_SHIFT add eax,ebx - lea eax,[eax*3+color_map] + lea eax,[eax*3] + add eax,color_map mov eax,dword[eax] ror eax,16 ; eax -0xxxrrggbb -> 0xggbbxxrr @@ -2370,12 +2460,13 @@ end if fimul [correct_tex] fiadd [correct_tex] fistp word[edi] + ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] fimul [correct_tex] fiadd [correct_tex] fistp word[edi+2] - + and word[edi+2],0x7fff ; some objects need it add edi,4 add esi,4 loop @b @@ -2488,15 +2579,16 @@ end if fld dword[eax] ; x cooficient of normal vector fimul [correct_tex] fiadd [correct_tex] - fistp word[esp-2] + fistp [temp1] ;word[esp-2] ; texture y=(rotated point normal -> y * 255)+255 fld dword[eax+4] ; y cooficient fimul [correct_tex] fiadd [correct_tex] - fistp word[esp-4] + fistp [temp2] ;word[esp-4] - movzx eax,word[esp-4] - movzx ebx,word[esp-2] + mov eax,[temp2] ;word[esp-4] + mov ebx,[temp1] ;word[esp-2] + and ebx,0xfffffff shl eax,TEX_SHIFT add eax,ebx lea eax,[eax*3+color_map] @@ -2509,11 +2601,6 @@ end if dec ecx jnz .again_line_param -; mov eax,[edges_ptr] ; this not works correctly -; add eax,[edges_counter] ; I mean chosing overlapped edges.
-; mov bl,[eax] ; -; test bl,00000001b ; -; jz @f ; mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] @@ -2543,11 +2630,6 @@ end if call smooth_line @@: -; mov eax,[edges_ptr] ; this not works correctly -; add eax,[edges_counter] -; mov bl,[eax] -; test bl,00000010b -; jz @f mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] @@ -2579,11 +2661,6 @@ end if call smooth_line @@: -; mov eax,[edges_ptr] ; this not works correctly -; add eax,[edges_counter] ; -; mov bl,[eax] ; -; test bl,00000100b ; -; jz @f ; mov edi,[screen_ptr] mov esi,[Zbuffer_ptr] @@ -2615,7 +2692,8 @@ end if call smooth_line jmp .end_draw @@: - if Ext >= SSE3 + +if Ext >= SSE3 .r_phg: @@ -2671,11 +2749,62 @@ end if call real_phong_tri_z jmp .end_draw - end if + + .glass: + + movd xmm5,[size_y_var] + punpcklwd xmm5,[the_zero] + pshufd xmm5,xmm5,01110011b + + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_normals_rot_ptr] + add ebx,[points_normals_rot_ptr] + add ecx,[points_normals_rot_ptr] + movups xmm0,[eax] + movups xmm1,[ebx] + movups xmm2,[ecx] + andps xmm0,[zero_hgst_dd] + andps xmm1,[zero_hgst_dd] + andps xmm2,[zero_hgst_dd] + xorps xmm3,xmm3 + + mov eax,[point_index1] + mov ebx,[point_index2] + mov ecx,[point_index3] + imul eax,[i12] + imul ebx,[i12] + imul ecx,[i12] + add eax,[points_rotated_ptr] + add ebx,[points_rotated_ptr] + add ecx,[points_rotated_ptr] + push dword[ecx+8] + push dword[ebx+8] + push dword[eax+8] + movups xmm4,[esp] + add esp,12 + andps xmm4,[zero_hgst_dd] + mov eax,dword[xx1] + ror eax,16 + mov ebx,dword[xx2] + ror ebx,16 + mov ecx,dword[xx3] + ror ecx,16 + mov edi,[screen_ptr] + mov esi,[Zbuffer_ptr] + call glass_tri + + jmp .end_draw +end if @@ -2683,10 +2812,10 @@ end if .end_draw: pop esi add esi,12 - inc [edges_counter] + pop ecx sub ecx,1 - ; cmp dword[esi],-1 + jnz .again_dts ret @@ -2716,7 +2845,7 @@ draw_handlers: movzx eax,word[size_x_var] cmp [dr_flag],12 - je @f + jge @f lea 
ebx,[eax*3] sub ebx,18 add eax,eax @@ -2774,7 +2903,7 @@ draw_handlers: push eax lea edi,[eax*3] cmp [dr_flag],12 - jne @f + jl @f add edi,[esp] @@: add esp,4 @@ -2799,7 +2928,7 @@ draw_handlers: mov word[eax],dx add eax,2 cmp [dr_flag],12 - jne @f + jl @f add edi,4 loop .do jmp .ad @@ -3073,7 +3202,6 @@ read_from_file: mov dword[edi],-1 ret -if USE_LFN alloc_mem_for_tp: mov eax, 68 cmp [re_alloc_flag],1 @@ -3092,28 +3220,6 @@ alloc_mem_for_tp: int 0x40 ; -> allocate memory to triangles mov [triangles_ptr], eax ; -> eax = pointer to allocated mem -; mov eax, 68 -; movzx ecx, [triangles_count_var] -; inc ecx -; mov edx,[edges_ptr] -; int 0x40 ; -> allocate memory to edges -; mov [edges_ptr], eax ; -> eax = pointer to allocated mem - -; mov eax,-1 ; fill edges list -; movzx ecx,[triangles_count_var] ; importand if object generated -; shr ecx,2 -; inc ecx -; mov edi,[edges_ptr] -; cld -; rep stosd - - -; mov eax, 68 -; mov ebx, 12 -; movzx ecx, [triangles_count_var] -; shl ecx, 4 -; int 0x40 -; mov [triangles_w_z_ptr], eax ; for trainagles_with_z list ; ststic memory mov eax, 68 @@ -3124,12 +3230,6 @@ alloc_mem_for_tp: int 0x40 ; -> allocate memory for triangles normals mov [triangles_normals_ptr], eax ; -> eax = pointer to allocated mem - ; mov eax, 68 - ; movzx ecx,[points_count_var] - ; lea ecx,[2+ecx*2] - ; mov edx,dword [vertices_index_ptr] - ; int 0x40 - ; mov dword[vertices_index_ptr], eax mov eax, 68 mov ecx, [points_count_var] @@ -3138,10 +3238,6 @@ alloc_mem_for_tp: mov edx,[points_normals_ptr] int 0x40 mov [points_normals_ptr], eax - ; int3 - - - ; int3 mov eax, 68 @@ -3171,7 +3267,7 @@ alloc_mem_for_tp: int 0x40 mov [points_translated_ptr], eax ret -end if + read_from_disk: