319 lines
6.3 KiB
PHP
319 lines
6.3 KiB
PHP
|
;// fast life generator: ~2.8 pixel*generations per clock cycle
|
||
|
|
||
|
; live_shl reg,enable
;   Conditional one-bit left shift.  When the second argument is the literal
;   token `yes` the macro emits `psllq reg,1` (every 64-bit lane of reg is
;   shifted left by one bit); with any other token, or none, it emits nothing.
macro live_shl reg,enable
{
  if enable eq yes
        psllq   reg,1
  end if
}
|
||
|
|
||
|
; live_shr reg,enable
;   Conditional one-bit right shift.  When the second argument is the literal
;   token `yes` the macro emits `psrlq reg,1` (every 64-bit lane of reg is
;   shifted right by one bit); with any other token, or none, it emits nothing.
macro live_shr reg,enable
{
  if enable eq yes
        psrlq   reg,1
  end if
}
|
||
|
|
||
|
; live_mov dst,base,mode
;   Loads one 128-bit register from the column addressed by `base`.
;     mode = low   : dst is zeroed, then its low  qword is read from [base+24]
;     mode = high  : dst is zeroed, then its high qword is read from [base+edx]
;     anything else: the full 16 bytes at [base+ecx] are read with movaps,
;                    so that address has to be 16-byte aligned
;   Reads edx or ecx depending on the mode; changes dst only.
macro live_mov dst,base,mode
{
  if mode eq low
        xorps   dst,dst
        movlps  dst,[base+24]
  else if mode eq high
        xorps   dst,dst
        movhps  dst,[base+edx]
  else
        movaps  dst,[base+ecx]
  end if
}
|
||
|
|
||
|
; live_load sum1,sum0,third,tmp,edi_shl,esi_shr,edge
;   Fetches the same 16-byte unit from three columns (EDI, EBX and ESI, via
;   live_mov with mode `edge`) and adds them bit by bit:
;     sum0 = E xor B xor S                     (bit 0 of the three-way sum)
;     sum1 = (E and B) or ((E xor B) and S)    (bit 1 of the three-way sum)
;   where E is the EDI word, shifted left by one bit when edi_shl is `yes`,
;   B is the EBX word, and S is the ESI word, shifted right by one bit when
;   esi_shr is `yes`.
;   `third` is left holding S and `tmp` holds an intermediate value.
;   The loads are interleaved with the arithmetic on purpose; keep the order.
macro live_load sum1,sum0,third,tmp,edi_shl,esi_shr,edge
{
        live_mov sum0,edi,edge          ; sum0 = E
        live_mov sum1,ebx,edge          ; sum1 = B
        live_shl sum0,edi_shl
        movaps  tmp,sum0                ; tmp  = E
        xorps   sum0,sum1               ; sum0 = E xor B
        live_mov third,esi,edge         ; third = S
        andps   sum1,tmp                ; sum1 = E and B
        live_shr third,esi_shr
        movaps  tmp,sum0
        xorps   sum0,third              ; sum0 = E xor B xor S
        andps   tmp,third               ; tmp  = (E xor B) and S
        orps    sum1,tmp                ; sum1 = carry of E + B + S
}
|
||
|
|
||
|
; live_operation ra1,ra0,rb1,rb0,rc1,rc0,acc,tmp
;   Produces the next state of one 16-byte unit and stores it at [ebp+ecx].
;
;   (ra1,ra0), (rb1,rb0), (rc1,rc0) are the two-bit horizontal sums of three
;   consecutive rows as produced by live_load (bit 1, bit 0).  The current
;   cells of the middle row are read from memory at [ebx+ecx+16].
;   With T = sum of all nine cells (centre included) the stored bit is
;       (T = 3) or (T = 4 and the centre cell is set)
;   which is evaluated here as
;       ((X0 xor K) and (K xor Q)) and (T0 or centre)
;   X0/K = parity / "exactly one carry" of ra1+rb1+rc1+(ra0 and rb0),
;   Q    = (ra0 xor rb0) or rc0,   T0 = ra0 xor rb0 xor rc0.
;
;   Overwrites ra1, ra0, acc and tmp.  rb1, rb0, rc1, rc0 are only read,
;   so the caller can reuse them for the next row.
macro live_operation ra1,ra0,rb1,rb0,rc1,rc0,acc,tmp
{
        movaps  tmp,ra0
        xorps   ra0,rb0                 ; ra0 = ra0 xor rb0
        andps   tmp,rb0                 ; tmp = carry out of the two low bits
        movaps  acc,ra1
        xorps   ra1,tmp
        andps   acc,tmp                 ; acc = first carry of the bit-1 column
        movaps  tmp,ra1
        xorps   ra1,rb1
        andps   tmp,rb1
        orps    acc,tmp
        movaps  tmp,ra1
        xorps   ra1,rc1                 ; ra1 = X0
        andps   tmp,rc1                 ; tmp = second carry
        xorps   acc,tmp                 ; acc = K (exactly one carry occurred)
        xorps   ra1,acc                 ; ra1 = X0 xor K
        movaps  tmp,ra0
        orps    tmp,rc0                 ; tmp = Q
        xorps   ra0,rc0                 ; ra0 = T0
        xorps   acc,tmp                 ; acc = K xor Q
        orps    ra0,[ebx+ecx+16]        ; ra0 = T0 or centre cell
        andps   ra1,acc
        andps   ra1,ra0
        movaps  [ebp+ecx],ra1           ; store the new generation
}
|
||
|
|
||
|
; live_cycle edi_shl,esi_shr
;   Computes one whole column of the next generation.
;
;   In:   ebx = column being processed, edi / esi = its two neighbour
;               columns (all three are addressed with the same offsets)
;         ebp = destination column
;         edx = column size in bytes
;         eax = step between consecutive 16-byte units (callers pass 16)
;   The two arguments are forwarded to live_load: `yes` in edi_shl shifts the
;   EDI column left by one bit, `yes` in esi_shr shifts the ESI column right.
;   Overwrites ecx and xmm0-xmm7.
;
;   ecx walks from edx down to 0.  The first live_load uses mode `low` and
;   the last one mode `high`, which supply the two boundary units.  In
;   between, the loop is unrolled four times so that the three row sums
;   rotate through the register pairs (xmm0,xmm1) (xmm2,xmm3) (xmm4,xmm5)
;   (xmm6,xmm7) without any register-to-register copies; the pair that is
;   about to be reloaded doubles as scratch for the preceding step.
macro live_cycle edi_shl,esi_shr
{
  local loop_top,loop_entry
        mov     ecx,edx
        live_load xmm2,xmm3,xmm4,xmm5,edi_shl,esi_shr,low
        live_load xmm4,xmm5,xmm6,xmm7,edi_shl,esi_shr
        sub     ecx,eax
        jmp     loop_entry              ; first pass skips phase 1 of 4

  loop_top:
        live_load xmm4,xmm5,xmm6,xmm7,edi_shl,esi_shr
        live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
        sub     ecx,eax
  loop_entry:
        live_load xmm6,xmm7,xmm0,xmm1,edi_shl,esi_shr
        live_operation xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm0,xmm1
        sub     ecx,eax
        live_load xmm0,xmm1,xmm2,xmm3,edi_shl,esi_shr
        live_operation xmm4,xmm5,xmm6,xmm7,xmm0,xmm1,xmm2,xmm3
        sub     ecx,eax
        live_load xmm2,xmm3,xmm4,xmm5,edi_shl,esi_shr
        live_operation xmm6,xmm7,xmm0,xmm1,xmm2,xmm3,xmm4,xmm5
        sub     ecx,eax
        jg      loop_top

        ; ecx is not positive any more: finish with the `high` boundary unit
        live_load xmm4,xmm5,xmm6,xmm7,edi_shl,esi_shr,high
        live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
}
|
||
|
|
||
|
;-----------------------------------------------------------------------
; OneGeneration_Flag12 - pre-pass over the source buffer (flag bits 0 and 1)
;
; Internal helper of @OneGeneration$qqsiipvpxvi.  It reads that routine's
; arguments straight from the stack, so it must be called with exactly one
; extra dword pushed on top of the four saved registers.  After the
; `push edi` below the layout is
;       [esp+32] = arg1   [esp+36] = arg2   [esp+48] = arg5 (flags)
;
; In:   eax = number of columns - 1
;       ebx = source base (rounded up to 16 bytes) minus 16
;       edx = column size in bytes
; Out:  esi and ecx are overwritten; eax and mm0 may be overwritten;
;       edi and edx are preserved
;
; Flag bit 1 set and bit 3 clear: the qword of row number arg2 is zeroed
;       in every column.
; Flag bit 0 set and bit 2 clear: the bit of cell number arg1 is cleared
;       in every row.
; Addressing used below: row r sits at byte offset r*16 inside a column, or
; at r*16 - edx + 8 once r*16 reaches the column size; cell x sits in column
; (x mod columns) at bit (x div columns).
;-----------------------------------------------------------------------
OneGeneration_Flag12:
        push    edi
        lea     esi,[eax+1]             ; esi = number of columns

        ; ---- flag bit 1 (unless bit 3): zero row number arg2 ----
        bt      dword [esp+48],1
        jnc     OneGeneration_flag2_end
        bt      dword [esp+48],3
        jc      OneGeneration_flag2_end
        mov     edi,[esp+36]            ; edi = arg2
        shl     edi,4
        cmp     edi,edx
        jb      OneGeneration_flag2_uphalf
        sub     edi,edx                 ; row lives in the second qword of a unit
        cmp     edi,edx
        jnb     OneGeneration_flag2_end ; row is outside the column: nothing to do
        add     edi,8
OneGeneration_flag2_uphalf:
        pxor    mm0,mm0
        lea     edi,[edi+ebx+16]        ; the +16 undoes the bias carried by ebx
        mov     ecx,esi                 ; one qword per column
OneGeneration_flag2_cycle:
        movq    [edi],mm0
        add     edi,edx
        loop    OneGeneration_flag2_cycle

OneGeneration_flag2_end:
        ; ---- flag bit 0 (unless bit 2): clear cell number arg1 in all rows ----
        bt      dword [esp+48],0
        jnc     OneGeneration_flag1_end
        bt      dword [esp+48],2
        jc      OneGeneration_flag1_end
        push    edx
        mov     eax,[esp+36]            ; arg1 (offsets are +4 while edx is pushed)
        xor     edx,edx
        div     esi                     ; eax = bit index, edx = column index
        mov     esi,edx
        pop     edx
        cmp     eax,64
        jnb     OneGeneration_flag1_end ; bit index does not fit into a qword
        imul    esi,edx
        lea     esi,[esi+ebx+16]        ; esi -> first qword of that column
        btr     eax,5                   ; bits 32..63 are in the upper dword
        jnc     OneGeneration_flag1_noadd4
        add     esi,4
OneGeneration_flag1_noadd4:
        mov     edi,8                   ; qword stride
        lea     ecx,[edx-8]             ; start at the last qword of the column
OneGeneration_flag1_cycle:
        ; four qwords per pass; presumably relies on the column size being a
        ; multiple of 32 bytes (the caller rounds it to a multiple of 64)
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        jnl     OneGeneration_flag1_cycle

OneGeneration_flag1_end:
        pop     edi
        ret
|
||
|
|
||
|
;-----------------------------------------------------------------------
; @OneGeneration$qqsiipvpxvi - compute one generation of the cell field
;
; Five dword arguments on the stack, removed by the callee (`ret 20`).
; NOTE(review): the mangled name looks like a Borland-style stdcall
; OneGeneration(int, int, void*, const void*, int) - confirm against the
; C++ declaration.  After the four pushes below:
;       [esp+20] arg1  number of cells along the bit axis
;       [esp+24] arg2  number of rows
;       [esp+28] arg3  destination buffer (rounded up to 16 bytes here)
;       [esp+32] arg4  source buffer      (rounded up to 16 bytes here)
;       [esp+36] arg5  flags, bits 0..3
;
; Buffer layout as used by this code:
;   * columns = (arg1-1)/64 + 1; each column is edx bytes, where edx is
;     8 bytes times arg2 rounded up to a multiple of 8
;   * cell x is bit (x div columns) of column (x mod columns)
;   * row r of a column is the qword at offset r*16, or at r*16 - edx + 8
;     once r*16 reaches edx
;
; Flags:
;   bits 0,1  source pre-pass, see OneGeneration_Flag12
;   bit 3     afterwards, in the destination: row 0 := row arg2-2 and
;             row arg2-1 := row 1, in every column
;   bit 2     afterwards, in the destination: cell 0 := cell arg1-2 and
;             cell arg1-1 := cell 1, in every row
;
; Returns immediately when arg1 < 1, when the column size is not positive,
; or when the offset of the last column overflows.
; ebp, ebx, esi, edi are saved and restored; eax, ecx, edx, xmm0-xmm7 and
; mm0 are overwritten on the full path (emms is executed before returning).
;-----------------------------------------------------------------------
@OneGeneration$qqsiipvpxvi:
        push    ebp
        push    ebx
        push    esi
        push    edi

        mov     ebx,[esp+32]            ; source
        mov     ebp,[esp+28]            ; destination
        mov     edx,[esp+24]            ; arg2
        mov     eax,[esp+20]            ; arg1
        dec     eax
        jl      OneGeneration_end       ; arg1 < 1
        shr     eax,6                   ; eax = number of columns - 1
        add     ebp,15
        and     ebp,not 15              ; destination rounded up to 16 bytes
        dec     ebx
        and     ebx,not 15              ; source rounded up to 16 bytes, minus 16
        add     edx,7
        shl     edx,3
        and     edx,not 63              ; edx = column size in bytes
        jng     OneGeneration_end
        test    eax,eax
        jz      OneGeneration_single

        ; ---- two or more columns ----
        mov     edi,edx
        imul    edi,eax                 ; byte offset of the last column
        jo      OneGeneration_end
        push    eax                     ; kept on the stack until OneGeneration_flag48
        add     edi,ebx                 ; edi = last column
        call    OneGeneration_Flag12
        lea     esi,[ebx+edx]           ; esi = second column
        push    dword [esp]             ; [esp] = loop counter (columns - 1)
        mov     eax,16                  ; unit step expected by live_cycle
        live_cycle yes,no               ; first column: EDI column shifted left
        jmp     OneGeneration_cycle_fin

OneGeneration_cycle:
        add     ebp,edx                 ; next destination column
        mov     edi,ebx                 ; slide the three-column window by one
        mov     ebx,esi
        add     esi,edx
        live_cycle no,no
OneGeneration_cycle_fin:
        dec     dword [esp]
        jg      OneGeneration_cycle

        ; ---- last column: the ESI side wraps to the first column ----
        pop     ecx                     ; discard the loop counter
        add     ebp,edx
        mov     edi,ebx
        mov     ebx,esi
        mov     esi,edx
        imul    esi,[esp]               ; edx * (columns - 1)
        neg     esi
        add     esi,ebx                 ; esi = first column
        live_cycle no,yes               ; ESI column shifted right
        jmp     OneGeneration_flag48

        ; ---- exactly one column: it is its own neighbour on both sides ----
OneGeneration_single:
        push    eax                     ; eax = 0; gives Flag12 the stack depth it expects
        mov     edi,ebx
        call    OneGeneration_Flag12
        mov     esi,ebx
        mov     eax,16
        live_cycle yes,yes

OneGeneration_flag48:
        pop     ebp
        inc     ebp                     ; ebp = number of columns

        ; ---- flag bit 3: copy rows arg2-2 -> 0 and 1 -> arg2-1 ----
        bt      dword [esp+36],3
        jnc     OneGeneration_flag8_end
        mov     ebx,[esp+28]
        add     ebx,15
        and     ebx,not 15              ; ebx = aligned destination
        mov     edi,[esp+24]
        dec     edi
        shl     edi,4                   ; offset of row arg2-1 ...
        lea     esi,[edi-16]            ; ... and of row arg2-2
        cmp     edi,edx
        jb      OneGeneration_flag8_uphalf
        sub     edi,edx                 ; row arg2-1 is a second-qword row
        add     edi,8
        cmp     esi,edx
        jb      OneGeneration_flag8_uphalf
        sub     esi,edx                 ; so is row arg2-2
        add     esi,8
OneGeneration_flag8_uphalf:
        mov     ecx,ebp                 ; once per column
OneGeneration_flag8_cycle:
        movq    mm0,[ebx+esi]
        movq    [ebx],mm0               ; row 0 := row arg2-2
        movq    mm0,[ebx+16]
        movq    [ebx+edi],mm0           ; row arg2-1 := row 1
        add     ebx,edx
        loop    OneGeneration_flag8_cycle

OneGeneration_flag8_end:
        ; ---- flag bit 2: copy cells arg1-2 -> 0 and 1 -> arg1-1 ----
        bt      dword [esp+36],2
        jnc     OneGeneration_flag4_end
        mov     eax,[esp+20]
        dec     eax                     ; index of the last cell
        push    edx
        mov     ebx,[esp+32]            ; arg3 (offsets are +4 while edx is pushed)
        add     ebx,15
        and     ebx,not 15              ; ebx = aligned destination
        xor     edx,edx
        div     ebp                     ; eax = bit, edx = column of the last cell
        mov     esi,eax                 ; esi = bit of cell arg1-1
        mov     edi,edx                 ; edi = column of cell arg1-1
        dec     edx
        jl      OneGeneration_flag4_dec0
        mov     ebp,edx                 ; cell arg1-2: previous column, same bit
        jmp     OneGeneration_flag4_after_dec
OneGeneration_flag4_dec0:
        dec     ebp                     ; cell arg1-2: last column, previous bit
        dec     eax
OneGeneration_flag4_after_dec:
        pop     edx
        imul    edi,edx
        imul    ebp,edx
        add     edi,ebx                 ; edi -> column holding cell arg1-1
        add     ebp,ebx                 ; ebp -> column holding cell arg1-2
        btr     esi,5                   ; bit index above 31: use the upper dword
        jnc     OneGeneration_flag4_noadd4f
        add     edi,4
OneGeneration_flag4_noadd4f:
        btr     eax,5
        jnc     OneGeneration_flag4_noadd4s
        add     ebp,4
OneGeneration_flag4_noadd4s:

        ; pass 1, every row: cell 0 := cell arg1-2
        mov     ecx,edx
        jmp     OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0:
        btr     dword [ebx+ecx],0
OneGeneration_flag4_cycle0_entry:
        sub     ecx,8
        jl      OneGeneration_flag4_cycle0_end
        bt      dword [ebp+ecx],eax
        jnc     OneGeneration_flag4_cycle0
        bts     dword [ebx+ecx],0
        jmp     OneGeneration_flag4_cycle0_entry

OneGeneration_flag4_cycle0_end:
        ; locate cell 1: bit 0 of the second column, or bit 1 of the only one
        xor     eax,eax
        cmp     dword [esp+20],64
        jng     OneGeneration_flag4_single
        add     ebx,edx
        jmp     OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_single:
        inc     eax
        jmp     OneGeneration_flag4_cycle1_entry

        ; pass 2, every row: cell arg1-1 := cell 1 (edx is consumed as the counter)
OneGeneration_flag4_cycle1:
        btr     dword [edi+edx],esi
OneGeneration_flag4_cycle1_entry:
        sub     edx,8
        jl      OneGeneration_flag4_end
        bt      dword [ebx+edx],eax
        jnc     OneGeneration_flag4_cycle1
        bts     dword [edi+edx],esi
        jmp     OneGeneration_flag4_cycle1_entry

OneGeneration_flag4_end:
        emms                            ; MMX registers were used above
OneGeneration_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret     20
|
||
|
|