420 lines
9.1 KiB
PHP
420 lines
9.1 KiB
PHP
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
$Revision$
|
||
|
|
||
|
|
||
|
; init_fpu - bring up the x87/SIMD unit and store the initial register image
;            in fpu_data.
;
; XSAVE path (CAPS_XSAVE set in cpu_caps):
;   * sets CR4_OSXSAVE in CR4 and records CAPS_OSXSAVE in cpu_caps
;   * zeroes the 64 bytes that follow the 512-byte legacy region of fpu_data
;   * loads XCR0 with the CPU-reported subset of FPU/MMX + SSE + AVX + AVX-512
;     and keeps that mask in xsave_eax / xsave_edx
;   * stores the area size, rounded up to 64 bytes, in xsave_area_size
;   * runs the widest available init_* routine and saves with xsave
; Legacy path:
;   * xsave_area_size = 512, then init_sse + fxsave or init_fpu_mmx + fnsave
;
; The routine spins forever (`ja $` / `jz $`) when the reported area does not
; fit in fpu_data_size or when XSAVE is present without SSE state support.
;
; Clobbers: eax, ebx, ecx, edx, edi, flags.
init_fpu:
        clts
        fninit

        bt      [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
        jnc     .no_xsave

        mov     ecx, cr4
        or      ecx, CR4_OSXSAVE
        mov     cr4, ecx
        ; don't call cpuid again
        bts     [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32

        ; zero xsave header
        mov     ecx, 64/4
        xor     eax, eax
        mov     edi, fpu_data + 512     ; skip legacy region
        cld                             ; stosd must walk upwards
        rep stosd

        mov     eax, 0x0d               ; extended state enumeration main leaf
        xor     ecx, ecx
        cpuid
        ; keep only the state components this kernel manages
        and     eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
        xor     edx, edx
        mov     [xsave_eax], eax
        mov     [xsave_edx], edx
        xor     ecx, ecx                ; ecx = 0 selects XCR0
        xsetbv

        ; query the leaf again now that XCR0 is programmed; ebx is used as the
        ; save area size
        mov     eax, 0x0d
        xor     ecx, ecx
        cpuid
        add     ebx, 63                 ; round up to a multiple of 64
        and     ebx, NOT 63
        mov     [xsave_area_size], ebx
        cmp     ebx, fpu_data_size
        ja      $                       ; area does not fit: hang here

        ; pick the widest register file reported in eax
        test    eax, XCR0_AVX512
        jz      .try_avx
        call    init_avx512
        jmp     .store_image
.try_avx:
        test    eax, XCR0_AVX
        jz      .try_sse
        call    init_avx
        jmp     .store_image
.try_sse:
        test    eax, XCR0_SSE
        jz      $                       ; XSAVE without SSE state: hang here
        call    init_sse
.store_image:
        ; edx:eax = component mask for xsave
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [fpu_data]
        ret

.no_xsave:
        mov     [xsave_area_size], 512  ; enough for FPU/MMX and SSE
        bt      [cpu_caps], CAPS_SSE
        jnc     .fpu_mmx
.sse:
        call    init_sse
        fxsave  [fpu_data]
        ret
.fpu_mmx:
        call    init_fpu_mmx
        fnsave  [fpu_data]
        ret
|
||
|
|
||
|
; init_fpu_mmx - program CR0 for x87/MMX-only operation.
; Sets CR0_MP and CR0_NE and clears CR0_EM.
; Clobbers: ecx, flags.
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_MP + CR0_NE
        and     ecx, not CR0_EM
        mov     cr0, ecx
        ret
|
||
|
|
||
|
; init_sse - enable SSE and put its registers into a known state.
;   CR4: sets CR4_OSFXSR and CR4_OSXMMEXPT
;   CR0: clears CR0_EM and CR0_MP, sets CR0_NE
;   MXCSR is loaded with MXCSR_INIT; xmm0-xmm7 are zeroed.
; Clobbers: ebx, ecx, flags.
init_sse:
        mov     ebx, cr4
        mov     ecx, cr0
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. A store below esp can be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; before ldmxcsr reads it.
        push    dword MXCSR_INIT
        ldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot; lea leaves flags alone

        xorps   xmm0, xmm0
        xorps   xmm1, xmm1
        xorps   xmm2, xmm2
        xorps   xmm3, xmm3
        xorps   xmm4, xmm4
        xorps   xmm5, xmm5
        xorps   xmm6, xmm6
        xorps   xmm7, xmm7
        ret
|
||
|
|
||
|
; init_avx - enable SSE/AVX and put the vector registers into a known state.
;   CR4: sets CR4_OSFXSR and CR4_OSXMMEXPT
;   CR0: clears CR0_EM and CR0_MP, sets CR0_NE
;   MXCSR is loaded with MXCSR_INIT; vzeroall clears the vector registers.
; Clobbers: ebx, ecx, flags.
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. A store below esp can be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot; lea leaves flags alone

        vzeroall
        ret
|
||
|
|
||
|
; init_avx512 - enable SSE/AVX-512 and zero zmm0-zmm7.
;   CR4: sets CR4_OSFXSR and CR4_OSXMMEXPT
;   CR0: clears CR0_EM and CR0_MP, sets CR0_NE
;   MXCSR is loaded with MXCSR_INIT.
; Clobbers: ebx, ecx, flags.
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage the MXCSR value in a real stack slot. A store below esp can be
        ; overwritten by an interrupt/NMI frame pushed on this same stack
        ; before vldmxcsr reads it.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; drop the slot; lea leaves flags alone

        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
|
||
|
|
||
|
; fpu_save - write the calling slot's FPU/SSE state into a caller buffer.
; param
;  eax= 512 bytes memory area aligned on a 16-byte boundary
;
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
;  * fpu_owner == current_slot_idx: the live registers are stored straight
;    into the buffer via save_fpu_context.
;  * otherwise: the current slot becomes fpu_owner, the previous owner's
;    registers are saved into its APPDATA.fpu_state area, 512 bytes of the
;    current slot's saved area are copied into the buffer, and the FPU is
;    reset with fninit.
; Preserves ecx, esi, edi. eax is overwritten when ownership changes.

align 4
fpu_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        mov     edi, eax                ; edi = destination buffer
        clts

        mov     esi, [current_slot_idx]
        mov     ecx, [fpu_owner]
        cmp     ecx, esi
        jne     .switch_owner

        call    save_fpu_context        ; eax still points at the buffer
        jmp     .done

.switch_owner:
        mov     [fpu_owner], esi

        shl     ecx, 8                  ; previous owner index * 256
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        ; first 512 bytes of XSAVE area have the same format as FXSAVE
        shl     esi, 8                  ; current slot index * 256
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        cld
        mov     ecx, 512/4
        rep movsd
        fninit
.done:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
|
||
|
|
||
|
; avx_save_size - report the size of the buffer avx_save/avx_restore work on.
; out: eax = [xsave_area_size]
avx_save_size:
        mov     eax, [xsave_area_size]
        ret
|
||
|
|
||
|
; avx_save - write the calling slot's extended FPU state into a caller buffer.
; param
;  eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
;
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
;  * fpu_owner == current_slot_idx: the live registers are stored straight
;    into the buffer via save_context.
;  * otherwise: the current slot becomes fpu_owner, the previous owner's
;    registers are saved into its APPDATA.fpu_state area, xsave_area_size
;    bytes of the current slot's saved area are copied into the buffer, and
;    the FPU is reset with fninit.
; Preserves ecx, esi, edi. eax is overwritten when ownership changes.

align 4
avx_save:
        push    ecx
        push    esi
        push    edi

        pushfd
        cli

        clts
        mov     edi, eax                ; edi = destination buffer

        mov     ecx, [fpu_owner]
        mov     esi, [current_slot_idx]
        cmp     ecx, esi
        jne     .save

        call    save_context            ; eax still points at the buffer
        jmp     .exit
.save:
        mov     [fpu_owner], esi

        shl     ecx, 8                  ; previous owner index * 256
        mov     eax, [ecx+SLOT_BASE+APPDATA.fpu_state]

        call    save_context

        shl     esi, 8                  ; current slot index * 256
        mov     esi, [esi+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; byte count -> dword count, rounded up
        shr     ecx, 2
        cld                             ; caller's DF is live here: copy upwards
        rep movsd
        fninit
.exit:
        popfd
        pop     edi
        pop     esi
        pop     ecx
        ret
|
||
|
|
||
|
; save_context - store the live FPU state at [eax] using the widest save
;                instruction the kernel has enabled.
; in:  eax = destination save area
; With CAPS_OSXSAVE set in cpu_caps: xsave with the xsave_edx:xsave_eax mask;
; eax and edx are preserved, ecx is left pointing at the save area.
; Otherwise control passes to save_fpu_context.
align 4
save_context:
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     save_fpu_context

        mov     ecx, eax                ; xsave needs edx:eax for the mask
        push    eax
        push    edx
        mov     edx, [xsave_edx]
        mov     eax, [xsave_eax]
        xsave   [ecx]
        pop     edx
        pop     eax
        ret
|
||
|
; save_fpu_context - store the live FPU state at [eax] without XSAVE.
; in:  eax = destination save area
; Uses fxsave when CAPS_SSE is set in cpu_caps, fnsave otherwise.
save_fpu_context:
        bt      [cpu_caps], CAPS_SSE
        jc      .with_sse
        fnsave  [eax]
        ret
.with_sse:
        fxsave  [eax]
        ret
|
||
|
|
||
|
|
||
|
; fpu_restore - load FPU/SSE state for the calling slot from a caller buffer.
; in:  eax = source buffer (512 bytes are read on the copy path)
;
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
;  * fpu_owner == current_slot_idx: the buffer is loaded into the registers
;    with fxrstor (CAPS_SSE) or fnclex + frstor.
;  * otherwise: 512 bytes of the buffer are copied into the current slot's
;    APPDATA.fpu_state area; the registers are not touched.
; Preserves ecx, esi, edi. eax is overwritten.
align 4
fpu_restore:
        push    ecx
        push    esi

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
.exit:
        popfd
        pop     esi
        pop     ecx
        ret
.copy:
        push    edi                     ; movsd advances edi: keep caller's copy
        shl     eax, 8                  ; current slot index * 256
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, 512/4
        cld
        rep movsd
        pop     edi
        jmp     .exit
|
||
|
|
||
|
; avx_restore - load extended FPU state for the calling slot from a caller
;               buffer.
; in:  eax = source buffer (xsave_area_size bytes are read on the copy path)
;
; Runs with interrupts disabled; the caller's EFLAGS are restored on exit.
;  * fpu_owner == current_slot_idx: the buffer is loaded into the registers
;    with xrstor (CAPS_OSXSAVE, mask xsave_edx:xsave_eax), fxrstor (CAPS_SSE)
;    or fnclex + frstor.
;  * otherwise: xsave_area_size bytes of the buffer are copied into the
;    current slot's APPDATA.fpu_state area; the registers are not touched.
; Preserves ecx, edx, esi, edi. eax is overwritten.
align 4
avx_restore:
        push    ecx
        push    esi

        mov     esi, eax                ; esi = source buffer

        pushfd
        cli

        mov     ecx, [fpu_owner]
        mov     eax, [current_slot_idx]
        cmp     ecx, eax
        jne     .copy

        clts
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .no_xsave

        push    edx
        mov     eax, [xsave_eax]        ; edx:eax = component mask for xrstor
        mov     edx, [xsave_edx]
        xrstor  [esi]
        pop     edx
        jmp     .exit
.no_xsave:
        bt      [cpu_caps], CAPS_SSE
        jnc     .no_SSE

        fxrstor [esi]
        jmp     .exit
.no_SSE:
        fnclex                          ;fix possible problems
        frstor  [esi]
.exit:
        popfd
        pop     esi
        pop     ecx
        ret
.copy:
        push    edi                     ; movsd advances edi: keep caller's copy
        shl     eax, 8                  ; current slot index * 256
        mov     edi, [eax+SLOT_BASE+APPDATA.fpu_state]
        mov     ecx, [xsave_area_size]
        add     ecx, 3                  ; byte count -> dword count, rounded up
        shr     ecx, 2
        cld
        rep movsd
        pop     edi
        jmp     .exit
|
||
|
|
||
|
; except_7 - #NM exception handler: hands the FPU to the current slot.
; Clears CR0.TS, loads ds/es with app_data, and, unless the current slot is
; already fpu_owner, saves the old owner's registers into its
; APPDATA.fpu_state area, records the current slot as fpu_owner and loads the
; current slot's saved area. The save/restore pair is xsave/xrstor,
; fxsave/fxrstor or fnsave/frstor, depending on cpu_caps.
align 4
except_7:                               ;#NM exception handler
        save_ring3_context
        clts
        mov     ax, app_data
        mov     ds, ax
        mov     es, ax

        mov     ebx, [fpu_owner]
        cmp     ebx, [current_slot_idx]
        je      .leave                  ; current slot already owns the FPU

        shl     ebx, 8                  ; old owner index * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        bt      [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
        jnc     .legacy

        ; XSAVE path: edx:eax carries the component mask for both instructions
        mov     ecx, eax
        mov     eax, [xsave_eax]
        mov     edx, [xsave_edx]
        xsave   [ecx]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; new owner index * 256
        mov     ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
        xrstor  [ecx]
.leave:
        restore_ring3_context
        iret

.legacy:
        bt      [cpu_caps], CAPS_SSE
        jnc     .x87_only

        fxsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; new owner index * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        fxrstor [eax]
        jmp     .leave

.x87_only:
        fnsave  [eax]
        mov     ebx, [current_slot_idx]
        mov     [fpu_owner], ebx
        shl     ebx, 8                  ; new owner index * 256
        mov     eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
        frstor  [eax]
        jmp     .leave
|
||
|
|
||
|
; fpu_owner - index of the slot whose state currently lives in the FPU
; registers; compared against current_slot_idx by the save/restore routines.
; NOTE(review): the initial value 2 is presumably the first slot to run
; after boot - confirm against the scheduler setup.
iglobal
        fpu_owner       dd 2
endg
|