Use xsave/xrstor if available, enable AVX* instructions.
git-svn-id: svn://kolibrios.org@7124 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
parent
11a65eeec4
commit
9e6b9b61f7
@ -8,7 +8,7 @@
|
||||
$Revision$
|
||||
|
||||
|
||||
dpl0 equ 10010000b ; data read dpl0
|
||||
dpl0 equ 10010000b ; data read dpl0
|
||||
drw0 equ 10010010b ; data read/write dpl0
|
||||
drw3 equ 11110010b ; data read/write dpl3
|
||||
cpl0 equ 10011010b ; code read dpl0
|
||||
@ -78,6 +78,12 @@ CAPS_CID equ 42 ;
|
||||
; 44
|
||||
CAPS_CX16 equ 45 ;CMPXCHG16B instruction
|
||||
CAPS_xTPR equ 46 ;
|
||||
CAPS_XSAVE equ (32 + 26) ; XSAVE and XRSTOR instructions
|
||||
CAPS_OSXSAVE equ (32 + 27)
|
||||
; A value of 1 indicates that the OS has set CR4.OSXSAVE[bit 18] to enable
|
||||
; XSETBV/XGETBV instructions to access XCR0 and to support processor extended
|
||||
; state management using XSAVE/XRSTOR.
|
||||
CAPS_AVX equ (32 + 28) ; not AVX2
|
||||
;
|
||||
;reserved
|
||||
;
|
||||
@ -117,34 +123,41 @@ CR0_CD equ 0x40000000 ;cache disable
|
||||
CR0_PG equ 0x80000000 ;paging
|
||||
|
||||
|
||||
CR4_VME equ 0x0001
|
||||
CR4_PVI equ 0x0002
|
||||
CR4_TSD equ 0x0004
|
||||
CR4_DE equ 0x0008
|
||||
CR4_PSE equ 0x0010
|
||||
CR4_PAE equ 0x0020
|
||||
CR4_MCE equ 0x0040
|
||||
CR4_PGE equ 0x0080
|
||||
CR4_PCE equ 0x0100
|
||||
CR4_OSFXSR equ 0x0200
|
||||
CR4_OSXMMEXPT equ 0x0400
|
||||
CR4_VME equ 0x000001
|
||||
CR4_PVI equ 0x000002
|
||||
CR4_TSD equ 0x000004
|
||||
CR4_DE equ 0x000008
|
||||
CR4_PSE equ 0x000010
|
||||
CR4_PAE equ 0x000020
|
||||
CR4_MCE equ 0x000040
|
||||
CR4_PGE equ 0x000080
|
||||
CR4_PCE equ 0x000100
|
||||
CR4_OSFXSR equ 0x000200
|
||||
CR4_OSXMMEXPT equ 0x000400
|
||||
CR4_OSXSAVE equ 0x040000
|
||||
|
||||
SSE_IE equ 0x0001
|
||||
SSE_DE equ 0x0002
|
||||
SSE_ZE equ 0x0004
|
||||
SSE_OE equ 0x0008
|
||||
SSE_UE equ 0x0010
|
||||
SSE_PE equ 0x0020
|
||||
SSE_DAZ equ 0x0040
|
||||
SSE_IM equ 0x0080
|
||||
SSE_DM equ 0x0100
|
||||
SSE_ZM equ 0x0200
|
||||
SSE_OM equ 0x0400
|
||||
SSE_UM equ 0x0800
|
||||
SSE_PM equ 0x1000
|
||||
SSE_FZ equ 0x8000
|
||||
; Bit masks for XCR0. init_fpu ANDs a selection of these with the EAX result of
; CPUID leaf 0x0d (ecx = 0) and ORs the result into XCR0 with xgetbv/xsetbv.
XCR0_FPU_MMX equ 0x0001 ; bit 0
|
||||
XCR0_SSE equ 0x0002 ; bit 1
|
||||
XCR0_AVX equ 0x0004 ; bit 2
|
||||
XCR0_MPX equ 0x0018 ; bits 3-4 (two-bit mask)
|
||||
XCR0_AVX512 equ 0x00e0 ; bits 5-7 (three-bit mask; a `test` against it is non-zero if ANY of the bits is set)
|
||||
|
||||
SSE_INIT equ (SSE_IM+SSE_DM+SSE_ZM+SSE_OM+SSE_UM+SSE_PM)
|
||||
MXCSR_IE equ 0x0001
|
||||
MXCSR_DE equ 0x0002
|
||||
MXCSR_ZE equ 0x0004
|
||||
MXCSR_OE equ 0x0008
|
||||
MXCSR_UE equ 0x0010
|
||||
MXCSR_PE equ 0x0020
|
||||
MXCSR_DAZ equ 0x0040
|
||||
MXCSR_IM equ 0x0080
|
||||
MXCSR_DM equ 0x0100
|
||||
MXCSR_ZM equ 0x0200
|
||||
MXCSR_OM equ 0x0400
|
||||
MXCSR_UM equ 0x0800
|
||||
MXCSR_PM equ 0x1000
|
||||
MXCSR_FZ equ 0x8000
|
||||
|
||||
MXCSR_INIT equ (MXCSR_IM+MXCSR_DM+MXCSR_ZM+MXCSR_OM+MXCSR_UM+MXCSR_PM)
|
||||
|
||||
IRQ_PIC equ 0
|
||||
IRQ_APIC equ 1
|
||||
@ -252,7 +265,7 @@ new_app_base equ 0;
|
||||
twdw equ 0x2000 ;(CURRENT_TASK-window_data)
|
||||
|
||||
std_application_base_address equ new_app_base
|
||||
RING0_STACK_SIZE equ (0x2000 - 512) ;512 bytes for the FPU context
|
||||
RING0_STACK_SIZE equ 0x2000
|
||||
|
||||
REG_SS equ (RING0_STACK_SIZE-4)
|
||||
REG_APP_ESP equ (RING0_STACK_SIZE-8)
|
||||
|
@ -169,7 +169,7 @@ debug_getcontext:
|
||||
|
||||
.ring0:
|
||||
; note that following code assumes that all interrupt/exception handlers
|
||||
; saves ring-3 context by pushad in this order
|
||||
; save ring-3 context by pushad in this order
|
||||
; top of ring0 stack: ring3 stack ptr (ss+esp), iret data (cs+eip+eflags), pushad
|
||||
sub esi, 8+12+20h
|
||||
lodsd ;edi
|
||||
|
@ -1,6 +1,6 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; ;;
|
||||
;; Copyright (C) KolibriOS team 2004-2015. All rights reserved. ;;
|
||||
;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
|
||||
;; Distributed under terms of the GNU General Public License ;;
|
||||
;; ;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
@ -12,19 +12,76 @@ init_fpu:
|
||||
clts
|
||||
fninit
|
||||
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
jnc .no_SSE
|
||||
bt [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
|
||||
jnc .no_xsave
|
||||
|
||||
mov ecx, cr4
|
||||
or ecx, CR4_OSXSAVE
|
||||
mov cr4, ecx
|
||||
|
||||
mov eax, 0x0d
|
||||
xor ecx, ecx
|
||||
cpuid
|
||||
mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
|
||||
and ebx, eax
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
or eax, ebx
|
||||
xor ecx, ecx
|
||||
xsetbv
|
||||
|
||||
mov eax, 0x0d
|
||||
xor ecx, ecx
|
||||
cpuid
|
||||
mov [xsave_area_size], ebx
|
||||
|
||||
test eax, XCR0_AVX512
|
||||
jz @f
|
||||
call init_avx512
|
||||
ret
|
||||
@@:
|
||||
test eax, XCR0_AVX
|
||||
jz @f
|
||||
call init_avx
|
||||
ret
|
||||
@@:
|
||||
test eax, XCR0_SSE
|
||||
jz @f
|
||||
call init_sse
|
||||
ret
|
||||
@@:
|
||||
call init_fpu_mmx
|
||||
ret
|
||||
.no_xsave:
|
||||
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
jnc @f
|
||||
call init_sse
|
||||
fxsave [fpu_data]
|
||||
ret
|
||||
@@:
|
||||
call init_fpu_mmx
|
||||
fnsave [fpu_data]
|
||||
ret
|
||||
|
||||
;-----------------------------------------------------------------------
; init_fpu_mmx: program CR0 for the x87/MMX-only configuration.
; In:    nothing
; Out:   CR0 with CR0_MP and CR0_NE set and CR0_EM cleared
; Clobb: ecx
;-----------------------------------------------------------------------
init_fpu_mmx:
        mov     ecx, cr0
        or      ecx, CR0_NE + CR0_MP    ; raise the bits we want set ...
        and     ecx, not CR0_EM         ; ... and drop the one we want clear
        mov     cr0, ecx
        ret
|
||||
|
||||
init_sse:
|
||||
mov ebx, cr4
|
||||
mov ecx, cr0
|
||||
or ebx, CR4_OSFXSR+CR4_OSXMMEXPT
|
||||
mov cr4, ebx
|
||||
|
||||
and ecx, not (CR0_MP+CR0_EM)
|
||||
and ecx, not (CR0_EM + CR0_MP)
|
||||
or ecx, CR0_NE
|
||||
mov cr0, ecx
|
||||
|
||||
mov dword [esp-4], SSE_INIT
|
||||
mov dword [esp-4], MXCSR_INIT
|
||||
ldmxcsr [esp-4]
|
||||
|
||||
xorps xmm0, xmm0
|
||||
@ -35,14 +92,46 @@ init_fpu:
|
||||
xorps xmm5, xmm5
|
||||
xorps xmm6, xmm6
|
||||
xorps xmm7, xmm7
|
||||
fxsave [fpu_data] ;[eax]
|
||||
ret
|
||||
.no_SSE:
|
||||
|
||||
;-----------------------------------------------------------------------
; init_avx: set up CR4/CR0 for SSE/AVX use, load the default MXCSR and
; clear the vector registers.
; In:    nothing
; Out:   nothing
; Clobb: ebx, ecx, MXCSR, vector registers (vzeroall)
; Note:  called from init_fpu after it has written XCR0 with xsetbv.
;-----------------------------------------------------------------------
init_avx:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        ; A single mask pair is enough: CR0_EM and CR0_MP cleared, CR0_NE set.
        ; (The earlier "clear EM / set MP+NE" pair was immediately overridden,
        ; and the fnsave to fpu_data that followed belonged to the old
        ; FPU-only path; fnsave also re-initialises the x87 unit.)
        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR in an allocated stack slot. Memory below esp is not
        ; protected in ring 0: an interrupt/NMI frame pushed on this stack
        ; would overwrite a value parked at [esp-4] before it is loaded.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        vzeroall
        ret
|
||||
|
||||
;-----------------------------------------------------------------------
; init_avx512: set up CR4/CR0 for SSE/AVX-512 use, load the default
; MXCSR and zero zmm0-zmm7.
; In:    nothing
; Out:   nothing
; Clobb: ebx, ecx, MXCSR, zmm0-zmm7
; Note:  called from init_fpu after it has written XCR0 with xsetbv.
;-----------------------------------------------------------------------
init_avx512:
        mov     ebx, cr4
        or      ebx, CR4_OSFXSR + CR4_OSXMMEXPT
        mov     cr4, ebx

        mov     ecx, cr0
        and     ecx, not (CR0_EM + CR0_MP)
        or      ecx, CR0_NE
        mov     cr0, ecx

        ; Stage MXCSR in an allocated stack slot. Memory below esp is not
        ; protected in ring 0: an interrupt/NMI frame pushed on this stack
        ; would overwrite a value parked at [esp-4] before it is loaded.
        push    dword MXCSR_INIT
        vldmxcsr [esp]
        lea     esp, [esp+4]            ; release the slot without touching flags

        ; Start from all-zero vector registers.
        vpxorq  zmm0, zmm0, zmm0
        vpxorq  zmm1, zmm1, zmm1
        vpxorq  zmm2, zmm2, zmm2
        vpxorq  zmm3, zmm3, zmm3
        vpxorq  zmm4, zmm4, zmm4
        vpxorq  zmm5, zmm5, zmm5
        vpxorq  zmm6, zmm6, zmm6
        vpxorq  zmm7, zmm7, zmm7

        ret
|
||||
|
||||
; param
|
||||
@ -90,6 +179,11 @@ fpu_save:
|
||||
|
||||
align 4
|
||||
save_context:
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
|
||||
jnc .no_xsave
|
||||
xsave [eax]
|
||||
ret
|
||||
.no_xsave:
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
jnc .no_SSE
|
||||
|
||||
@ -115,6 +209,14 @@ fpu_restore:
|
||||
jne .copy
|
||||
|
||||
clts
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
|
||||
jnc .no_xsave
|
||||
xrstor [esi]
|
||||
popfd
|
||||
pop esi
|
||||
pop ecx
|
||||
ret
|
||||
.no_xsave:
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
jnc .no_SSE
|
||||
|
||||
|
@ -901,7 +901,9 @@ proc set_app_params stdcall,slot:dword, params:dword, flags:dword
|
||||
pl0_stack dd ?
|
||||
endl
|
||||
|
||||
stdcall kernel_alloc, RING0_STACK_SIZE+512
|
||||
mov eax, [xsave_area_size]
|
||||
add eax, RING0_STACK_SIZE
|
||||
stdcall kernel_alloc, eax
|
||||
mov [pl0_stack], eax
|
||||
|
||||
lea edi, [eax+RING0_STACK_SIZE]
|
||||
|
@ -432,6 +432,8 @@ cpu_sign dd ?
|
||||
cpu_info dd ?
|
||||
cpu_caps rd 4
|
||||
|
||||
xsave_area_size dd ?
|
||||
|
||||
pg_data PG_DATA
|
||||
heap_test dd ?
|
||||
|
||||
|
@ -1184,7 +1184,7 @@ proc setup_os_slot
|
||||
mov [edx+APPDATA.io_map+4], eax
|
||||
|
||||
mov dword [edx+APPDATA.pl0_stack], ebx
|
||||
lea edi, [ebx+0x2000-512]
|
||||
lea edi, [ebx+RING0_STACK_SIZE]
|
||||
mov dword [edx+APPDATA.fpu_state], edi
|
||||
mov dword [edx+APPDATA.saved_esp0], edi
|
||||
mov dword [edx+APPDATA.saved_esp], edi
|
||||
|
Loading…
Reference in New Issue
Block a user