Use xsave/xrstor if available, enable AVX* instructions.

git-svn-id: svn://kolibrios.org@7124 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Ivan Baravy 2017-12-05 05:24:21 +00:00
parent 11a65eeec4
commit 9e6b9b61f7
6 changed files with 160 additions and 41 deletions

View File

@ -8,7 +8,7 @@
$Revision$ $Revision$
dpl0 equ 10010000b ; data read dpl0 dpl0 equ 10010000b ; data read dpl0
drw0 equ 10010010b ; data read/write dpl0 drw0 equ 10010010b ; data read/write dpl0
drw3 equ 11110010b ; data read/write dpl3 drw3 equ 11110010b ; data read/write dpl3
cpl0 equ 10011010b ; code read dpl0 cpl0 equ 10011010b ; code read dpl0
@ -78,6 +78,12 @@ CAPS_CID equ 42 ;
; 44 ; 44
CAPS_CX16 equ 45 ;CMPXCHG16B instruction CAPS_CX16 equ 45 ;CMPXCHG16B instruction
CAPS_xTPR equ 46 ; CAPS_xTPR equ 46 ;
; The CAPS_* values are bit numbers inside the cpu_caps array (they are tested
; with `bt [cpu_caps+(CAPS_x/8)], CAPS_x mod 8`); 32 + n is bit n of the second dword.
; NOTE(review): presumably the second dword holds CPUID.01h:ECX - confirm against
; the code that fills cpu_caps.
CAPS_XSAVE equ (32 + 26) ; XSAVE and XRSTOR instructions
CAPS_OSXSAVE equ (32 + 27)
; A value of 1 indicates that the OS has set CR4.OSXSAVE[bit 18] to enable
; XSETBV/XGETBV instructions to access XCR0 and to support processor extended
; state management using XSAVE/XRSTOR.
CAPS_AVX equ (32 + 28) ; not AVX2
; ;
;reserved ;reserved
; ;
@ -117,34 +123,41 @@ CR0_CD equ 0x40000000 ;cache disable
CR0_PG equ 0x80000000 ;paging CR0_PG equ 0x80000000 ;paging
CR4_VME equ 0x0001 CR4_VME equ 0x000001
CR4_PVI equ 0x0002 CR4_PVI equ 0x000002
CR4_TSD equ 0x0004 CR4_TSD equ 0x000004
CR4_DE equ 0x0008 CR4_DE equ 0x000008
CR4_PSE equ 0x0010 CR4_PSE equ 0x000010
CR4_PAE equ 0x0020 CR4_PAE equ 0x000020
CR4_MCE equ 0x0040 CR4_MCE equ 0x000040
CR4_PGE equ 0x0080 CR4_PGE equ 0x000080
CR4_PCE equ 0x0100 CR4_PCE equ 0x000100
CR4_OSFXSR equ 0x0200 CR4_OSFXSR equ 0x000200
CR4_OSXMMEXPT equ 0x0400 CR4_OSXMMEXPT equ 0x000400
CR4_OSXSAVE equ 0x040000
SSE_IE equ 0x0001 XCR0_FPU_MMX equ 0x0001
SSE_DE equ 0x0002 XCR0_SSE equ 0x0002
SSE_ZE equ 0x0004 XCR0_AVX equ 0x0004
SSE_OE equ 0x0008 XCR0_MPX equ 0x0018
SSE_UE equ 0x0010 XCR0_AVX512 equ 0x00e0
SSE_PE equ 0x0020
SSE_DAZ equ 0x0040
SSE_IM equ 0x0080
SSE_DM equ 0x0100
SSE_ZM equ 0x0200
SSE_OM equ 0x0400
SSE_UM equ 0x0800
SSE_PM equ 0x1000
SSE_FZ equ 0x8000
SSE_INIT equ (SSE_IM+SSE_DM+SSE_ZM+SSE_OM+SSE_UM+SSE_PM) MXCSR_IE equ 0x0001
MXCSR_DE equ 0x0002
MXCSR_ZE equ 0x0004
MXCSR_OE equ 0x0008
MXCSR_UE equ 0x0010
MXCSR_PE equ 0x0020
MXCSR_DAZ equ 0x0040
MXCSR_IM equ 0x0080
MXCSR_DM equ 0x0100
MXCSR_ZM equ 0x0200
MXCSR_OM equ 0x0400
MXCSR_UM equ 0x0800
MXCSR_PM equ 0x1000
MXCSR_FZ equ 0x8000
MXCSR_INIT equ (MXCSR_IM+MXCSR_DM+MXCSR_ZM+MXCSR_OM+MXCSR_UM+MXCSR_PM)
IRQ_PIC equ 0 IRQ_PIC equ 0
IRQ_APIC equ 1 IRQ_APIC equ 1
@ -252,7 +265,7 @@ new_app_base equ 0;
twdw equ 0x2000 ;(CURRENT_TASK-window_data) twdw equ 0x2000 ;(CURRENT_TASK-window_data)
std_application_base_address equ new_app_base std_application_base_address equ new_app_base
RING0_STACK_SIZE equ (0x2000 - 512) ;512 байт для контекста FPU RING0_STACK_SIZE equ 0x2000
REG_SS equ (RING0_STACK_SIZE-4) REG_SS equ (RING0_STACK_SIZE-4)
REG_APP_ESP equ (RING0_STACK_SIZE-8) REG_APP_ESP equ (RING0_STACK_SIZE-8)

View File

@ -169,7 +169,7 @@ debug_getcontext:
.ring0: .ring0:
; note that following code assumes that all interrupt/exception handlers ; note that following code assumes that all interrupt/exception handlers
; saves ring-3 context by pushad in this order ; save ring-3 context by pushad in this order
; top of ring0 stack: ring3 stack ptr (ss+esp), iret data (cs+eip+eflags), pushad ; top of ring0 stack: ring3 stack ptr (ss+esp), iret data (cs+eip+eflags), pushad
sub esi, 8+12+20h sub esi, 8+12+20h
lodsd ;edi lodsd ;edi

View File

@ -1,6 +1,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;; ;; ;;
;; Copyright (C) KolibriOS team 2004-2015. All rights reserved. ;; ;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License ;; ;; Distributed under terms of the GNU General Public License ;;
;; ;; ;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -12,19 +12,76 @@ init_fpu:
clts clts
fninit fninit
bt [cpu_caps], CAPS_SSE bt [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8
jnc .no_SSE jnc .no_xsave
mov ecx, cr4
or ecx, CR4_OSXSAVE
mov cr4, ecx
mov eax, 0x0d
xor ecx, ecx
cpuid
mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
and ebx, eax
xor ecx, ecx
xgetbv
or eax, ebx
xor ecx, ecx
xsetbv
mov eax, 0x0d
xor ecx, ecx
cpuid
mov [xsave_area_size], ebx
test eax, XCR0_AVX512
jz @f
call init_avx512
ret
@@:
test eax, XCR0_AVX
jz @f
call init_avx
ret
@@:
test eax, XCR0_SSE
jz @f
call init_sse
ret
@@:
call init_fpu_mmx
ret
.no_xsave:
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE
bt [cpu_caps], CAPS_SSE
jnc @f
call init_sse
fxsave [fpu_data]
ret
@@:
call init_fpu_mmx
fnsave [fpu_data]
ret
; Set up CR0 for the x87 FPU/MMX-only case.
; Reads CR0, clears the CR0_EM bit, sets the CR0_MP and CR0_NE bits and
; writes the result back to CR0.
; in:  nothing
; out: nothing
; destroys: ecx, flags
init_fpu_mmx:
mov ecx, cr0
and ecx, not CR0_EM ; drop the EM bit, keep every other CR0 bit
or ecx, CR0_MP + CR0_NE ; add the MP and NE bits
mov cr0, ecx
ret
init_sse:
mov ebx, cr4 mov ebx, cr4
mov ecx, cr0 mov ecx, cr0
or ebx, CR4_OSFXSR+CR4_OSXMMEXPT or ebx, CR4_OSFXSR+CR4_OSXMMEXPT
mov cr4, ebx mov cr4, ebx
and ecx, not (CR0_MP+CR0_EM) and ecx, not (CR0_EM + CR0_MP)
or ecx, CR0_NE or ecx, CR0_NE
mov cr0, ecx mov cr0, ecx
mov dword [esp-4], SSE_INIT mov dword [esp-4], MXCSR_INIT
ldmxcsr [esp-4] ldmxcsr [esp-4]
xorps xmm0, xmm0 xorps xmm0, xmm0
@ -35,14 +92,46 @@ init_fpu:
xorps xmm5, xmm5 xorps xmm5, xmm5
xorps xmm6, xmm6 xorps xmm6, xmm6
xorps xmm7, xmm7 xorps xmm7, xmm7
fxsave [fpu_data] ;[eax]
ret ret
.no_SSE:
init_avx:
mov ebx, cr4
or ebx, CR4_OSFXSR + CR4_OSXMMEXPT
mov cr4, ebx
mov ecx, cr0 mov ecx, cr0
and ecx, not CR0_EM and ecx, not (CR0_EM + CR0_MP)
or ecx, CR0_MP+CR0_NE or ecx, CR0_NE
mov cr0, ecx mov cr0, ecx
fnsave [fpu_data]
mov dword [esp-4], MXCSR_INIT
vldmxcsr [esp-4]
vzeroall
ret
; Set up control registers for the AVX-512 case, load the default MXCSR and
; zero zmm0-zmm7.
; Called from init_fpu when the EAX value returned by CPUID leaf 0Dh has any
; XCR0_AVX512 bit set.
; Sets CR4_OSFXSR and CR4_OSXMMEXPT in CR4; clears CR0_EM and CR0_MP and sets
; CR0_NE in CR0.
; in:  nothing
; out: nothing
; destroys: ebx, ecx, flags, MXCSR, zmm0-zmm7, the dword at [esp-4]
init_avx512:
mov ebx, cr4
or ebx, CR4_OSFXSR + CR4_OSXMMEXPT
mov cr4, ebx
mov ecx, cr0
and ecx, not (CR0_EM + CR0_MP)
or ecx, CR0_NE
mov cr0, ecx
; MXCSR_INIT is the sum of the six MXCSR_?M mask bits.
; NOTE(review): the scratch dword sits below esp, so anything pushed onto this
; stack between the next two instructions (e.g. an interrupt) would overwrite
; it - confirm this only runs with interrupts disabled.
mov dword [esp-4], MXCSR_INIT
vldmxcsr [esp-4]
; xor each register with itself to clear all 512 bits
vpxorq zmm0, zmm0, zmm0
vpxorq zmm1, zmm1, zmm1
vpxorq zmm2, zmm2, zmm2
vpxorq zmm3, zmm3, zmm3
vpxorq zmm4, zmm4, zmm4
vpxorq zmm5, zmm5, zmm5
vpxorq zmm6, zmm6, zmm6
vpxorq zmm7, zmm7, zmm7
ret ret
; param ; param
@ -90,6 +179,11 @@ fpu_save:
align 4 align 4
save_context: save_context:
bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
jnc .no_xsave
xsave [eax]
ret
.no_xsave:
bt [cpu_caps], CAPS_SSE bt [cpu_caps], CAPS_SSE
jnc .no_SSE jnc .no_SSE
@ -115,6 +209,14 @@ fpu_restore:
jne .copy jne .copy
clts clts
bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8
jnc .no_xsave
xrstor [esi]
popfd
pop esi
pop ecx
ret
.no_xsave:
bt [cpu_caps], CAPS_SSE bt [cpu_caps], CAPS_SSE
jnc .no_SSE jnc .no_SSE

View File

@ -901,7 +901,9 @@ proc set_app_params stdcall,slot:dword, params:dword, flags:dword
pl0_stack dd ? pl0_stack dd ?
endl endl
stdcall kernel_alloc, RING0_STACK_SIZE+512 mov eax, [xsave_area_size]
add eax, RING0_STACK_SIZE
stdcall kernel_alloc, eax
mov [pl0_stack], eax mov [pl0_stack], eax
lea edi, [eax+RING0_STACK_SIZE] lea edi, [eax+RING0_STACK_SIZE]

View File

@ -432,6 +432,8 @@ cpu_sign dd ?
cpu_info dd ? cpu_info dd ?
cpu_caps rd 4 cpu_caps rd 4
; Size in bytes of the per-task FPU/SIMD context area.
; Written by init_fpu: the EBX result of CPUID leaf 0Dh (ecx = 0) when XSAVE is
; available, otherwise 512. set_app_params adds it to RING0_STACK_SIZE when
; allocating the ring-0 stack.
xsave_area_size dd ?
pg_data PG_DATA pg_data PG_DATA
heap_test dd ? heap_test dd ?

View File

@ -1184,7 +1184,7 @@ proc setup_os_slot
mov [edx+APPDATA.io_map+4], eax mov [edx+APPDATA.io_map+4], eax
mov dword [edx+APPDATA.pl0_stack], ebx mov dword [edx+APPDATA.pl0_stack], ebx
lea edi, [ebx+0x2000-512] lea edi, [ebx+RING0_STACK_SIZE]
mov dword [edx+APPDATA.fpu_state], edi mov dword [edx+APPDATA.fpu_state], edi
mov dword [edx+APPDATA.saved_esp0], edi mov dword [edx+APPDATA.saved_esp0], edi
mov dword [edx+APPDATA.saved_esp], edi mov dword [edx+APPDATA.saved_esp], edi