From 9e6b9b61f77b6041eef202606e7b1db2d48b246d Mon Sep 17 00:00:00 2001 From: Ivan Baravy Date: Tue, 5 Dec 2017 05:24:21 +0000 Subject: [PATCH] Use xsave/xrstor if available, enable AVX* instructions. git-svn-id: svn://kolibrios.org@7124 a494cfbc-eb01-0410-851d-a64ba20cac60 --- kernel/trunk/const.inc | 69 +++++++++++-------- kernel/trunk/core/debug.inc | 2 +- kernel/trunk/core/fpu.inc | 122 +++++++++++++++++++++++++++++++--- kernel/trunk/core/taskman.inc | 4 +- kernel/trunk/data32.inc | 2 + kernel/trunk/kernel.asm | 2 +- 6 files changed, 160 insertions(+), 41 deletions(-) diff --git a/kernel/trunk/const.inc b/kernel/trunk/const.inc index 765f122bf6..2a55c30161 100644 --- a/kernel/trunk/const.inc +++ b/kernel/trunk/const.inc @@ -8,7 +8,7 @@ $Revision$ -dpl0 equ 10010000b ; data read dpl0 +dpl0 equ 10010000b ; data read dpl0 drw0 equ 10010010b ; data read/write dpl0 drw3 equ 11110010b ; data read/write dpl3 cpl0 equ 10011010b ; code read dpl0 @@ -78,6 +78,12 @@ CAPS_CID equ 42 ; ; 44 CAPS_CX16 equ 45 ;CMPXCHG16B instruction CAPS_xTPR equ 46 ; +CAPS_XSAVE equ (32 + 26) ; XSAVE and XRSTOR instructions +CAPS_OSXSAVE equ (32 + 27) +; A value of 1 indicates that the OS has set CR4.OSXSAVE[bit 18] to enable +; XSETBV/XGETBV instructions to access XCR0 and to support processor extended +; state management using XSAVE/XRSTOR. 
+CAPS_AVX equ (32 + 28) ; not AVX2 ; ;reserved ; @@ -117,34 +123,41 @@ CR0_CD equ 0x40000000 ;cache disable CR0_PG equ 0x80000000 ;paging -CR4_VME equ 0x0001 -CR4_PVI equ 0x0002 -CR4_TSD equ 0x0004 -CR4_DE equ 0x0008 -CR4_PSE equ 0x0010 -CR4_PAE equ 0x0020 -CR4_MCE equ 0x0040 -CR4_PGE equ 0x0080 -CR4_PCE equ 0x0100 -CR4_OSFXSR equ 0x0200 -CR4_OSXMMEXPT equ 0x0400 +CR4_VME equ 0x000001 +CR4_PVI equ 0x000002 +CR4_TSD equ 0x000004 +CR4_DE equ 0x000008 +CR4_PSE equ 0x000010 +CR4_PAE equ 0x000020 +CR4_MCE equ 0x000040 +CR4_PGE equ 0x000080 +CR4_PCE equ 0x000100 +CR4_OSFXSR equ 0x000200 +CR4_OSXMMEXPT equ 0x000400 +CR4_OSXSAVE equ 0x040000 -SSE_IE equ 0x0001 -SSE_DE equ 0x0002 -SSE_ZE equ 0x0004 -SSE_OE equ 0x0008 -SSE_UE equ 0x0010 -SSE_PE equ 0x0020 -SSE_DAZ equ 0x0040 -SSE_IM equ 0x0080 -SSE_DM equ 0x0100 -SSE_ZM equ 0x0200 -SSE_OM equ 0x0400 -SSE_UM equ 0x0800 -SSE_PM equ 0x1000 -SSE_FZ equ 0x8000 +XCR0_FPU_MMX equ 0x0001 +XCR0_SSE equ 0x0002 +XCR0_AVX equ 0x0004 +XCR0_MPX equ 0x0018 +XCR0_AVX512 equ 0x00e0 -SSE_INIT equ (SSE_IM+SSE_DM+SSE_ZM+SSE_OM+SSE_UM+SSE_PM) +MXCSR_IE equ 0x0001 +MXCSR_DE equ 0x0002 +MXCSR_ZE equ 0x0004 +MXCSR_OE equ 0x0008 +MXCSR_UE equ 0x0010 +MXCSR_PE equ 0x0020 +MXCSR_DAZ equ 0x0040 +MXCSR_IM equ 0x0080 +MXCSR_DM equ 0x0100 +MXCSR_ZM equ 0x0200 +MXCSR_OM equ 0x0400 +MXCSR_UM equ 0x0800 +MXCSR_PM equ 0x1000 +MXCSR_FZ equ 0x8000 + +MXCSR_INIT equ (MXCSR_IM+MXCSR_DM+MXCSR_ZM+MXCSR_OM+MXCSR_UM+MXCSR_PM) IRQ_PIC equ 0 IRQ_APIC equ 1 @@ -252,7 +265,7 @@ new_app_base equ 0; twdw equ 0x2000 ;(CURRENT_TASK-window_data) std_application_base_address equ new_app_base -RING0_STACK_SIZE equ (0x2000 - 512) ;512 байт для контекста FPU +RING0_STACK_SIZE equ 0x2000 REG_SS equ (RING0_STACK_SIZE-4) REG_APP_ESP equ (RING0_STACK_SIZE-8) diff --git a/kernel/trunk/core/debug.inc b/kernel/trunk/core/debug.inc index 3c152b8bbe..ad53495c0b 100644 --- a/kernel/trunk/core/debug.inc +++ b/kernel/trunk/core/debug.inc @@ -169,7 +169,7 @@ debug_getcontext: .ring0: ; note 
that following code assumes that all interrupt/exception handlers -; saves ring-3 context by pushad in this order +; save ring-3 context by pushad in this order ; top of ring0 stack: ring3 stack ptr (ss+esp), iret data (cs+eip+eflags), pushad sub esi, 8+12+20h lodsd ;edi diff --git a/kernel/trunk/core/fpu.inc b/kernel/trunk/core/fpu.inc index 3d652ecf43..6c4291ca6e 100644 --- a/kernel/trunk/core/fpu.inc +++ b/kernel/trunk/core/fpu.inc @@ -1,6 +1,6 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; -;; Copyright (C) KolibriOS team 2004-2015. All rights reserved. ;; +;; Copyright (C) KolibriOS team 2004-2017. All rights reserved. ;; ;; Distributed under terms of the GNU General Public License ;; ;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -12,19 +12,76 @@ init_fpu: clts fninit - bt [cpu_caps], CAPS_SSE - jnc .no_SSE + bt [cpu_caps+(CAPS_XSAVE/8)], CAPS_XSAVE mod 8 + jnc .no_xsave + mov ecx, cr4 + or ecx, CR4_OSXSAVE + mov cr4, ecx + + mov eax, 0x0d + xor ecx, ecx + cpuid + mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512 + and ebx, eax + xor ecx, ecx + xgetbv + or eax, ebx + xor ecx, ecx + xsetbv + + mov eax, 0x0d + xor ecx, ecx + cpuid + mov [xsave_area_size], ebx + + test eax, XCR0_AVX512 + jz @f + call init_avx512 + ret +@@: + test eax, XCR0_AVX + jz @f + call init_avx + ret +@@: + test eax, XCR0_SSE + jz @f + call init_sse + ret +@@: + call init_fpu_mmx + ret +.no_xsave: + mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE + bt [cpu_caps], CAPS_SSE + jnc @f + call init_sse + fxsave [fpu_data] + ret +@@: + call init_fpu_mmx + fnsave [fpu_data] + ret + +init_fpu_mmx: + mov ecx, cr0 + and ecx, not CR0_EM + or ecx, CR0_MP + CR0_NE + mov cr0, ecx + ret + +init_sse: mov ebx, cr4 mov ecx, cr0 or ebx, CR4_OSFXSR+CR4_OSXMMEXPT mov cr4, ebx - and ecx, not (CR0_MP+CR0_EM) + and ecx, not (CR0_EM + CR0_MP) or ecx, CR0_NE mov cr0, ecx - mov dword [esp-4], SSE_INIT + mov dword [esp-4], MXCSR_INIT ldmxcsr 
[esp-4] xorps xmm0, xmm0 @@ -35,14 +92,46 @@ init_fpu: xorps xmm5, xmm5 xorps xmm6, xmm6 xorps xmm7, xmm7 - fxsave [fpu_data] ;[eax] ret -.no_SSE: + +init_avx: + mov ebx, cr4 + or ebx, CR4_OSFXSR + CR4_OSXMMEXPT + mov cr4, ebx + mov ecx, cr0 - and ecx, not CR0_EM - or ecx, CR0_MP+CR0_NE + and ecx, not (CR0_EM + CR0_MP) + or ecx, CR0_NE mov cr0, ecx - fnsave [fpu_data] + + mov dword [esp-4], MXCSR_INIT + vldmxcsr [esp-4] + + vzeroall + ret + +init_avx512: + mov ebx, cr4 + or ebx, CR4_OSFXSR + CR4_OSXMMEXPT + mov cr4, ebx + + mov ecx, cr0 + and ecx, not (CR0_EM + CR0_MP) + or ecx, CR0_NE + mov cr0, ecx + + mov dword [esp-4], MXCSR_INIT + vldmxcsr [esp-4] + + vpxorq zmm0, zmm0, zmm0 + vpxorq zmm1, zmm1, zmm1 + vpxorq zmm2, zmm2, zmm2 + vpxorq zmm3, zmm3, zmm3 + vpxorq zmm4, zmm4, zmm4 + vpxorq zmm5, zmm5, zmm5 + vpxorq zmm6, zmm6, zmm6 + vpxorq zmm7, zmm7, zmm7 + ret ; param @@ -90,6 +179,11 @@ fpu_save: align 4 save_context: + bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8 + jnc .no_xsave + xsave [eax] + ret +.no_xsave: bt [cpu_caps], CAPS_SSE jnc .no_SSE @@ -115,6 +209,14 @@ fpu_restore: jne .copy clts + bt [cpu_caps+(CAPS_OSXSAVE/8)], CAPS_OSXSAVE mod 8 + jnc .no_xsave + xrstor [esi] + popfd + pop esi + pop ecx + ret +.no_xsave: bt [cpu_caps], CAPS_SSE jnc .no_SSE diff --git a/kernel/trunk/core/taskman.inc b/kernel/trunk/core/taskman.inc index c4c34c3bbf..3e6f20fe8b 100644 --- a/kernel/trunk/core/taskman.inc +++ b/kernel/trunk/core/taskman.inc @@ -901,7 +901,9 @@ proc set_app_params stdcall,slot:dword, params:dword, flags:dword pl0_stack dd ? endl - stdcall kernel_alloc, RING0_STACK_SIZE+512 + mov eax, [xsave_area_size] + add eax, RING0_STACK_SIZE + stdcall kernel_alloc, eax mov [pl0_stack], eax lea edi, [eax+RING0_STACK_SIZE] diff --git a/kernel/trunk/data32.inc b/kernel/trunk/data32.inc index 5f2d2e21bb..68d42e9034 100644 --- a/kernel/trunk/data32.inc +++ b/kernel/trunk/data32.inc @@ -432,6 +432,8 @@ cpu_sign dd ? cpu_info dd ? 
cpu_caps rd 4 +xsave_area_size dd ? + pg_data PG_DATA heap_test dd ? diff --git a/kernel/trunk/kernel.asm b/kernel/trunk/kernel.asm index e1bb9edeba..ae446803f6 100644 --- a/kernel/trunk/kernel.asm +++ b/kernel/trunk/kernel.asm @@ -1184,7 +1184,7 @@ proc setup_os_slot mov [edx+APPDATA.io_map+4], eax mov dword [edx+APPDATA.pl0_stack], ebx - lea edi, [ebx+0x2000-512] + lea edi, [ebx+RING0_STACK_SIZE] mov dword [edx+APPDATA.fpu_state], edi mov dword [edx+APPDATA.saved_esp0], edi mov dword [edx+APPDATA.saved_esp], edi