forked from KolibriOS/kolibrios
Enable xsave/xrstor, attempt 2.
git-svn-id: svn://kolibrios.org@7276 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
parent
06eafb0c92
commit
db8eddbd53
@ -13,26 +13,35 @@ init_fpu:
|
|||||||
fninit
|
fninit
|
||||||
|
|
||||||
bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
|
bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
|
||||||
jmp .no_xsave ; not ready to be jnc so far
|
jnc .no_xsave
|
||||||
|
|
||||||
mov ecx, cr4
|
mov ecx, cr4
|
||||||
or ecx, CR4_OSXSAVE
|
or ecx, CR4_OSXSAVE
|
||||||
mov cr4, ecx
|
mov cr4, ecx
|
||||||
|
; don't call cpuid again
|
||||||
|
bts [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||||
|
|
||||||
mov eax, 0x0d
|
; zero xsave header
|
||||||
|
mov ecx, 64/4
|
||||||
|
xor eax, eax
|
||||||
|
mov edi, fpu_data + 512 ; skip legacy region
|
||||||
|
rep stosd
|
||||||
|
|
||||||
|
mov eax, 0x0d ; extended state enumeration main leaf
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
cpuid
|
cpuid
|
||||||
mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
|
and eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
|
||||||
and ebx, eax
|
xor edx, edx
|
||||||
xor ecx, ecx
|
mov [xsave_eax], eax
|
||||||
xgetbv
|
mov [xsave_edx], edx
|
||||||
or eax, ebx
|
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
xsetbv
|
xsetbv
|
||||||
|
|
||||||
mov eax, 0x0d
|
mov eax, 0x0d
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
cpuid
|
cpuid
|
||||||
|
add ebx, 63
|
||||||
|
and ebx, NOT 63
|
||||||
mov [xsave_area_size], ebx
|
mov [xsave_area_size], ebx
|
||||||
cmp ebx, fpu_data_size
|
cmp ebx, fpu_data_size
|
||||||
ja $
|
ja $
|
||||||
@ -40,18 +49,26 @@ init_fpu:
|
|||||||
test eax, XCR0_AVX512
|
test eax, XCR0_AVX512
|
||||||
jz @f
|
jz @f
|
||||||
call init_avx512
|
call init_avx512
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
xsave [fpu_data]
|
xsave [fpu_data]
|
||||||
ret
|
ret
|
||||||
@@:
|
@@:
|
||||||
test eax, XCR0_AVX
|
test eax, XCR0_AVX
|
||||||
jz @f
|
jz @f
|
||||||
call init_avx
|
call init_avx
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
xsave [fpu_data]
|
xsave [fpu_data]
|
||||||
ret
|
ret
|
||||||
@@:
|
@@:
|
||||||
test eax, XCR0_SSE
|
test eax, XCR0_SSE
|
||||||
jnz .sse
|
jz $
|
||||||
jmp .fpu_mmx
|
call init_sse
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
|
xsave [fpu_data]
|
||||||
|
ret
|
||||||
.no_xsave:
|
.no_xsave:
|
||||||
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE
|
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE
|
||||||
bt [cpu_caps], CAPS_SSE
|
bt [cpu_caps], CAPS_SSE
|
||||||
@ -75,7 +92,7 @@ init_fpu_mmx:
|
|||||||
init_sse:
|
init_sse:
|
||||||
mov ebx, cr4
|
mov ebx, cr4
|
||||||
mov ecx, cr0
|
mov ecx, cr0
|
||||||
or ebx, CR4_OSFXSR+CR4_OSXMMEXPT
|
or ebx, CR4_OSFXSR + CR4_OSXMMEXPT
|
||||||
mov cr4, ebx
|
mov cr4, ebx
|
||||||
|
|
||||||
and ecx, not (CR0_EM + CR0_MP)
|
and ecx, not (CR0_EM + CR0_MP)
|
||||||
@ -186,6 +203,7 @@ avx_save_size:
|
|||||||
; param
|
; param
|
||||||
; eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
|
; eax= avx_save_size() bytes memory area aligned on a 64-byte boundary
|
||||||
|
|
||||||
|
align 4
|
||||||
avx_save:
|
avx_save:
|
||||||
push ecx
|
push ecx
|
||||||
push esi
|
push esi
|
||||||
@ -230,7 +248,12 @@ align 4
|
|||||||
save_context:
|
save_context:
|
||||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||||
jnc save_fpu_context
|
jnc save_fpu_context
|
||||||
xsave [eax]
|
push eax edx
|
||||||
|
mov ecx, eax
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
|
xsave [ecx]
|
||||||
|
pop edx eax
|
||||||
ret
|
ret
|
||||||
save_fpu_context:
|
save_fpu_context:
|
||||||
bt [cpu_caps], CAPS_SSE
|
bt [cpu_caps], CAPS_SSE
|
||||||
@ -284,6 +307,7 @@ fpu_restore:
|
|||||||
pop ecx
|
pop ecx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
align 4
|
||||||
avx_restore:
|
avx_restore:
|
||||||
push ecx
|
push ecx
|
||||||
push esi
|
push esi
|
||||||
@ -301,7 +325,11 @@ avx_restore:
|
|||||||
clts
|
clts
|
||||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||||
jnc .no_xsave
|
jnc .no_xsave
|
||||||
|
push edx
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
xrstor [esi]
|
xrstor [esi]
|
||||||
|
pop edx
|
||||||
popfd
|
popfd
|
||||||
pop esi
|
pop esi
|
||||||
pop ecx
|
pop ecx
|
||||||
@ -351,12 +379,15 @@ except_7: ;#NM exception handler
|
|||||||
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
||||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||||
jnc .no_xsave
|
jnc .no_xsave
|
||||||
xsave [eax]
|
mov ecx, eax
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
|
xsave [ecx]
|
||||||
mov ebx, [CURRENT_TASK]
|
mov ebx, [CURRENT_TASK]
|
||||||
mov [fpu_owner], ebx
|
mov [fpu_owner], ebx
|
||||||
shl ebx, 8
|
shl ebx, 8
|
||||||
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
mov ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
||||||
xrstor [eax]
|
xrstor [ecx]
|
||||||
.exit:
|
.exit:
|
||||||
restore_ring3_context
|
restore_ring3_context
|
||||||
iret
|
iret
|
||||||
|
@ -132,6 +132,20 @@ do_change_task:
|
|||||||
; set gs selector unconditionally
|
; set gs selector unconditionally
|
||||||
Mov ax, graph_data
|
Mov ax, graph_data
|
||||||
Mov gs, ax
|
Mov gs, ax
|
||||||
|
; TS flag is not triggered by AVX* instructions, therefore
|
||||||
|
; we have to xsave/xrstor SIMD registers each task change
|
||||||
|
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||||
|
jnc .no_xsave
|
||||||
|
mov ecx, [esi+APPDATA.fpu_state]
|
||||||
|
mov eax, [xsave_eax]
|
||||||
|
mov edx, [xsave_edx]
|
||||||
|
xsave [ecx]
|
||||||
|
mov ecx, [CURRENT_TASK]
|
||||||
|
mov [fpu_owner], ecx
|
||||||
|
mov ecx, [current_slot]
|
||||||
|
mov ecx, [ecx+APPDATA.fpu_state]
|
||||||
|
xrstor [ecx]
|
||||||
|
.no_xsave:
|
||||||
; set CR0.TS
|
; set CR0.TS
|
||||||
cmp bh, byte[fpu_owner] ;bh == incoming task (new)
|
cmp bh, byte[fpu_owner] ;bh == incoming task (new)
|
||||||
clts ;clear a task switch flag
|
clts ;clear a task switch flag
|
||||||
|
@ -929,10 +929,8 @@ proc set_app_params stdcall,slot:dword, params:dword, flags:dword
|
|||||||
shr ecx, 2
|
shr ecx, 2
|
||||||
rep movsd
|
rep movsd
|
||||||
|
|
||||||
cmp ebx, [TASK_COUNT]
|
cmp [TASK_COUNT], ebx
|
||||||
jle .noinc
|
adc dword [TASK_COUNT], 0 ; update number of processes
|
||||||
inc dword [TASK_COUNT] ;update number of processes
|
|
||||||
.noinc:
|
|
||||||
shl ebx, 8
|
shl ebx, 8
|
||||||
lea edx, [ebx+SLOT_BASE+APP_EV_OFFSET]
|
lea edx, [ebx+SLOT_BASE+APP_EV_OFFSET]
|
||||||
mov [SLOT_BASE+APPDATA.fd_ev+ebx], edx
|
mov [SLOT_BASE+APPDATA.fd_ev+ebx], edx
|
||||||
|
@ -335,8 +335,9 @@ diff16 "end of .data segment",0,$
|
|||||||
align 16
|
align 16
|
||||||
cur_saved_data:
|
cur_saved_data:
|
||||||
rb 4096
|
rb 4096
|
||||||
|
align 64
|
||||||
fpu_data:
|
fpu_data:
|
||||||
rb 1024
|
rb 0xa80 ; bochs avx512
|
||||||
fpu_data_size = $ - fpu_data
|
fpu_data_size = $ - fpu_data
|
||||||
draw_data:
|
draw_data:
|
||||||
rb 32*256
|
rb 32*256
|
||||||
@ -434,6 +435,8 @@ cpu_info dd ?
|
|||||||
cpu_caps rd 4
|
cpu_caps rd 4
|
||||||
|
|
||||||
xsave_area_size dd ?
|
xsave_area_size dd ?
|
||||||
|
xsave_eax dd ?
|
||||||
|
xsave_edx dd ?
|
||||||
|
|
||||||
pg_data PG_DATA
|
pg_data PG_DATA
|
||||||
heap_test dd ?
|
heap_test dd ?
|
||||||
|
@ -434,7 +434,10 @@ high_code:
|
|||||||
;lidt [idtreg]
|
;lidt [idtreg]
|
||||||
|
|
||||||
call init_kernel_heap
|
call init_kernel_heap
|
||||||
stdcall kernel_alloc, (RING0_STACK_SIZE+512) * 2
|
call init_fpu
|
||||||
|
mov eax, [xsave_area_size]
|
||||||
|
lea eax, [eax*2 + RING0_STACK_SIZE*2]
|
||||||
|
stdcall kernel_alloc, eax
|
||||||
mov [os_stack_seg], eax
|
mov [os_stack_seg], eax
|
||||||
|
|
||||||
lea esp, [eax+RING0_STACK_SIZE]
|
lea esp, [eax+RING0_STACK_SIZE]
|
||||||
@ -469,7 +472,6 @@ high_code:
|
|||||||
mov [LFBAddress], LFB_BASE
|
mov [LFBAddress], LFB_BASE
|
||||||
mov ecx, bios_fb
|
mov ecx, bios_fb
|
||||||
call set_framebuffer
|
call set_framebuffer
|
||||||
call init_fpu
|
|
||||||
call init_malloc
|
call init_malloc
|
||||||
|
|
||||||
stdcall alloc_kernel_space, 0x50000 ; FIXME check size
|
stdcall alloc_kernel_space, 0x50000 ; FIXME check size
|
||||||
@ -590,7 +592,8 @@ high_code:
|
|||||||
|
|
||||||
mov edx, SLOT_BASE+256*1
|
mov edx, SLOT_BASE+256*1
|
||||||
mov ebx, [os_stack_seg]
|
mov ebx, [os_stack_seg]
|
||||||
add ebx, 0x2000
|
add ebx, RING0_STACK_SIZE
|
||||||
|
add ebx, [xsave_area_size]
|
||||||
call setup_os_slot
|
call setup_os_slot
|
||||||
mov dword [edx], 'IDLE'
|
mov dword [edx], 'IDLE'
|
||||||
sub [edx+APPDATA.saved_esp], 4
|
sub [edx+APPDATA.saved_esp], 4
|
||||||
|
@ -7,7 +7,8 @@
|
|||||||
; Optimized for KolibriOS, By Diamond
|
; Optimized for KolibriOS, By Diamond
|
||||||
; Assemble with
|
; Assemble with
|
||||||
; c:fasm firework.asm firework.kex
|
; c:fasm firework.asm firework.kex
|
||||||
; NOTE: Needs MMX & SSE, optionally AVX
|
; NOTE: Needs MMX & SSE,
|
||||||
|
; optionally AVX, AVX2, AVX512
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
use32
|
use32
|
||||||
org 0x0
|
org 0x0
|
||||||
@ -23,8 +24,12 @@ use32
|
|||||||
include '../../../macros.inc'
|
include '../../../macros.inc'
|
||||||
SCREEN_WIDTH = 320
|
SCREEN_WIDTH = 320
|
||||||
SCREEN_HEIGHT = 200
|
SCREEN_HEIGHT = 200
|
||||||
SIMD equ SSE
|
SIMD equ AVX
|
||||||
SIMD_BYTES = 8
|
SIMD_BYTES = 16
|
||||||
|
; SSE 8
|
||||||
|
; AVX 16
|
||||||
|
; AVX2 32
|
||||||
|
; AVX512 64
|
||||||
assert SCREEN_WIDTH mod SIMD_BYTES = 0
|
assert SCREEN_WIDTH mod SIMD_BYTES = 0
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; Global defines
|
; Global defines
|
||||||
|
Loading…
Reference in New Issue
Block a user