forked from KolibriOS/kolibrios
Enable xsave/xrstor, attempt 2.
git-svn-id: svn://kolibrios.org@7276 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
parent
06eafb0c92
commit
db8eddbd53
@ -13,26 +13,35 @@ init_fpu:
|
||||
fninit
|
||||
|
||||
bt [cpu_caps+(CAPS_XSAVE/32)*4], CAPS_XSAVE mod 32
|
||||
jmp .no_xsave ; not ready to be jnc so far
|
||||
jnc .no_xsave
|
||||
|
||||
mov ecx, cr4
|
||||
or ecx, CR4_OSXSAVE
|
||||
mov cr4, ecx
|
||||
; don't call cpuid again
|
||||
bts [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||
|
||||
mov eax, 0x0d
|
||||
; zero xsave header
|
||||
mov ecx, 64/4
|
||||
xor eax, eax
|
||||
mov edi, fpu_data + 512 ; skip legacy region
|
||||
rep stosd
|
||||
|
||||
mov eax, 0x0d ; extended state enumeration main leaf
|
||||
xor ecx, ecx
|
||||
cpuid
|
||||
mov ebx, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
|
||||
and ebx, eax
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
or eax, ebx
|
||||
and eax, XCR0_FPU_MMX + XCR0_SSE + XCR0_AVX + XCR0_AVX512
|
||||
xor edx, edx
|
||||
mov [xsave_eax], eax
|
||||
mov [xsave_edx], edx
|
||||
xor ecx, ecx
|
||||
xsetbv
|
||||
|
||||
mov eax, 0x0d
|
||||
xor ecx, ecx
|
||||
cpuid
|
||||
add ebx, 63
|
||||
and ebx, NOT 63
|
||||
mov [xsave_area_size], ebx
|
||||
cmp ebx, fpu_data_size
|
||||
ja $
|
||||
@ -40,18 +49,26 @@ init_fpu:
|
||||
test eax, XCR0_AVX512
|
||||
jz @f
|
||||
call init_avx512
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [fpu_data]
|
||||
ret
|
||||
@@:
|
||||
test eax, XCR0_AVX
|
||||
jz @f
|
||||
call init_avx
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [fpu_data]
|
||||
ret
|
||||
@@:
|
||||
test eax, XCR0_SSE
|
||||
jnz .sse
|
||||
jmp .fpu_mmx
|
||||
jz $
|
||||
call init_sse
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [fpu_data]
|
||||
ret
|
||||
.no_xsave:
|
||||
mov [xsave_area_size], 512 ; enough for FPU/MMX and SSE
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
@ -75,7 +92,7 @@ init_fpu_mmx:
|
||||
init_sse:
|
||||
mov ebx, cr4
|
||||
mov ecx, cr0
|
||||
or ebx, CR4_OSFXSR+CR4_OSXMMEXPT
|
||||
or ebx, CR4_OSFXSR + CR4_OSXMMEXPT
|
||||
mov cr4, ebx
|
||||
|
||||
and ecx, not (CR0_EM + CR0_MP)
|
||||
@ -186,6 +203,7 @@ avx_save_size:
|
||||
; param
|
||||
; eax = memory area of avx_save_size() bytes, aligned on a 64-byte boundary
|
||||
|
||||
align 4
|
||||
avx_save:
|
||||
push ecx
|
||||
push esi
|
||||
@ -230,7 +248,12 @@ align 4
|
||||
save_context:
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||
jnc save_fpu_context
|
||||
xsave [eax]
|
||||
push eax edx
|
||||
mov ecx, eax
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [ecx]
|
||||
pop edx eax
|
||||
ret
|
||||
save_fpu_context:
|
||||
bt [cpu_caps], CAPS_SSE
|
||||
@ -284,6 +307,7 @@ fpu_restore:
|
||||
pop ecx
|
||||
ret
|
||||
|
||||
align 4
|
||||
avx_restore:
|
||||
push ecx
|
||||
push esi
|
||||
@ -301,7 +325,11 @@ avx_restore:
|
||||
clts
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||
jnc .no_xsave
|
||||
push edx
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xrstor [esi]
|
||||
pop edx
|
||||
popfd
|
||||
pop esi
|
||||
pop ecx
|
||||
@ -351,12 +379,15 @@ except_7: ;#NM exception handler
|
||||
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||
jnc .no_xsave
|
||||
xsave [eax]
|
||||
mov ecx, eax
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [ecx]
|
||||
mov ebx, [CURRENT_TASK]
|
||||
mov [fpu_owner], ebx
|
||||
shl ebx, 8
|
||||
mov eax, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
||||
xrstor [eax]
|
||||
mov ecx, [ebx+SLOT_BASE+APPDATA.fpu_state]
|
||||
xrstor [ecx]
|
||||
.exit:
|
||||
restore_ring3_context
|
||||
iret
|
||||
|
@ -132,6 +132,20 @@ do_change_task:
|
||||
; set gs selector unconditionally
|
||||
Mov ax, graph_data
|
||||
Mov gs, ax
|
||||
; TS flag is not triggered by AVX* instructions, therefore
|
||||
; we have to xsave/xrstor SIMD registers on each task change
|
||||
bt [cpu_caps+(CAPS_OSXSAVE/32)*4], CAPS_OSXSAVE mod 32
|
||||
jnc .no_xsave
|
||||
mov ecx, [esi+APPDATA.fpu_state]
|
||||
mov eax, [xsave_eax]
|
||||
mov edx, [xsave_edx]
|
||||
xsave [ecx]
|
||||
mov ecx, [CURRENT_TASK]
|
||||
mov [fpu_owner], ecx
|
||||
mov ecx, [current_slot]
|
||||
mov ecx, [ecx+APPDATA.fpu_state]
|
||||
xrstor [ecx]
|
||||
.no_xsave:
|
||||
; set CR0.TS
|
||||
cmp bh, byte[fpu_owner] ;bh == incoming task (new)
|
||||
clts ;clear a task switch flag
|
||||
|
@ -929,10 +929,8 @@ proc set_app_params stdcall,slot:dword, params:dword, flags:dword
|
||||
shr ecx, 2
|
||||
rep movsd
|
||||
|
||||
cmp ebx, [TASK_COUNT]
|
||||
jle .noinc
|
||||
inc dword [TASK_COUNT] ;update number of processes
|
||||
.noinc:
|
||||
cmp [TASK_COUNT], ebx
|
||||
adc dword [TASK_COUNT], 0 ; update number of processes
|
||||
shl ebx, 8
|
||||
lea edx, [ebx+SLOT_BASE+APP_EV_OFFSET]
|
||||
mov [SLOT_BASE+APPDATA.fd_ev+ebx], edx
|
||||
|
@ -335,8 +335,9 @@ diff16 "end of .data segment",0,$
|
||||
align 16
|
||||
cur_saved_data:
|
||||
rb 4096
|
||||
align 64
|
||||
fpu_data:
|
||||
rb 1024
|
||||
rb 0xa80 ; bochs avx512
|
||||
fpu_data_size = $ - fpu_data
|
||||
draw_data:
|
||||
rb 32*256
|
||||
@ -434,6 +435,8 @@ cpu_info dd ?
|
||||
cpu_caps rd 4
|
||||
|
||||
xsave_area_size dd ?
|
||||
xsave_eax dd ?
|
||||
xsave_edx dd ?
|
||||
|
||||
pg_data PG_DATA
|
||||
heap_test dd ?
|
||||
|
@ -434,7 +434,10 @@ high_code:
|
||||
;lidt [idtreg]
|
||||
|
||||
call init_kernel_heap
|
||||
stdcall kernel_alloc, (RING0_STACK_SIZE+512) * 2
|
||||
call init_fpu
|
||||
mov eax, [xsave_area_size]
|
||||
lea eax, [eax*2 + RING0_STACK_SIZE*2]
|
||||
stdcall kernel_alloc, eax
|
||||
mov [os_stack_seg], eax
|
||||
|
||||
lea esp, [eax+RING0_STACK_SIZE]
|
||||
@ -469,7 +472,6 @@ high_code:
|
||||
mov [LFBAddress], LFB_BASE
|
||||
mov ecx, bios_fb
|
||||
call set_framebuffer
|
||||
call init_fpu
|
||||
call init_malloc
|
||||
|
||||
stdcall alloc_kernel_space, 0x50000 ; FIXME check size
|
||||
@ -590,7 +592,8 @@ high_code:
|
||||
|
||||
mov edx, SLOT_BASE+256*1
|
||||
mov ebx, [os_stack_seg]
|
||||
add ebx, 0x2000
|
||||
add ebx, RING0_STACK_SIZE
|
||||
add ebx, [xsave_area_size]
|
||||
call setup_os_slot
|
||||
mov dword [edx], 'IDLE'
|
||||
sub [edx+APPDATA.saved_esp], 4
|
||||
|
@ -7,7 +7,8 @@
|
||||
; Optimized for KolibriOS, By Diamond
|
||||
; Assemble with
|
||||
; c:fasm firework.asm firework.kex
|
||||
; NOTE: Needs MMX & SSE, optionally AVX
|
||||
; NOTE: Needs MMX & SSE,
|
||||
; optionally AVX, AVX2, AVX512
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
use32
|
||||
org 0x0
|
||||
@ -23,8 +24,12 @@ use32
|
||||
include '../../../macros.inc'
|
||||
SCREEN_WIDTH = 320
|
||||
SCREEN_HEIGHT = 200
|
||||
SIMD equ SSE
|
||||
SIMD_BYTES = 8
|
||||
SIMD equ AVX
|
||||
SIMD_BYTES = 16
|
||||
; SSE 8
|
||||
; AVX 16
|
||||
; AVX2 32
|
||||
; AVX512 64
|
||||
assert SCREEN_WIDTH mod SIMD_BYTES = 0
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Global defines
|
||||
|
Loading…
Reference in New Issue
Block a user