;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                              ;;
;; Copyright (C) KolibriOS team 2004-2022. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License    ;;
;;                                                              ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

$Revision$

; Initializes MTRRs.
proc init_mtrr
        cmp     [BOOT.mtrr], byte 2
        je      .exit
        bt      [cpu_caps], CAPS_MTRR
        jnc     .exit
        call    mtrr_reconfigure
        stdcall set_mtrr, [LFBAddress], 0x1000000, MEM_WC
.exit:
        ret
endp

; Helper procedure for mtrr_reconfigure and set_mtrr,
; called before changes in MTRRs.
; 1. disable and flush caches
; 2. clear PGE bit in cr4
; 3. flush TLB
; 4. disable mtrr
proc mtrr_begin_change
        mov     eax, cr0
        or      eax, 0x60000000 ; disable caching
        mov     cr0, eax
        wbinvd                  ; invalidate cache
        bt      [cpu_caps], CAPS_PGE
        jnc     .cr3_flush
        mov     eax, cr4
        btr     eax, 7          ; clear cr4.PGE
        mov     cr4, eax        ; flush TLB
        jmp     @F              ; skip extra serialization
.cr3_flush:
        mov     eax, cr3
        mov     cr3, eax        ; flush TLB
@@:
        mov     ecx, MSR_MTRR_DEF_TYPE
        rdmsr
        btr     eax, 11         ; clear enable flag
        wrmsr                   ; disable mtrr
        ret
endp

; Helper procedure for mtrr_reconfigure and set_mtrr,
; called after changes in MTRRs.
; 1. enable mtrr
; 2. flush all caches
; 3. flush TLB
; 4. restore cr4.PGE flag, if required
proc mtrr_end_change
        mov     ecx, MSR_MTRR_DEF_TYPE
        rdmsr
        or      ah, 8           ; enable variable-range MTRRs
        and     al, 0xF0        ; default memtype = UC
        wrmsr
        wbinvd                  ; invalidate cache again
        mov     eax, cr0
        and     eax, not 0x60000000
        mov     cr0, eax        ; enable caching
        mov     eax, cr3
        mov     cr3, eax        ; flush TLB
        bt      [cpu_caps], CAPS_PGE
        jnc     @F
        mov     eax, cr4
        bts     eax, 7          ; set cr4.PGE flag
        mov     cr4, eax
@@:
        ret
endp
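; A reference note for the two helpers above (added commentary, based on the
; IA-32 definitions): in cr0, 0x60000000 = bit 30 (CD, cache disable) or
; bit 29 (NW, not write-through), so the or/and pair disables and re-enables
; caching around wbinvd; in MSR_MTRR_DEF_TYPE, bit 11 (touched via
; "btr eax, 11" / "or ah, 8") is the global enable flag for variable-range MTRRs.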
; Some limits to the number of structures located in the stack.
MAX_USEFUL_MTRRS = 16
MAX_RANGES = 16

; mtrr_reconfigure keeps a list of MEM_WB ranges.
; This structure describes one item in the list.
struct mtrr_range
next    dd      ?       ; next item
start   dq      ?       ; first byte
length  dq      ?       ; length in bytes
ends

uglobal
align 4
num_variable_mtrrs      dd      0       ; number of variable-range MTRRs
endg

; Helper procedure for MTRR initialization.
; Takes MTRRs configured by the BIOS and tries to reconfigure them
; in order to allow non-UC data at the top of 4G memory.
; Example: if the low part of physical memory is 3.5G = 0xE0000000 bytes wide,
; the BIOS can configure two MTRRs so that the first MTRR describes [0, 4G) as WB
; and the second MTRR describes [3.5G, 4G) as UC;
; WB+UC=UC, so the resulting memory map would be as needed,
; but in this configuration our attempts to map the LFB at (say) 0xE8000000 as WC
; would be ignored, WB+UC+WC is still UC.
; So we must keep the top of 4G memory not covered by MTRRs,
; using three WB MTRRs [0,2G) + [2G,3G) + [3G,3.5G);
; this gives the same memory map, but allows adding further entries.
; See mtrrtest.asm for detailed input/output from real hardware+BIOS.
proc mtrr_reconfigure
        push    ebp     ; we're called from init_LFB, and it feels hurt when ebp is destroyed
; 1. Prepare local variables.
; 1a. Create a list of MAX_RANGES free (aka not yet allocated) ranges.
        xor     eax, eax
        lea     ecx, [eax + MAX_RANGES]
.init_ranges:
        sub     esp, sizeof.mtrr_range - 4
        push    eax
        mov     eax, esp
        dec     ecx
        jnz     .init_ranges
        mov     eax, esp
; 1b. Fill individual local variables.
        xor     edx, edx
        sub     esp, MAX_USEFUL_MTRRS * 16      ; .mtrrs
        push    edx     ; .mtrrs_end
        push    edx     ; .num_used_mtrrs
        push    eax     ; .first_free_range
        push    edx     ; .first_range: no ranges yet
        mov     cl, [cpu_phys_addr_width]
        or      eax, -1
        shl     eax, cl ; note: this uses cl&31 = cl-32, not the entire cl
        push    eax     ; .phys_reserved_mask
virtual at esp
.phys_reserved_mask     dd      ?
.first_range            dd      ?
.first_free_range       dd      ?
.num_used_mtrrs         dd      ?
.mtrrs_end              dd      ?
.mtrrs                  rq      MAX_USEFUL_MTRRS * 2
.local_vars_size = $ - esp
end virtual
; 2. Get the number of variable-range MTRRs from the MTRRCAP register.
; Abort if zero.
        mov     ecx, 0xFE
        rdmsr
        test    al, al
        jz      .abort
        mov     byte [num_variable_mtrrs], al
; 3. Validate the MTRR_DEF_TYPE register.
        mov     ecx, 0x2FF
        rdmsr
; If the BIOS has not initialized variable-range MTRRs, fall back to step 7.
        test    ah, 8
        jz      .fill_ranges_from_memory_map
; If the default memory type (not covered by MTRRs) is not UC,
; then the BIOS probably did something strange, so it is better to exit
; immediately, hoping for the best.
        cmp     al, MEM_UC
        jnz     .abort
; 4. Validate all variable-range MTRRs
; and copy the configured MTRRs to the local array [.mtrrs].
; 4a. Prepare for the loop over existing variable-range MTRRs.
        mov     ecx, 0x200
        lea     edi, [.mtrrs]
.get_used_mtrrs_loop:
; 4b. For every MTRR, read PHYSBASEn and PHYSMASKn.
; In PHYSBASEn, clear upper bits and copy to ebp:ebx.
        rdmsr
        or      edx, [.phys_reserved_mask]
        xor     edx, [.phys_reserved_mask]
        mov     ebp, edx
        mov     ebx, eax
        inc     ecx
; If PHYSMASKn is not active, ignore this MTRR.
        rdmsr
        inc     ecx
        test    ah, 8
        jz      .get_used_mtrrs_next
; 4c. For every active MTRR, check that the number of local entries is not too large.
        inc     [.num_used_mtrrs]
        cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
        ja      .abort
; 4d. For every active MTRR, store PHYSBASEn with upper bits cleared.
; This contains the MTRR base and the memory type in the low byte.
        mov     [edi], ebx
        mov     [edi+4], ebp
; 4e. For every active MTRR, check that the range is continuous:
; PHYSMASKn with upper bits set must be a negated power of two, and
; low bits of PHYSBASEn must be zeroes:
; PHYSMASKn = 1...10...0,
; PHYSBASEn = x...x0...0;
; this defines a continuous range from x...x0...0 to x...x1...1,
; length = 10...0 = negated PHYSMASKn.
; Store the length in the local array.
        and     eax, not 0xFFF
        or      edx, [.phys_reserved_mask]
        mov     dword [edi+8], 0
        mov     dword [edi+12], 0
        sub     [edi+8], eax
        sbb     [edi+12], edx
; (x and -x) is the maximum power of two that divides x.
; Condition for powers of two: (x and -x) equals x.
        and     eax, [edi+8]
        and     edx, [edi+12]
        cmp     eax, [edi+8]
        jnz     .abort
        cmp     edx, [edi+12]
        jnz     .abort
        sub     eax, 1
        sbb     edx, 0
        and     eax, not 0xFFF
        and     eax, ebx
        jnz     .abort
        and     edx, ebp
        jnz     .abort
; 4f. For every active MTRR, validate the memory type: it must be either WB or UC.
        add     edi, 16
        cmp     bl, MEM_UC
        jz      .get_used_mtrrs_next
        cmp     bl, MEM_WB
        jnz     .abort
.get_used_mtrrs_next:
; 4g. Repeat the loop at 4b-4f for all [num_variable_mtrrs] entries.
        mov     eax, [num_variable_mtrrs]
        lea     eax, [0x200+eax*2]
        cmp     ecx, eax
        jb      .get_used_mtrrs_loop
; 4h. If no active MTRRs were detected, fall back to step 7.
        cmp     [.num_used_mtrrs], 0
        jz      .fill_ranges_from_memory_map
        mov     [.mtrrs_end], edi
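; A worked example for the continuity check at 4e (illustrative numbers,
; assuming a 36-bit physical address width): a 1G range at 3G is encoded as
; PHYSBASE = 0x0_C0000000 and PHYSMASK = 0xF_C0000000; with the upper bits set,
; -PHYSMASK = 0x0_40000000 = 1G is a power of two, and PHYSBASE has no bits
; below 1G set, so the MTRR describes the continuous range [3G,4G).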
; 5. Generate a sorted list of ranges marked as WB.
; 5a. Prepare for the loop over configured MTRRs filled at step 4.
        lea     ecx, [.mtrrs]
.fill_wb_ranges:
; 5b. Ignore non-WB MTRRs.
        mov     ebx, [ecx]
        cmp     bl, MEM_WB
        jnz     .next_wb_range
        mov     ebp, [ecx+4]
        and     ebx, not 0xFFF  ; clear memory type and reserved bits
; ebp:ebx = start of the range described by the current MTRR.
; 5c. Find the first existing range containing a point greater than ebp:ebx.
        lea     esi, [.first_range]
.find_range_wb:
; If there is no next range, or start of the next range is greater than ebp:ebx,
; exit the loop to 5d.
        mov     edi, [esi]
        test    edi, edi
        jz      .found_place_wb
        mov     eax, ebx
        mov     edx, ebp
        sub     eax, dword [edi + mtrr_range.start]
        sbb     edx, dword [edi + mtrr_range.start+4]
        jb      .found_place_wb
; Otherwise, if end of the next range is greater than or equal to ebp:ebx,
; exit the loop to 5e.
        mov     esi, edi
        sub     eax, dword [edi + mtrr_range.length]
        sbb     edx, dword [edi + mtrr_range.length+4]
        jb      .expand_wb
        or      eax, edx
        jnz     .find_range_wb
        jmp     .expand_wb
.found_place_wb:
; 5d. ebp:ebx is not within any existing range.
; Insert a new range between esi and edi.
; (Later, during 5e, it can be merged with the following ranges.)
        mov     eax, [.first_free_range]
        test    eax, eax
        jz      .abort
        mov     [esi], eax
        mov     edx, [eax + mtrr_range.next]
        mov     [.first_free_range], edx
        mov     dword [eax + mtrr_range.start], ebx
        mov     dword [eax + mtrr_range.start+4], ebp
; Don't fill [eax+mtrr_range.next] and [eax+mtrr_range.length] yet,
; they will be calculated including merges at step 5e.
        mov     esi, edi
        mov     edi, eax
.expand_wb:
; 5e. The range at edi contains ebp:ebx, and esi points to the first range
; to be checked for merge: esi=edi if ebp:ebx was found in an existing range,
; esi is next after edi if a new range with ebp:ebx was created.
; Merge it with following ranges while start of the next range is not greater
; than the end of the new range.
        add     ebx, [ecx+8]
        adc     ebp, [ecx+12]
; ebp:ebx = end of the range described by the current MTRR.
.expand_wb_loop:
; If there is no next range, or start of the next range is greater than ebp:ebx,
; exit the loop to 5g.
        test    esi, esi
        jz      .expand_wb_done
        mov     eax, ebx
        mov     edx, ebp
        sub     eax, dword [esi + mtrr_range.start]
        sbb     edx, dword [esi + mtrr_range.start+4]
        jb      .expand_wb_done
; Otherwise, if end of the next range is greater than or equal to ebp:ebx,
; exit the loop to 5f.
        sub     eax, dword [esi + mtrr_range.length]
        sbb     edx, dword [esi + mtrr_range.length+4]
        jb      .expand_wb_last
; Otherwise, the current range is completely within the new range.
; Free it and continue the loop.
        mov     edx, [esi + mtrr_range.next]
        cmp     esi, edi
        jz      @f
        mov     eax, [.first_free_range]
        mov     [esi + mtrr_range.next], eax
        mov     [.first_free_range], esi
@@:
        mov     esi, edx
        jmp     .expand_wb_loop
.expand_wb_last:
; 5f. Start of the new range is inside the range described by edi,
; end of the new range is inside the range described by esi.
; If esi is equal to edi, the new range is completely within
; an existing range, so proceed to the next range.
        cmp     esi, edi
        jz      .next_wb_range
; Otherwise, set end of the interval at edi to end of the interval at esi
; and free the range described by esi.
        mov     ebx, dword [esi + mtrr_range.start]
        mov     ebp, dword [esi + mtrr_range.start+4]
        add     ebx, dword [esi + mtrr_range.length]
        adc     ebp, dword [esi + mtrr_range.length+4]
        mov     edx, [esi + mtrr_range.next]
        mov     eax, [.first_free_range]
        mov     [esi + mtrr_range.next], eax
        mov     [.first_free_range], esi
        mov     esi, edx
.expand_wb_done:
; 5g. We have found the next range (maybe 0) after merging and
; the new end of range (maybe ebp:ebx from the new range,
; or end of another existing interval calculated at step 5f).
; Write them to the range at edi.
        mov     [edi + mtrr_range.next], esi
        sub     ebx, dword [edi + mtrr_range.start]
        sbb     ebp, dword [edi + mtrr_range.start+4]
        mov     dword [edi + mtrr_range.length], ebx
        mov     dword [edi + mtrr_range.length+4], ebp
.next_wb_range:
; 5h. Continue the loop 5b-5g over all configured MTRRs.
        add     ecx, 16
        cmp     ecx, [.mtrrs_end]
        jb      .fill_wb_ranges
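; A worked example for step 5 (using the configuration from the header comment):
; with WB MTRRs [0,2G), [2G,3G) and [3G,3.5G), in any order, each pass through
; 5b-5g either inserts a new range or merges into an adjacent one, so the list
; ends up holding the single entry [0,3.5G).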
; 6. Exclude all ranges marked as UC.
; 6a. Prepare for the loop over configured MTRRs filled at step 4.
        lea     ecx, [.mtrrs]
.fill_uc_ranges:
; 6b. Ignore non-UC MTRRs.
        mov     ebx, [ecx]
        cmp     bl, MEM_UC
        jnz     .next_uc_range
        mov     ebp, [ecx+4]
        and     ebx, not 0xFFF  ; clear memory type and reserved bits
; ebp:ebx = start of the range described by the current MTRR.
        lea     esi, [.first_range]
; 6c. Find the first existing range containing a point greater than ebp:ebx.
.find_range_uc:
; If there is no next range, ignore this MTRR:
; exit the loop and continue to the next MTRR.
        mov     edi, [esi]
        test    edi, edi
        jz      .next_uc_range
; If start of the next range is greater than or equal to ebp:ebx,
; exit the loop to 6e.
        mov     eax, dword [edi + mtrr_range.start]
        mov     edx, dword [edi + mtrr_range.start+4]
        sub     eax, ebx
        sbb     edx, ebp
        jnb     .truncate_uc
; Otherwise, continue the loop if end of the next range is less than ebp:ebx;
; exit the loop to 6d otherwise.
        mov     esi, edi
        add     eax, dword [edi + mtrr_range.length]
        adc     edx, dword [edi + mtrr_range.length+4]
        jnb     .find_range_uc
; 6d. ebp:ebx is inside (or at the end of) an existing range.
; Split the range. (The second part, maybe contained completely within the
; UC-range, maybe of zero length, can be removed at step 6e, if needed.)
        mov     edi, [.first_free_range]
        test    edi, edi
        jz      .abort
        mov     dword [edi + mtrr_range.start], ebx
        mov     dword [edi + mtrr_range.start+4], ebp
        mov     dword [edi + mtrr_range.length], eax
        mov     dword [edi + mtrr_range.length+4], edx
        mov     eax, [edi + mtrr_range.next]
        mov     [.first_free_range], eax
        mov     eax, [esi + mtrr_range.next]
        mov     [edi + mtrr_range.next], eax
; don't change [esi+mtrr_range.next] yet, it will be filled at step 6e
        mov     eax, ebx
        mov     edx, ebp
        sub     eax, dword [esi + mtrr_range.start]
        sbb     edx, dword [esi + mtrr_range.start+4]
        mov     dword [esi + mtrr_range.length], eax
        mov     dword [esi + mtrr_range.length+4], edx
.truncate_uc:
; 6e. edi is the first range after ebp:ebx; check it and the next ranges
; for intersection with the new range, truncating heads.
        add     ebx, [ecx+8]
        adc     ebp, [ecx+12]
; ebp:ebx = end of the range described by the current MTRR.
.truncate_uc_loop:
; If start of the next range is greater than ebp:ebx,
; exit the loop to 6g.
        mov     eax, ebx
        mov     edx, ebp
        sub     eax, dword [edi + mtrr_range.start]
        sbb     edx, dword [edi + mtrr_range.start+4]
        jb      .truncate_uc_done
; Otherwise, if end of the next range is greater than ebp:ebx,
; exit the loop to 6f.
        sub     eax, dword [edi + mtrr_range.length]
        sbb     edx, dword [edi + mtrr_range.length+4]
        jb      .truncate_uc_last
; Otherwise, the current range is completely within the new range.
; Free it and continue the loop if there is a next range.
; If that was the last range, exit the loop to 6g.
        mov     edx, [edi + mtrr_range.next]
        mov     eax, [.first_free_range]
        mov     [.first_free_range], edi
        mov     [edi + mtrr_range.next], eax
        mov     edi, edx
        test    edi, edi
        jnz     .truncate_uc_loop
        jmp     .truncate_uc_done
.truncate_uc_last:
; 6f. The range at edi partially intersects with the UC-range described by the MTRR.
; Truncate it from the head.
        mov     dword [edi + mtrr_range.start], ebx
        mov     dword [edi + mtrr_range.start+4], ebp
        neg     eax
        adc     edx, 0
        neg     edx
        mov     dword [edi + mtrr_range.length], eax
        mov     dword [edi + mtrr_range.length+4], edx
.truncate_uc_done:
; 6g. We have found the next range (maybe 0) after the intersection.
; Write it to [esi+mtrr_range.next].
        mov     [esi + mtrr_range.next], edi
.next_uc_range:
; 6h. Continue the loop 6b-6g over all configured MTRRs.
        add     ecx, 16
        cmp     ecx, [.mtrrs_end]
        jb      .fill_uc_ranges
; Sanity check: if there are no ranges after steps 5-6,
; fall back to step 7. Otherwise, go to 8.
        cmp     [.first_range], 0
        jnz     .ranges_ok
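; A worked example for step 6 (again from the header comment): if the WB list
; holds [0,4G) and a UC MTRR covers [3.5G,4G), 6c finds the containing range,
; 6d splits it into [0,3.5G) + [3.5G,4G), and 6e frees the second part, leaving
; [0,3.5G): UC MTRRs punch holes in the WB ranges collected at step 5.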
.fill_ranges_from_memory_map:
; 7. The BIOS has not configured variable-range MTRRs.
; Create one range from 0 to [MEM_AMOUNT].
        mov     eax, [.first_free_range]
        mov     edx, [eax + mtrr_range.next]
        mov     [.first_free_range], edx
        mov     [.first_range], eax
        xor     edx, edx
        mov     [eax + mtrr_range.next], edx
        mov     dword [eax + mtrr_range.start], edx
        mov     dword [eax + mtrr_range.start+4], edx
        mov     ecx, [MEM_AMOUNT]
        mov     dword [eax + mtrr_range.length], ecx
        mov     dword [eax + mtrr_range.length+4], edx
.ranges_ok:
; 8. We have calculated the list of WB-ranges.
; Now we should calculate a list of MTRRs so that
; * every MTRR describes a range whose length is a power of 2 and whose
;   start is aligned,
; * every MTRR can be WB or UC,
; * (sum of all WB ranges) minus (sum of all UC ranges) equals the calculated list,
; * top of 4G memory must not be covered by any ranges.
; Example: range [0,0xBC000000) can be converted to
; [0,0x80000000)+[0x80000000,0xC0000000)-[0xBC000000,0xC0000000)
;       WB      +           WB          -           UC
; but not to [0,0x100000000)-[0xC0000000,0x100000000)-[0xBC000000,0xC0000000).
; 8a. Check that the list of ranges is [0,something) plus, optionally, [4G,something).
; This holds in practice (see mtrrtest.asm for real-life examples)
; and significantly simplifies the code: ranges are independent, start of a range
; is almost always aligned (the only exception, upper memory above 4G, can be
; easily covered), and there is no need to consider adding holes before start of
; a range, only to append them to end of a range.
        xor     eax, eax
        mov     edi, [.first_range]
        cmp     dword [edi + mtrr_range.start], eax
        jnz     .abort
        cmp     dword [edi + mtrr_range.start+4], eax
        jnz     .abort
        cmp     dword [edi + mtrr_range.length+4], eax
        jnz     .abort
        mov     edx, [edi + mtrr_range.next]
        test    edx, edx
        jz      @f
        cmp     dword [edx + mtrr_range.start], eax
        jnz     .abort
        cmp     dword [edx + mtrr_range.start+4], 1
        jnz     .abort
        cmp     [edx + mtrr_range.next], eax
        jnz     .abort
@@:
; 8b. Initialize: no MTRRs filled.
        mov     [.num_used_mtrrs], eax
        lea     esi, [.mtrrs]
.range2mtrr_loop:
; 8c. If we are dealing with an upper-memory range (after 4G)
; with length > start, create one WB MTRR with [start,2*start),
; reset start to 2*start and return to this step.
; Example: [4G,24G) -> [4G,8G) {returning} + [8G,16G) {returning}
; + [16G,24G) {advancing to .start_aligned}.
        mov     eax, dword [edi + mtrr_range.length+4]
        test    eax, eax
        jz      .less4G
        mov     edx, dword [edi + mtrr_range.start+4]
        cmp     eax, edx
        jb      .start_aligned
        inc     [.num_used_mtrrs]
        cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
        ja      .abort
        mov     dword [esi], MEM_WB
        mov     dword [esi+4], edx
        mov     dword [esi+8], 0
        mov     dword [esi+12], edx
        add     esi, 16
        add     dword [edi + mtrr_range.start+4], edx
        sub     dword [edi + mtrr_range.length+4], edx
        jnz     .range2mtrr_loop
        cmp     dword [edi + mtrr_range.length], 0
        jz      .range2mtrr_next
.less4G:
; 8d. If we are dealing with a low-memory range (before 4G)
; and appending a maximal-size hole would create a range covering top of 4G,
; create a maximal-size WB range and return to this step.
; Example: for [0,0xBC000000) the following steps would consider
; variants [0,0x80000000)+(another range to be split) and
; [0,0x100000000)-(another range to be split); we forbid the last variant,
; so the first variant must be used.
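; A worked example for 8d (numbers from the example above): for [0,0xBC000000)
; the highest length bit gives edx = 0x80000000, and start + 2*edx wraps to 0,
; i.e. a hole would cover the top of 4G; so a WB MTRR [0,0x80000000) is emitted
; and the loop retries with [0x80000000,0xBC000000), which then passes the
; check and proceeds to .start_aligned.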
        bsr     ecx, dword [edi + mtrr_range.length]
        xor     edx, edx
        inc     edx
        shl     edx, cl
        lea     eax, [edx*2]
        add     eax, dword [edi + mtrr_range.start]
        jnz     .start_aligned
        inc     [.num_used_mtrrs]
        cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
        ja      .abort
        mov     eax, dword [edi + mtrr_range.start]
        mov     dword [esi], eax
        or      dword [esi], MEM_WB
        mov     dword [esi+4], 0
        mov     dword [esi+8], edx
        mov     dword [esi+12], 0
        add     esi, 16
        add     dword [edi + mtrr_range.start], edx
        sub     dword [edi + mtrr_range.length], edx
        jnz     .less4G
        jmp     .range2mtrr_next
.start_aligned:
; Start is aligned for any allowed length, a maximal-size hole is allowed.
; Select the best MTRR configuration for one range.
; length=...101101
; Without a hole at the end, we need one WB MTRR for every 1-bit in length:
; length=...100000 + ...001000 + ...000100 + ...000001
; We can also append one hole at the end so that one 0-bit (selected by us)
; becomes 1 and all lower bits become 0 for the WB-range:
; length=...110000 - (...00010 + ...00001)
; In this way, we need one WB MTRR for every 1-bit higher than the selected bit,
; one WB MTRR for the selected bit, one UC MTRR for every 0-bit between
; the selected bit and the lowest 1-bit (they become 1-bits after negation)
; and one UC MTRR for the lowest 1-bit.
; So we need to select the 0-bit with the maximal difference
; (number of 1-bits) - (number of 0-bits) between the selected and lowest 1-bit;
; this equals the gain from using a hole. If the difference is not positive
; for all 0-bits, don't append a hole.
; Note that the lowest 1-bit is not included when counting, but the selected 0-bit is.
; 8e. Find the optimal bit position for the hole.
; eax = current difference, ebx = best difference,
; ecx = hole bit position, edx = current bit position.
        xor     eax, eax
        xor     ebx, ebx
        xor     ecx, ecx
        bsf     edx, dword [edi + mtrr_range.length]
        jnz     @f
        bsf     edx, dword [edi + mtrr_range.length+4]
        add     edx, 32
@@:
        push    edx     ; save position of lowest 1-bit for step 8f
.calc_stat:
        inc     edx
        cmp     edx, 64
        jae     .stat_done
        inc     eax     ; increment the difference in hope for a 1-bit
; Note: bt conveniently works with both .length and .length+4,
; depending on whether edx>=32.
        bt      dword [edi + mtrr_range.length], edx
        jc      .calc_stat
        dec     eax     ; hope was wrong, decrement the difference to correct 'inc'
        dec     eax     ; and again, now getting the real difference
        cmp     eax, ebx
        jle     .calc_stat
        mov     ebx, eax
        mov     ecx, edx
        jmp     .calc_stat
.stat_done:
; 8f. If we decided to create a hole, flip all bits between lowest and selected.
        pop     edx     ; restore position of lowest 1-bit saved at step 8e
        test    ecx, ecx
        jz      .fill_hi_init
@@:
        inc     edx
        cmp     edx, ecx
        ja      .fill_hi_init
        btc     dword [edi + mtrr_range.length], edx
        jmp     @b
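; A worked example for 8e/8f (continuing the numbers above): the remaining
; range [0x80000000,0xBC000000) has length 0x3C000000 with 1-bits 26..29;
; the best hole bit is 30 (difference 3-1 = 2), so 8f flips bits 27..30 and
; the length becomes 0x44000000: one WB MTRR of 0x40000000 at the selected bit
; plus one UC MTRR of 0x04000000 at the lowest 1-bit, i.e.
; WB [0x80000000,0xC0000000) minus UC [0xBC000000,0xC0000000),
; exactly the decomposition promised in the example at step 8.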
.fill_hi_init:
; 8g. Create MTRR ranges corresponding to the upper 32 bits.
        sub     ecx, 32
.fill_hi_loop:
        bsr     edx, dword [edi + mtrr_range.length+4]
        jz      .fill_hi_done
        inc     [.num_used_mtrrs]
        cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
        ja      .abort
        mov     eax, dword [edi + mtrr_range.start]
        mov     [esi], eax
        mov     eax, dword [edi + mtrr_range.start+4]
        mov     [esi+4], eax
        xor     eax, eax
        mov     [esi+8], eax
        bts     eax, edx
        mov     [esi+12], eax
        cmp     edx, ecx
        jl      .fill_hi_uc
        or      dword [esi], MEM_WB
        add     dword [edi + mtrr_range.start+4], eax
        jmp     @f
.fill_hi_uc:
        sub     dword [esi+4], eax
        sub     dword [edi + mtrr_range.start+4], eax
@@:
        add     esi, 16
        sub     dword [edi + mtrr_range.length+4], eax
        jmp     .fill_hi_loop
.fill_hi_done:
; 8h. Create MTRR ranges corresponding to the lower 32 bits.
        add     ecx, 32
.fill_lo_loop:
        bsr     edx, dword [edi + mtrr_range.length]
        jz      .range2mtrr_next
        inc     [.num_used_mtrrs]
        cmp     [.num_used_mtrrs], MAX_USEFUL_MTRRS
        ja      .abort
        mov     eax, dword [edi + mtrr_range.start]
        mov     [esi], eax
        mov     eax, dword [edi + mtrr_range.start+4]
        mov     [esi+4], eax
        xor     eax, eax
        mov     [esi+12], eax
        bts     eax, edx
        mov     [esi+8], eax
        cmp     edx, ecx
        jl      .fill_lo_uc
        or      dword [esi], MEM_WB
        add     dword [edi + mtrr_range.start], eax
        jmp     @f
.fill_lo_uc:
        sub     dword [esi], eax
        sub     dword [edi + mtrr_range.start], eax
@@:
        add     esi, 16
        sub     dword [edi + mtrr_range.length], eax
        jmp     .fill_lo_loop
.range2mtrr_next:
; 8i. Repeat the loop at 8c-8h for all ranges.
        mov     edi, [edi + mtrr_range.next]
        test    edi, edi
        jnz     .range2mtrr_loop
; 9. We have calculated the needed MTRRs, now set them up in the CPU.
; 9a. Abort if the number of MTRRs is too large.
        mov     eax, [num_variable_mtrrs]
        cmp     [.num_used_mtrrs], eax
        ja      .abort
; 9b. Prepare for changes.
        call    mtrr_begin_change
; 9c. Prepare for the loop over MTRRs.
        lea     esi, [.mtrrs]
        mov     ecx, 0x200
@@:
; 9d. For every MTRR, copy PHYSBASEn as is: step 8 has configured
; the start value and type bits as needed.
        mov     eax, [esi]
        mov     edx, [esi+4]
        wrmsr
        inc     ecx
; 9e. For every MTRR, calculate PHYSMASKn = -(length) or 0x800
; with upper bits cleared, 0x800 = MTRR is valid.
        xor     eax, eax
        xor     edx, edx
        sub     eax, [esi+8]
        sbb     edx, [esi+12]
        or      eax, 0x800
        or      edx, [.phys_reserved_mask]
        xor     edx, [.phys_reserved_mask]
        wrmsr
        inc     ecx
; 9f. Continue steps 9d and 9e for all MTRRs calculated at step 8.
        add     esi, 16
        dec     [.num_used_mtrrs]
        jnz     @b
; 9g. Zero the other MTRRs.
        xor     eax, eax
        xor     edx, edx
        mov     ebx, [num_variable_mtrrs]
        lea     ebx, [0x200+ebx*2]
@@:
        cmp     ecx, ebx
        jae     @f
        wrmsr
        inc     ecx
        wrmsr
        inc     ecx
        jmp     @b
@@:
; 9h. Check PAT support and reprogram the PAT MSR for write-combining memory.
        bt      [cpu_caps], CAPS_PAT
        jnc     @F
        mov     ecx, MSR_CR_PAT
        mov     eax, PAT_VALUE  ; UC UCM WC WB
        mov     edx, eax
        wrmsr
@@:
; 9i. Changes are done.
        call    mtrr_end_change
.abort:
        add     esp, .local_vars_size + MAX_RANGES * sizeof.mtrr_range
        pop     ebp
        ret
endp

; Allocate & set one MTRR for the given range.
; size must be a power of 2 that divides base.
proc set_mtrr stdcall, base:dword, size:dword, mem_type:dword
; find an unused register
        mov     ecx, 0x201
.scan:
        mov     eax, [num_variable_mtrrs]
        lea     eax, [0x200+eax*2]
        cmp     ecx, eax
        jae     .ret
        rdmsr
        dec     ecx
        test    ah, 8
        jz      .found
        rdmsr
        test    edx, edx
        jnz     @f
        and     eax, not 0xFFF  ; clear reserved bits
        cmp     eax, [base]
        jz      .ret
@@:
        add     ecx, 3
        jmp     .scan
; no free registers, ignore the call
.ret:
        ret
.found:
; found, write the values
        push    ecx
        call    mtrr_begin_change
        pop     ecx
        xor     edx, edx
        mov     eax, [base]
        or      eax, [mem_type]
        wrmsr
        mov     al, [cpu_phys_addr_width]
        xor     edx, edx
        bts     edx, eax
        xor     eax, eax
        sub     eax, [size]
        sbb     edx, 0
        or      eax, 0x800
        inc     ecx
        wrmsr
        call    mtrr_end_change
        ret
endp
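; A usage sketch for set_mtrr (this mirrors the only call site, in init_mtrr
; above; LFBAddress and MEM_WC are defined elsewhere in the kernel):
;       stdcall set_mtrr, [LFBAddress], 0x1000000, MEM_WC
; marks 16M of the linear framebuffer as write-combined; per the contract
; above, size must be a power of 2 that divides base.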
; Helper procedure for mtrr_validate.
; Calculates the memory type for a given address according to variable-range MTRRs.
; Assumes that MTRRs are enabled.
; in: ebx = 32-bit physical address
; out: eax = memory type for ebx
proc mtrr_get_real_type
; 1. Initialize: we have not yet found any MTRRs covering ebx.
        push    0
        mov     ecx, 0x201
.mtrr_loop:
; 2. For every MTRR, check whether it is valid; if not, continue to the next MTRR.
        rdmsr
        dec     ecx
        test    ah, 8
        jz      .next
; 3. For every valid MTRR, check whether (ebx and PHYSMASKn) == PHYSBASEn,
; excluding low 12 bits.
        and     eax, ebx
        push    eax
        rdmsr
        test    edx, edx
        pop     edx
        jnz     .next
        xor     edx, eax
        and     edx, not 0xFFF
        jnz     .next
; 4. If so, set the bit corresponding to the memory type defined by this MTRR.
        and     eax, 7
        bts     [esp], eax
.next:
; 5. Continue the loop at 2-4 for all variable-range MTRRs.
        add     ecx, 3
        mov     eax, [num_variable_mtrrs]
        lea     eax, [0x200+eax*2]
        cmp     ecx, eax
        jb      .mtrr_loop
; 6. If no MTRRs cover the address in ebx, use the default type from MTRR_DEF_TYPE.
        pop     edx
        test    edx, edx
        jz      .default
; 7. Find & clear the lowest 1-bit in edx.
        bsf     eax, edx
        btr     edx, eax
; 8. If there was only one 1-bit, then all MTRRs are consistent; return that bit.
        test    edx, edx
        jz      .nothing
; Otherwise, return MEM_UC (e.g. WB+UC is UC).
        xor     eax, eax
.nothing:
        ret
.default:
        mov     ecx, 0x2FF
        rdmsr
        movzx   eax, al
        ret
endp

; If MTRRs are configured improperly, this is not obvious to the user;
; everything works, but the performance can be horrible.
; Try to detect this and let the user know that the low performance
; is caused by some problem and is not a global property of the system.
; Let's hope the user reports it to the developers...
proc mtrr_validate
; 1. If MTRRs are not supported, they cannot be configured improperly.
; Note: VirtualBox claims MTRR support in cpuid, but emulates MTRRCAP=0,
; which is effectively equivalent to absent MTRRs.
; So check [num_variable_mtrrs] instead of CAPS_MTRR in [cpu_caps].
        cmp     [num_variable_mtrrs], 0
        jz      .exit
; 2. If variable-range MTRRs are not configured, this is a problem.
        mov     ecx, 0x2FF
        rdmsr
        test    ah, 8
        jz      .fail
; 3. Get the memory type for an address somewhere inside working memory.
; It must be write-back.
        mov     ebx, 0x27FFFF
        call    mtrr_get_real_type
        cmp     al, MEM_WB
        jnz     .fail
; 4. If we're using a mode with LFB,
; get the memory type for the last pixel of the framebuffer.
; It must be write-combined.
        test    word [SCR_MODE], 0x4000
        jz      .exit
        mov     eax, [_display.lfb_pitch]
        mul     [_display.height]
        dec     eax
; The LFB is mapped at virtual address LFB_BASE;
; it uses global pages if supported by the CPU.
        mov     ebx, [sys_proc + PROC.pdt_0 + (LFB_BASE shr 20)]
        test    ebx, PDE_LARGE
        jnz     @f
        mov     ebx, [page_tabs + (LFB_BASE shr 10)]
@@:
        and     ebx, not 0xFFF
        add     ebx, eax
        call    mtrr_get_real_type
        cmp     al, MEM_WC
        jz      .exit
; 5. The check at step 4 fails on Bochs:
; the Bochs BIOS configures MTRRs in a strange way not respecting
; [cpu_phys_addr_width], so mtrr_reconfigure avoids touching anything.
; However, the Bochs core ignores MTRRs (keeping them only for rdmsr/wrmsr),
; so we don't care about proper settings for Bochs.
; Use the northbridge PCI id to detect Bochs: it emulates either i440fx or
; i430fx, depending on the configuration file.
        mov     eax, [pcidev_list.fd]
        cmp     eax, pcidev_list ; sanity check: fail if no PCI devices
        jz      .fail
        cmp     [eax + PCIDEV.vendor_device_id], 0x12378086
        jz      .exit
        cmp     [eax + PCIDEV.vendor_device_id], 0x01228086
        jnz     .fail
.exit:
        ret
.fail:
        mov     ebx, mtrr_user_message
        mov     ebp, notifyapp
        call    fs_execute_from_sysdir_param
        ret
endp
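; A worked example for mtrr_get_real_type above (illustrative): for an address
; covered by both a WB MTRR (type 6) and a UC MTRR (type 0), steps 2-4 set
; bits 6 and 0 in the accumulator at [esp]; steps 7-8 then find more than one
; 1-bit and return MEM_UC, matching the WB+UC=UC rule from the
; mtrr_reconfigure header comment.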