;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; ;;
|
|
;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;;
|
|
;; Distributed under terms of the GNU General Public License ;;
|
|
;; ;;
|
|
;; GNU GENERAL PUBLIC LICENSE ;;
|
|
;; Version 2, June 1991 ;;
|
|
;; ;;
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
format PE DLL native
|
|
entry START
|
|
|
|
API_VERSION = 0 ;debug
|
|
SRV_GETVERSION = 0
|
|
__DEBUG__ = 1
|
|
__DEBUG_LEVEL__ = 1
|
|
DRIVER_VERSION = 1
|
|
DBG_INFO = 1
|
|
NULLPTR = 0
|
|
FALSE = 0
|
|
TRUE = 1
|
|
|
|
section ".flat" code readable writable executable
|
|
include "../proc32.inc"
|
|
include "../struct.inc"
|
|
include "../macros.inc"
|
|
include "../fdo.inc"
|
|
include "../pci.inc"
|
|
include "../peimport.inc"
|
|
include "nvme.inc"
|
|
include "macros.inc"
|
|
include "lib.inc"
|
|
include "command.inc"
|
|
|
|
struct DISKMEDIAINFO
|
|
flags dd ?
|
|
sectorsize dd ?
|
|
capacity dq ?
|
|
ends
|
|
|
|
proc START c, reason:dword, cmdline:dword
|
|
local AnythingLoadedSuccessfully db 0
|
|
|
|
push ebx esi edi
|
|
cmp [reason], DRV_ENTRY
|
|
jne .err
|
|
|
|
.entry:
|
|
DEBUGF DBG_INFO, "Detecting NVMe device...\n"
|
|
call detect_nvme
|
|
test eax, eax
|
|
jz .err
|
|
xor ebx, ebx
|
|
mov esi, dword [p_nvme_devices]
|
|
test esi, esi
|
|
jz .err
|
|
sub esi, sizeof.pcidev
|
|
|
|
.loop:
|
|
add esi, sizeof.pcidev
|
|
push ebx esi
|
|
stdcall device_is_compat, esi
|
|
test eax, eax
|
|
jz .pop
|
|
stdcall nvme_init, esi
|
|
test eax, eax
|
|
jz .pop
|
|
pop esi ebx
|
|
stdcall add_nvme_disk, esi
|
|
jmp .next
|
|
|
|
.pop:
|
|
pop esi ebx
|
|
|
|
.next:
|
|
test eax, eax
|
|
setne [AnythingLoadedSuccessfully]
|
|
inc ebx
|
|
cmp ebx, dword [pcidevs_len]
|
|
jne .loop
|
|
cmp [AnythingLoadedSuccessfully], 0
|
|
jz .err
|
|
invoke RegService, my_service, service_proc
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
.err:
|
|
call nvme_cleanup
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
proc service_proc stdcall, ioctl:dword
|
|
|
|
mov esi, [ioctl]
|
|
mov eax, [esi + IOCTL.io_code]
|
|
cmp eax, SRV_GETVERSION
|
|
jne .ret
|
|
|
|
mov eax, [esi + IOCTL.output]
|
|
cmp [esi + IOCTL.out_size], 4
|
|
jne .ret
|
|
mov dword [eax], API_VERSION
|
|
xor eax, eax
|
|
ret
|
|
|
|
.ret:
|
|
or eax, -1
|
|
ret
|
|
|
|
endp
|
|
|
|
; Registers the NVMe disk with KolibriOS. This requires that the
; device was successfully initialized by nvme_init; otherwise the
; behavior is undefined.
|
|
proc add_nvme_disk stdcall, pci:dword
|
|
|
|
push esi
|
|
mov esi, [pci]
|
|
|
|
; NOTE: If pcidev.num or pcidev.nsid is greater than 9 then this fails
; to build the string correctly. Ignoring this issue for now, since a
; desktop computer with more than 9 NVMe SSDs is unlikely, and an NSID
; greater than 9 is just as unlikely.
;
; Still, this should be addressed in the future.
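; For example, with pcidev.num = 0 and pcidev.nsid = 1, the pushes below build
; the null-terminated string "nvme0n1" on the stack, which is then passed to
; DiskAdd as the disk name.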
|
|
push 0 ; null terminator
|
|
movzx eax, byte [esi + pcidev.nsid]
|
|
add al, "0"
|
|
mov byte [esp], al
|
|
dec esp
|
|
mov byte [esp], "n"
|
|
dec esp
|
|
movzx eax, byte [esi + pcidev.num]
|
|
add al, "0"
|
|
mov byte [esp], al
|
|
push "nvme"
|
|
mov eax, esp
|
|
invoke DiskAdd, disk_functions, eax, [esi + pcidev.nsinfo], 0
|
|
add esp, 10
|
|
test eax, eax
|
|
jz @f
|
|
invoke DiskMediaChanged, eax, 1
|
|
DEBUGF DBG_INFO, "nvme%un%u: Successfully registered disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
|
|
xor eax, eax
|
|
inc eax
|
|
pop esi
|
|
ret
|
|
|
|
@@:
|
|
DEBUGF DBG_INFO, "nvme%un%u: Failed to register disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
|
|
xor eax, eax
|
|
pop esi
|
|
ret
|
|
|
|
endp
|
|
|
|
proc nvme_query_media stdcall, userdata:dword, info:dword
|
|
|
|
push ebx esi edi
|
|
mov esi, [userdata]
|
|
mov ebx, dword [esi + NSINFO.pci]
|
|
mov edi, [info]
|
|
mov dword [edi + DISKMEDIAINFO.flags], 0
|
|
mov cl, byte [esi + NSINFO.lbads]
|
|
xor eax, eax
|
|
inc eax
|
|
shl eax, cl
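; sector size = 2^LBADS bytes, e.g. LBADS = 9 gives 512-byte sectors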
|
|
DEBUGF DBG_INFO, "nvme%un%u (Query Media): Sector size = %u\n", [ebx + pcidev.num], [esi + NSINFO.nsid], eax
|
|
mov dword [edi + DISKMEDIAINFO.sectorsize], eax
|
|
mov eax, dword [esi + NSINFO.capacity]
|
|
mov dword [edi + DISKMEDIAINFO.capacity], eax
|
|
mov eax, dword [esi + NSINFO.capacity + 4]
|
|
mov dword [edi + DISKMEDIAINFO.capacity + 4], eax
|
|
DEBUGF DBG_INFO, "nvme%un%u (Query Media): Capacity = %u + %u sectors\n", [ebx + pcidev.num], [esi + NSINFO.nsid], [esi + NSINFO.capacity], [esi + NSINFO.capacity + 4]
|
|
xor eax, eax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
; Returns 1 if the given NSID is an active NSID, returns
; 0 otherwise.
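; (Per the NVMe 1.4 spec, the Identify Namespace data structure returned for an
; inactive NSID is zero filled, which is why scanning for a non-zero dword below
; is enough to tell active from inactive.)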
|
|
proc is_active_namespace stdcall, pci:dword, nsid:dword
|
|
|
|
push esi edi
|
|
invoke KernelAlloc, 0x1000
|
|
test eax, eax
|
|
jnz @f
|
|
pop edi esi
|
|
ret
|
|
|
|
@@:
|
|
mov esi, eax
|
|
invoke GetPhysAddr
|
|
stdcall nvme_identify, [pci], [nsid], eax, CNS_IDNS
|
|
test eax, eax
|
|
jz .not_active_nsid
|
|
xor ecx, ecx
|
|
|
|
@@:
|
|
mov eax, dword [esi + ecx * 4]
|
|
test eax, eax
|
|
jnz .is_active_nsid
|
|
inc ecx
|
|
cmp ecx, 0x1000 / 4
|
|
jne @b
|
|
|
|
.not_active_nsid:
|
|
invoke KernelFree, esi
|
|
pop edi esi
|
|
xor eax, eax
|
|
ret
|
|
|
|
.is_active_nsid:
|
|
invoke KernelFree, esi
|
|
pop edi esi
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; See page 248 of the NVMe 1.4 specification for reference.
; Returns the first active NSID found (searching upwards from 1 through
; [pci + pcidev.nn]), or 0 if no active namespace was found. Note that
; [pci + pcidev.nn] must already be set; nvme_init fills it in from
; IDENTC.nn before calling this.
|
|
proc determine_active_nsids stdcall, pci:dword
|
|
|
|
push ebx esi
|
|
mov esi, [pci]
|
|
xor ebx, ebx
|
|
xor ecx, ecx
|
|
inc ecx
|
|
|
|
.loop:
|
|
cmp ecx, dword [esi + pcidev.nn]
|
|
ja .ret
|
|
push ecx
|
|
stdcall is_active_namespace, [pci], ecx
|
|
pop ecx
|
|
test eax, eax
|
|
jz .not_active_namespace
|
|
mov ebx, ecx
|
|
jmp .ret
|
|
|
|
.not_active_namespace:
|
|
inc ecx
|
|
jmp .loop
|
|
|
|
.ret:
mov eax, ebx
pop esi ebx
ret
|
|
|
|
endp
|
|
|
|
; Allocates memory at prp_list_ptr and builds a PRP list there. nprps should
; be set to the number of PRP entries the caller wants to create.
;
; This function should only be called if the conditions for building
; a PRP list are met (see page 68 of the NVMe 1.4.0 spec).
;
; TODO: Currently the code for building recursive PRP lists is untested.
; If you want to test it, do a read/write with a sector count equivalent
; to more than 4MiB. Will test in the future.
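; Rough illustration (assuming 4 KiB pages and 512-byte sectors): a 1 MiB
; transfer spans 2048 sectors = 256 pages, which is far more than PRP1/PRP2
; can describe on their own, so a PRP list of physical page addresses like the
; one built below is required.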
|
|
proc build_prp_list stdcall, nprps:dword, buf:dword, prp_list_ptr:dword
|
|
|
|
push esi ebx edi
|
|
sub esp, 4
|
|
|
|
; stack:
|
|
; [esp]: virtual pointer to first PRP list
|
|
; here, we store the pointer to the very first
|
|
; PRP list so that free_prp_list can free the
|
|
; entire PRP list if something goes wrong, it
|
|
; also serves as our return value placeholder
|
|
mov dword [esp], 0
|
|
|
|
xor edi, edi
|
|
xor esi, esi
|
|
mov ecx, [nprps]
|
|
shl ecx, 3 ; multiply by 8 since each PRP pointer is a QWORD
|
|
|
|
; we'll store consecutive PRP list buffers here, for example
|
|
; given 2 PRP lists, we allocate 2 continuous pages
|
|
push ecx
|
|
invoke KernelAlloc, ecx ; store pointers to the PRP entries here
|
|
pop ecx
|
|
test eax, eax
|
|
jz .err
|
|
mov dword [esp], eax
|
|
mov edi, eax
|
|
mov eax, [prp_list_ptr]
|
|
mov dword [eax], edi
|
|
shr ecx, 1
|
|
stdcall memsetdz, edi, ecx
|
|
|
|
; note we assume buf is page-aligned
|
|
mov esi, [buf]
|
|
|
|
.build_prp_list:
|
|
; ensure we don't cross a page boundary
|
|
mov ebx, [nprps]
|
|
cmp ebx, PAGE_SIZE / 8
|
|
jb @f
|
|
mov ebx, PAGE_SIZE / 8
|
|
sub [nprps], ebx
|
|
|
|
@@:
|
|
xor ecx, ecx
|
|
cmp dword [esp], edi
|
|
je .loop
|
|
|
|
; we need to store the pointer of the next
|
|
; PRP list to the previous PRP list last entry
|
|
mov eax, edi
|
|
invoke GetPhysAddr
|
|
mov dword [edi - 8], eax
|
|
mov dword [edi - 4], 0
|
|
|
|
.loop:
|
|
mov eax, esi
|
|
invoke GetPhysAddr
|
|
mov dword [edi + ecx * 8], eax
|
|
mov dword [edi + ecx * 8 - 4], 0
|
|
add esi, PAGE_SIZE
|
|
inc ecx
|
|
cmp ecx, ebx
|
|
jne .loop
|
|
|
|
; check if we need to build another PRP list
|
|
add edi, PAGE_SIZE
|
|
cmp ebx, PAGE_SIZE / 8
|
|
je .build_prp_list
|
|
|
|
; PRP list successfully created
|
|
mov eax, dword [esp]
|
|
invoke GetPhysAddr
|
|
add esp, 4
|
|
pop edi ebx esi
|
|
ret
|
|
|
|
.err:
|
|
add esp, 4
|
|
pop edi ebx esi
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; Allocates PRP1/PRP2. Note that it is not required to call this function
; unless you're doing reads and writes with an arbitrary buffer that the
; kernel passes to the driver. In most other cases, it's better to just
; allocate a page-aligned buffer.
;
; ns: Pointer to the device's respective namespace struct
;
; prps_ptr: should be a pointer to at least 2 DWORDs (PRP1 and PRP2,
; respectively). The caller does not have to initialize PRP1, but PRP2
; should explicitly be initialized to 0.
|
|
;
|
|
; prp_list_ptr: pointer to 1 DWORD, the caller must initialize this value to 0.
|
|
; If a PRP list is allocated, then prp_list_ptr shall contain the pointer to
|
|
; the PRP list. The caller is required to free the allocated memory afterwards.
|
|
;
|
|
; buf: Pointer to the buffer
|
|
;
|
|
; On success, the function will return 1 and the PRPs will be initialized. If an
|
|
; error occurs (most likely due to memory allocation), the function returns 0.
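; Illustration (assuming a page-aligned buffer, 4 KiB pages, and 512-byte
; sectors, i.e. LBADS = 9): a transfer of up to 8 sectors needs only PRP1;
; up to 16 sectors needs PRP1 plus PRP2 pointing at the second page; anything
; larger makes PRP2 point to a PRP list built by build_prp_list.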
|
|
proc alloc_dptr stdcall, ns:dword, prps_ptr:dword, numsectors:dword, prp_list_ptr:dword, buf:dword
|
|
|
|
push ebx esi edi
|
|
mov esi, [ns]
|
|
mov edi, [prps_ptr]
|
|
mov eax, [buf]
|
|
invoke GetPhysAddr
|
|
mov dword [edi], eax
|
|
mov cl, byte [esi + NSINFO.lbads]
|
|
mov ebx, PAGE_SIZE
|
|
shr ebx, cl
|
|
mov edx, [numsectors]
|
|
|
|
; is the buffer offset portion equal to 0?
|
|
mov eax, [buf]
|
|
mov ecx, eax
|
|
and eax, PAGE_SIZE - 1
|
|
mov eax, ebx
|
|
jnz @f
|
|
|
|
; is the number of sectors less than or equal to one memory page?
|
|
cmp edx, ebx
|
|
jbe .success
|
|
shl ebx, 1 ; it is page aligned, so set ebx to 2 memory pages
|
|
|
|
@@:
|
|
; is the number of sectors greater than one or two memory pages?
|
|
cmp edx, ebx
|
|
ja .build_prp_list
|
|
|
|
; set PRP2
|
|
mov eax, ecx
|
|
and eax, not (PAGE_SIZE - 1)
|
|
add eax, PAGE_SIZE
|
|
invoke GetPhysAddr
|
|
mov dword [edi + 4], eax
|
|
jmp .success
|
|
|
|
.build_prp_list:
|
|
mov ebx, ecx
|
|
mov ecx, eax
|
|
and ebx, not (PAGE_SIZE - 1)
|
|
add ebx, PAGE_SIZE
|
|
mov eax, [numsectors]
|
|
xor edx, edx
|
|
div ecx
|
|
stdcall build_prp_list, eax, ebx, [prp_list_ptr]
|
|
test eax, eax
|
|
jz .err
|
|
mov dword [edi + 4], eax
|
|
|
|
.success:
|
|
xor eax, eax
|
|
inc eax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
.err:
|
|
xor eax, eax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
nvme_read:
|
|
mov edx, NVM_CMD_READ
|
|
jmp nvme_readwrite
|
|
|
|
nvme_write:
|
|
mov edx, NVM_CMD_WRITE
|
|
|
|
; Reads from/writes to the disk
|
|
proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors_ptr:dword
|
|
|
|
push ebx esi edi
|
|
sub esp, 20
|
|
|
|
; TODO: check if numsectors exceeds IDENTC.MDTS?
|
|
|
|
; stack:
|
|
; [esp] - PRP1
|
|
; [esp + 4] - PRP2
|
|
; [esp + 8] - command type (read or write)
|
|
; [esp + 12] - original numsectors value
|
|
; [esp + 16] - virtual pointer to PRP2 PRP list (if allocated, 0 if not)
|
|
mov ebx, esp
|
|
|
|
mov esi, [ns]
|
|
mov edi, [buf]
|
|
|
|
mov eax, [numsectors_ptr]
|
|
mov eax, dword [eax]
|
|
DEBUGF DBG_INFO, "buf: %x, start_sector: %u:%u, numsectors: %u\n", [buf], [start_sector + 4], [start_sector], eax
|
|
mov dword [ebx + 4], 0 ; PRP2 entry (0 by default)
|
|
mov dword [ebx + 8], edx ; command type (read or write)
|
|
mov dword [ebx + 12], eax ; save original numsectors value
|
|
mov dword [ebx + 16], 0 ; virtual pointer to PRP2 PRP list (not allocated by default)
|
|
|
|
mov ecx, ebx
|
|
add ecx, 16
|
|
|
|
; Note that [esp] will contain the value of PRP1 and [esp + 4] will
|
|
; contain the value of PRP2. If PRP2 is a PRP list, then [esp + 16] will point
|
|
; to the allocated PRP list (after this call, only if it completes successfully)
|
|
stdcall alloc_dptr, esi, ebx, eax, ecx, [buf]
|
|
test eax, eax
|
|
jz .fail
|
|
|
|
DEBUGF DBG_INFO, "PRP1: %x, PRP2: %x\n", [ebx], [ebx + 4]
|
|
mov eax, dword [start_sector]
|
|
|
|
; According to the NVMe specification, the NLB field in the I/O read and write
; commands is a 0-based value (i.e., 0 is equivalent to 1, 1 is equivalent to 2, ...).
; As far as I know, KolibriOS doesn't follow this convention, so just decrement the
; value and it should have the same effect.
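; (For example, a request for 8 sectors is submitted with NLB = 7.)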
|
|
mov ecx, dword [ebx + 12]
|
|
dec ecx
|
|
|
|
; TODO: add non-blocking mechanisms later on
|
|
push eax
|
|
mov eax, dword [esi + NSINFO.pci]
|
|
mov dword [eax + pcidev.spinlock], 1
|
|
pop eax
|
|
stdcall nvme_io_rw, [esi + NSINFO.pci], \
|
|
1, \
|
|
[esi + NSINFO.nsid], \
|
|
dword [ebx], \
|
|
dword [ebx + 4], \
|
|
eax, \
|
|
dword [start_sector + 4], \
|
|
ecx, \
|
|
dword [ebx + 8]
|
|
|
|
; TODO: add non-blocking mechanisms later on
|
|
stdcall nvme_poll, [esi + NSINFO.pci]
|
|
test eax, eax
|
|
jz .fail
|
|
|
|
; free PRP list (if allocated)
|
|
mov eax, dword [ebx + 16]
|
|
test eax, eax
|
|
jz @f
|
|
invoke KernelFree, eax
|
|
|
|
@@:
|
|
xor eax, eax
|
|
add esp, 20
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
.fail:
|
|
; free PRP list (if allocated)
|
|
mov eax, dword [ebx + 16]
|
|
test eax, eax
|
|
jz @f
|
|
invoke KernelFree, eax
|
|
|
|
@@:
|
|
mov ebx, [numsectors_ptr]
|
|
mov dword [ebx], 0
|
|
add esp, 20
|
|
pop edi esi ebx
|
|
or eax, -1 ; generic disk error
|
|
ret
|
|
|
|
endp
|
|
|
|
; Detects NVMe devices on the PCI bus, stores them in
; [p_nvme_devices], and sets [pcidevs_len] to the appropriate
; size based on how many NVMe devices there are.
|
|
proc detect_nvme
|
|
|
|
invoke GetPCIList
|
|
mov esi, eax
|
|
mov ebx, eax
|
|
|
|
.check_dev:
|
|
mov eax, dword [esi + PCIDEV.class]
|
|
and eax, 0x00ffff00 ; retrieve class/subclass code only
|
|
cmp eax, 0x00010800 ; Mass Storage Controller - Non-Volatile Memory Controller
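; (base class 01h = Mass Storage Controller, subclass 08h = Non-Volatile Memory
; controller; the programming interface byte, 02h for NVM Express, is masked
; off above, so any NVM-subclass device matches)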
|
|
je .found_dev
|
|
|
|
.next_dev:
|
|
mov esi, dword [esi + PCIDEV.fd]
|
|
cmp esi, ebx
|
|
jne .check_dev
|
|
|
|
.exit_success:
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
.found_dev:
|
|
; skip PCIDEV.owner check if the PCI device pointer has already been
|
|
; allocated (without this check, more than 1 NVMe device cannot be
|
|
; registered)
|
|
mov eax, dword [p_nvme_devices]
|
|
test eax, eax
|
|
jnz @f
|
|
cmp dword [esi + PCIDEV.owner], 0
|
|
jnz .err
|
|
|
|
@@:
|
|
PDEBUGF DBG_INFO, "PCI(%u.%u.%u): Detected NVMe device...\n", [esi + PCIDEV.bus], [esi + PCIDEV.devfn]
|
|
cmp dword [pcidevs_len], TOTAL_PCIDEVS
|
|
jne @f
|
|
DEBUGF DBG_INFO, "Can't add any more NVMe devices...\n"
|
|
jmp .exit_success
|
|
|
|
@@:
|
|
inc dword [pcidevs_len]
|
|
cmp dword [p_nvme_devices], 0
|
|
jnz @f ; was the pointer already allocated?
|
|
invoke KernelAlloc, sizeof.pcidev * TOTAL_PCIDEVS
|
|
test eax, eax
|
|
jz .err
|
|
mov dword [p_nvme_devices], eax
|
|
mov dword [esi + PCIDEV.owner], eax
|
|
DEBUGF DBG_INFO, "nvme: Allocated memory for PCI devices at: 0x%x\n", eax
|
|
|
|
@@:
|
|
mov ecx, dword [pcidevs_len]
|
|
dec ecx
|
|
mov edi, dword [p_nvme_devices]
|
|
mov edx, ecx
|
|
imul edx, sizeof.pcidev
|
|
lea edi, [edi + edx]
|
|
|
|
movzx eax, byte [esi + PCIDEV.bus]
|
|
mov byte [edi + pcidev.bus], al
|
|
movzx eax, byte [esi + PCIDEV.devfn]
|
|
mov byte [edi + pcidev.devfn], al
|
|
mov dword [edi + pcidev.num], ecx
|
|
|
|
jmp .next_dev
|
|
|
|
.err:
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; Returns 1 if the NVMe device is compatible, 0 otherwise. In practice, the driver
; is compatible with (hopefully) most compliant controllers. This proc also does
; some initialization, a leftover from early design decisions; since the code
; works, it hasn't been restructured yet.
|
|
proc device_is_compat stdcall, pci:dword
|
|
|
|
push esi edx ecx
|
|
mov esi, [pci]
|
|
invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_line
|
|
mov byte [esi + pcidev.iline], al
|
|
invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.base_addr_0
|
|
and eax, 0xfffffff0
|
|
test eax, eax
|
|
jz .failure
|
|
mov edx, eax
|
|
|
|
invoke MapIoMem, eax, 0x2000, PG_SW+PG_NOCACHE
|
|
test eax, eax
|
|
jz .failure
|
|
mov dword [esi + pcidev.io_addr], eax
|
|
mov eax, dword [eax + NVME_MMIO.CAP + 4]
|
|
and eax, CAP_DSTRD
|
|
mov byte [esi + pcidev.dstrd], al
|
|
mov eax, dword [esi + pcidev.io_addr]
|
|
mov eax, dword [eax + NVME_MMIO.VS]
|
|
DEBUGF DBG_INFO, "nvme%u: Controller version: 0x%x\n", [esi + pcidev.num], eax
|
|
mov dword [esi + pcidev.version], eax
|
|
pop ecx edx esi
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
.failure:
|
|
PDEBUGF DBG_INFO, "PCI(%u.%u.%u): something went wrong checking NVMe device compatibility\n", byte [esi + pcidev.bus], byte [esi + pcidev.devfn]
|
|
pop ecx edx esi
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; nvme_init: Initializes the NVMe controller, I/O queues, and namespaces.
|
|
proc nvme_init stdcall, pci:dword
|
|
|
|
push ebx esi edi
|
|
mov esi, dword [pci]
|
|
|
|
; Check the PCI command register to see if interrupts are disabled; if so,
; we have to re-enable them
|
invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command
|
|
and eax, not (1 shl 10)
|
|
; Enable the Bus Master bit, memory space access, and I/O space access. QEMU
; automatically sets the bus master bit, but VirtualBox does not. Not sure about
; the other bits, but let's set them to 1 anyway just to be extra cautious.
; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/1#issuecomment-467
|
|
or eax, (1 shl 2) or (1 shl 1) or 1
|
|
invoke PciWrite16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command, eax
|
|
|
|
; Check if the device has a pointer to the capabilities list (status register bit 4 set to 1)
|
|
; though this check is probably unnecessary since all PCIe devices should have this bit set to 1
|
|
invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.status
|
|
test ax, (1 shl 4)
|
|
jz .exit_fail
|
|
|
|
invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.cap_ptr
|
|
and eax, 0xfc ; bottom two bits are reserved, so mask them before we access the configuration space
|
|
mov edi, eax
|
|
DEBUGF DBG_INFO, "nvme%u: Checking capabilities...\n", [esi + pcidev.num]
|
|
|
|
; We need to check if there are any MSI/MSI-X capabilities, and if so, make sure they're disabled since
|
|
; we're using old fashioned pin-based interrupts (for now)
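; (Each PCI capability starts with a 1-byte capability ID followed by a 1-byte
; pointer to the next capability, so after the PciRead32 below AL holds the ID
; and AH holds the offset of the next capability in the list.)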
|
|
.read_cap:
|
|
invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
|
|
add edi, 2
|
|
cmp al, MSICAP_CID
|
|
je .got_msi_cap
|
|
cmp al, MSIXCAP_CID
|
|
je .got_msix_cap
|
|
movzx edi, ah
|
|
test edi, edi
|
|
jnz .read_cap
|
|
DEBUGF DBG_INFO, "nvme%u: MSI/MSI-X capability not found\n", [esi + pcidev.num]
|
|
jmp .end_cap_parse
|
|
|
|
.got_msi_cap:
|
|
DEBUGF DBG_INFO, "nvme%u: Found MSI capability\n", [esi + pcidev.num]
|
|
invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
|
|
and eax, not MSICAP_MSIE
|
|
invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
|
|
jmp .end_cap_parse
|
|
|
|
.got_msix_cap:
|
|
DEBUGF DBG_INFO, "nvme%u: Found MSI-X capability\n", [esi + pcidev.num]
|
|
invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
|
|
and eax, not MSIXCAP_MXE
|
|
invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
|
|
|
|
.end_cap_parse:
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
|
|
; check maximum queue entries supported
|
|
mov eax, dword [edi + NVME_MMIO.CAP]
|
|
DEBUGF DBG_INFO, "nvme%u: Maximum queue entries available is %u (required: %u)\n", [esi + pcidev.num], ax, SQ_ENTRIES
|
|
cmp ax, SQ_ENTRIES
|
|
jb .exit_fail
|
|
|
|
if __DEBUG__
|
|
test eax, CAP_CQR
|
|
setnz al
|
|
DEBUGF DBG_INFO, "nvme%u: Contiguous queues required: %u\n", [esi + pcidev.num], al
|
|
end if
|
|
|
|
; Check if NVM command set is supported
|
|
mov eax, dword [edi + NVME_MMIO.CAP + 4]
|
|
DEBUGF DBG_INFO, "nvme%u: Checking if NVM command set is supported...\n", [esi + pcidev.num]
|
|
test eax, CAP_CSS_NVM_CMDSET
|
|
jz .exit_fail
|
|
DEBUGF DBG_INFO, "nvme%u: OK... NVM command set supported\n", [esi + pcidev.num]
|
|
|
|
stdcall nvme_disable_ctrl, esi
|
|
DEBUGF DBG_INFO, "nvme%u: Checking if memory page size is supported...\n", [esi + pcidev.num]
|
|
mov eax, dword [edi + NVME_MMIO.CAP + 4]
|
|
mov edx, eax
|
|
and edx, CAP_MPSMIN
|
|
shr edx, 16
|
|
cmp edx, NVM_MPS
|
|
ja .exit_fail
|
|
and eax, CAP_MPSMAX
|
|
shr eax, 20
|
|
cmp eax, NVM_MPS
|
|
jb .exit_fail
|
|
DEBUGF DBG_INFO, "nvme%u: OK... memory page size supported\n", [esi + pcidev.num]
|
|
|
|
; Configure IOSQES, IOCQES, AMS, MPS, CSS
|
|
; CSS = 0 (NVM Command Set)
|
|
; AMS = 0 (Round Robin)
|
|
; MPS = 0 (4KiB Pages)
|
|
; IOSQES = 6 (64B)
|
|
; IOCQES = 4 (16B)
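; (IOSQES = 6 means 2^6 = 64-byte submission queue entries and IOCQES = 4 means
; 2^4 = 16-byte completion queue entries. In the CC register IOSQES occupies
; bits 19:16 and IOCQES bits 23:20, so, assuming CC_DEFAULT_IOSQES and
; CC_DEFAULT_IOCQES encode exactly those fields, the value written below is
; (6 shl 16) or (4 shl 20) = 0x00460000.)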
|
|
xor eax, eax
|
|
or eax, CC_DEFAULT_IOSQES or CC_DEFAULT_IOCQES
|
|
mov dword [edi + NVME_MMIO.CC], eax
|
|
DEBUGF DBG_INFO, "nvme%u: OK... controller is configured to appropriate settings\n", [esi + pcidev.num]
|
|
|
|
; Configure Admin Queue Attributes
|
|
xor eax, eax
|
|
or eax, NVM_ASQS or (NVM_ACQS shl 16)
|
|
mov dword [edi + NVME_MMIO.AQA], eax
|
|
DEBUGF DBG_INFO, "nvme%u: Admin queue attributes: 0x%x\n", [esi + pcidev.num], eax
|
|
|
|
; Allocate list of queues
|
|
DEBUGF DBG_INFO, "nvme%u: Allocating Administrator and I/O queues...\n",, [esi + pcidev.num]
|
|
invoke KernelAlloc, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1)
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov dword [esi + pcidev.queue_entries], eax
|
|
mov edi, eax
|
|
stdcall memsetdz, eax, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1) / 4
|
|
|
|
; Allocate submission/completion queue pointers
|
|
xor ebx, ebx
|
|
|
|
.init_queues:
|
|
invoke KernelAlloc, QUEUE_ALLOC_SIZE
|
|
test eax, eax
|
|
jz .exit_fail
|
|
DEBUGF DBG_INFO, "nvme%u: Allocated queue at offset %u: 0x%x\n", [esi + pcidev.num], ebx, eax
|
|
mov dword [edi + ebx + NVM_QUEUE_ENTRY.cq_ptr], eax
|
|
mov edx, eax
|
|
add eax, CQ_ALLOC_SIZE
|
|
mov dword [edi + ebx + NVM_QUEUE_ENTRY.sq_ptr], eax
|
|
stdcall memsetdz, edx, QUEUE_ALLOC_SIZE / 4
|
|
|
|
; Initialize command entries
|
|
invoke KernelAlloc, sizeof.NVMQCMD * CQ_ENTRIES
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov dword [edi + ebx + NVM_QUEUE_ENTRY.cmd_ptr], eax
|
|
push ebx esi
|
|
mov esi, eax
|
|
xor ebx, ebx
|
|
|
|
.init_cmd_entries:
|
|
invoke KernelAlloc, sizeof.MUTEX
|
|
test eax, eax
|
|
jz .exit_fail_cleanup
|
|
mov dword [esi + NVMQCMD.mutex_ptr], eax
|
|
mov dword [esi + NVMQCMD.cid], ebx
|
|
mov ecx, eax
|
|
invoke MutexInit
|
|
inc ebx
|
|
add esi, sizeof.NVMQCMD
|
|
cmp ebx, CQ_ENTRIES
|
|
jne .init_cmd_entries
|
|
|
|
pop esi ebx
|
|
add ebx, sizeof.NVM_QUEUE_ENTRY
|
|
cmp ebx, (LAST_QUEUE_ID + 1) * sizeof.NVM_QUEUE_ENTRY
|
|
jne .init_queues
|
|
|
|
; Configure Admin Completion Queue Base Address
|
|
mov esi, [pci]
|
|
mov esi, dword [esi + pcidev.io_addr]
|
|
mov eax, dword [edi + NVM_QUEUE_ENTRY.cq_ptr]
|
|
invoke GetPhysAddr
|
|
mov dword [esi + NVME_MMIO.ACQ], eax
|
|
mov dword [esi + NVME_MMIO.ACQ + 4], 0
|
|
if __DEBUG__
|
|
push esi
|
|
mov esi, [pci]
|
|
DEBUGF DBG_INFO, "nvme%u: Admin completion queue base address: 0x%x\n", [esi + pcidev.num], eax
|
|
pop esi
|
|
end if
|
|
|
|
; Configure Admin Submission Queue Base Address
|
|
mov eax, dword [edi + NVM_QUEUE_ENTRY.sq_ptr]
|
|
invoke GetPhysAddr
|
|
mov dword [esi + NVME_MMIO.ASQ], eax
|
|
mov dword [esi + NVME_MMIO.ASQ + 4], 0
|
|
if __DEBUG__
|
|
push esi
|
|
mov esi, [pci]
|
|
DEBUGF DBG_INFO, "nvme%u: Admin submission queue base address: 0x%x\n", [esi + pcidev.num], eax
|
|
pop esi
|
|
end if
|
|
|
|
; Attach interrupt handler
|
|
mov esi, [pci]
|
|
movzx eax, byte [esi + pcidev.iline]
|
|
DEBUGF DBG_INFO, "nvme%u: Attaching interrupt handler to IRQ %u\n", [esi + pcidev.num], eax
|
|
invoke AttachIntHandler, eax, irq_handler, 0
|
|
test eax, eax
|
|
jz .exit_fail
|
|
DEBUGF DBG_INFO, "nvme%u: Successfully attached interrupt handler\n", [esi + pcidev.num]
|
|
|
|
; Restart the controller
|
|
stdcall nvme_enable_ctrl, esi
|
|
|
|
invoke KernelAlloc, 0x1000
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov edi, eax
|
|
invoke GetPhysAddr
|
|
; pci:dword, nsid:dword, dptr:dword, cns:byte
|
|
stdcall nvme_identify, [pci], 0, eax, CNS_IDCS
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov eax, dword [edi + IDENTC.nn]
|
|
mov dword [esi + pcidev.nn], eax
|
|
DEBUGF DBG_INFO, "nvme%u: Namespace Count: %u\n", [esi + pcidev.num], eax
|
|
|
|
; Note that the specification only allows ASCII strings that contain code
|
|
; values between 0x20 (' ') and 0x7E ('~'). Strings are left justified and
|
|
; padded with spaces (at least according to the 1.4.0 spec) which means there
|
|
; is no null terminator anywhere. To prevent garbage or repeated values from
|
|
; being printed to the debug log, I have inserted a 0 byte at the end of each
|
|
; string.
|
|
lea ebx, byte [edi + IDENTC.sn]
|
|
mov byte [ebx + 19], 0
|
|
DEBUGF DBG_INFO, "nvme%u: Serial Number: %s\n", [esi + pcidev.num], ebx
|
|
add ebx, 20
|
|
mov byte [ebx + 39], 0
|
|
DEBUGF DBG_INFO, "nvme%u: Model Number: %s\n", [esi + pcidev.num], ebx
|
|
add ebx, 40
|
|
mov byte [ebx + 7], 0
|
|
DEBUGF DBG_INFO, "nvme%u: Firmware Revision: %s\n", [esi + pcidev.num], ebx
|
|
mov edx, dword [esi + pcidev.version]
|
|
|
|
cmp edx, VS140
|
|
jb @f
|
|
; This is a reserved field in pre-1.4 controllers
|
|
mov al, byte [edi + IDENTC.cntrltype]
|
|
cmp al, CNTRLTYPE_IO_CONTROLLER
|
|
jne .exit_fail
|
|
;DEBUGF DBG_INFO, "nvme%u: I/O controller detected...\n", [esi + pcidev.num]
|
|
|
|
@@:
|
|
; TODO: check IDENTC.AVSCC
|
|
mov al, byte [edi + IDENTC.sqes]
|
|
and al, 11110000b
|
|
DEBUGF DBG_INFO, "nvme%u: IDENTC.SQES = %u\n", [esi + pcidev.num], al
|
|
cmp al, 0x60 ; maximum submission queue entry size should be at least 64 bytes
|
|
jb .exit_fail
|
|
mov al, byte [edi + IDENTC.cqes]
|
|
and al, 11110000b
|
|
DEBUGF DBG_INFO, "nvme%u: IDENTC.CQES = %u\n", [esi + pcidev.num], al
|
|
cmp al, 0x40 ; maximum completion queue entry size should be at least 16 bytes
|
|
jb .exit_fail
|
|
invoke KernelFree, edi
|
|
|
|
mov eax, 1 or (1 shl 16) ; CDW11 (set the number of queues we want)
|
|
mov esi, [pci]
|
|
mov dword [esi + pcidev.spinlock], 1
|
|
stdcall set_features, [pci], NULLPTR, FID_NUMBER_OF_QUEUES, eax
|
|
stdcall nvme_poll, esi
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov esi, dword [esi + pcidev.queue_entries]
|
|
mov esi, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
|
|
mov eax, dword [esi + sizeof.CQ_ENTRY + CQ_ENTRY.cdw0]
|
|
;DEBUGF DBG_INFO, "nvme%u: Set Features CDW0: 0x%x\n", [esi + pcidev.num], eax
|
|
test ax, ax ; Number of I/O Submission Queues allocated
|
|
jz .exit_fail
|
|
shl eax, 16
|
|
test ax, ax ; Number of I/O Completion Queues allocated
|
|
jnz .exit_fail
|
|
|
|
; Create I/O Queues
|
|
; (TODO: create N queue pairs for N CPU cores, see page 8 of the NVMe 1.4 spec for an explanation)
|
|
mov esi, [pci]
|
|
mov edi, esi
|
|
mov esi, dword [esi + pcidev.queue_entries]
|
|
add esi, sizeof.NVM_QUEUE_ENTRY
|
|
mov eax, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
|
|
invoke GetPhysAddr
|
|
stdcall create_io_completion_queue, [pci], eax, 1, IEN_ON
|
|
test eax, eax
|
|
jz .exit_fail
|
|
;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O completion queue 1\n", [edi + pcidev.num]
|
|
mov eax, dword [esi + NVM_QUEUE_ENTRY.sq_ptr]
|
|
invoke GetPhysAddr
|
|
stdcall create_io_submission_queue, [pci], eax, 1, 1
test eax, eax
jz .exit_fail
|
|
;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O submission queue 1\n", [edi + pcidev.num]
|
|
|
|
; TODO: This only registers a single namespace. Add support for more
|
|
stdcall determine_active_nsids, [pci]
|
|
test eax, eax
|
|
jz .exit_fail ; No active NSIDS
|
|
mov esi, [pci]
|
|
mov dword [esi + pcidev.nsid], eax
|
|
DEBUGF DBG_INFO, "nvme%u: Found active NSID: %u\n", [esi + pcidev.num], eax
|
|
|
|
invoke KernelAlloc, 0x1000
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov edi, eax
|
|
invoke GetPhysAddr
|
|
stdcall nvme_identify, [pci], [esi + pcidev.nsid], eax, CNS_IDNS
|
|
test eax, eax
|
|
jz .exit_fail
|
|
invoke KernelAlloc, sizeof.NSINFO
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov ebx, eax
|
|
mov dword [esi + pcidev.nsinfo], eax
|
|
mov al, byte [edi + IDENTN.nsfeat]
|
|
mov byte [ebx + NSINFO.features], al
|
|
;DEBUGF DBG_INFO, "nvme%un%u: Namespace Features: 0x%x\n", [esi + pcidev.num], [esi + pcidev.nsid], al
|
|
mov eax, dword [esi + pcidev.nsid]
|
|
mov dword [ebx + NSINFO.nsid], eax
|
|
mov dword [ebx + NSINFO.pci], esi
|
|
mov eax, dword [edi + IDENTN.nsze]
|
|
mov dword [ebx + NSINFO.size], eax
|
|
mov eax, dword [edi + IDENTN.nsze + 4]
|
|
mov dword [ebx + NSINFO.size + 4], eax
|
|
mov eax, dword [edi + IDENTN.ncap]
|
|
mov dword [ebx + NSINFO.capacity], eax
|
|
mov eax, dword [edi + IDENTN.ncap + 4]
|
|
mov dword [ebx + NSINFO.capacity + 4], eax
|
|
;DEBUGF DBG_INFO, "nvme%un%u: Namespace Size: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.nsze], [edi + IDENTN.nsze + 4]
|
|
;DEBUGF DBG_INFO, "nvme%un%u: Namespace Capacity: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.ncap], [edi + IDENTN.ncap + 4]
|
|
mov eax, dword [edi + IDENTN.lbaf0]
|
|
shr eax, 16 ; Get LBADS
|
|
|
|
; KolibriOS only supports a logical block size of 512 bytes (LBADS = 9), so if
; the namespace reports a different value we have to ignore it
|
|
cmp al, SUPPORTED_LBADS
|
|
jne .exit_fail
|
|
|
|
mov byte [ebx + NSINFO.lbads], al
|
|
invoke KernelFree, edi
|
|
if 0
|
|
invoke KernelAlloc, 0x6000
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov edi, eax
|
|
invoke KernelAlloc, 0x8
|
|
test eax, eax
|
|
jz .exit_fail
|
|
mov edx, NVM_CMD_READ
|
|
mov dword [eax], 6
|
|
add edi, 0x5
|
|
mov dword [esi + pcidev.spinlock], 1
|
|
stdcall nvme_readwrite, [esi + pcidev.nsinfo], edi, 0x0, 0, eax
|
|
stdcall nvme_poll, esi
|
|
test eax, eax
|
|
jz .exit_fail
|
|
DEBUGF DBG_INFO, "STRING: %s\n", edi
|
|
add edi, 0x2000
|
|
DEBUGF DBG_INFO, "STRING: %s\n", edi
|
|
end if
|
|
DEBUGF DBG_INFO, "nvme%u: Successfully initialized driver\n", [esi + pcidev.num]
|
|
xor eax, eax
|
|
inc eax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
.exit_fail_cleanup:
|
|
add esp, 8
|
|
|
|
.exit_fail:
|
|
mov esi, [pci]
|
|
DEBUGF DBG_INFO, "nvme%u: Failed to initialize controller\n", [esi + pcidev.num]
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
mov eax, dword [edi + NVME_MMIO.CSTS]
|
|
test eax, CSTS_CFS
|
|
jz @f
|
|
DEBUGF DBG_INFO, "nvme%u: A fatal controller error has occurred\n", [esi + pcidev.num]
|
|
|
|
@@:
|
|
xor eax, eax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
; Returns a new CID for queue #y
|
|
proc get_new_cid stdcall, pci:dword, y:dword
|
|
|
|
mov eax, [pci]
|
|
mov eax, dword [eax + pcidev.queue_entries]
|
|
mov ecx, [y]
|
|
shl ecx, SIZEOF_NVM_QUEUE_ENTRY
|
|
movzx eax, word [eax + ecx + NVM_QUEUE_ENTRY.head]
|
|
;DEBUGF DBG_INFO, "get_new_cid: %u\n", eax
|
|
ret
|
|
|
|
endp
|
|
|
|
proc nvme_disable_ctrl stdcall, pci:dword
|
|
|
|
; TODO: Add timeout of CAP.TO seconds
|
|
push esi edi
|
|
mov esi, [pci]
|
|
DEBUGF DBG_INFO, "nvme%u: Disabling Controller...\n", [esi + pcidev.num]
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
and dword [edi + NVME_MMIO.CC], 0xfffffffe ; CC.EN = 0
|
|
|
|
; Wait for controller to be brought to idle state, CSTS.RDY should be cleared to 0 when this happens
|
|
.wait:
|
|
test dword [edi + NVME_MMIO.CSTS], CSTS_RDY
|
|
jnz .wait
|
|
DEBUGF DBG_INFO, "nvme%u: Successfully disabled controller\n", [esi + pcidev.num]
|
|
pop edi esi
|
|
ret
|
|
|
|
endp
|
|
|
|
proc nvme_enable_ctrl stdcall, pci:dword
|
|
|
|
; TODO: Add timeout of CAP.TO seconds
|
|
push esi edi
|
|
mov esi, [pci]
|
|
DEBUGF DBG_INFO, "nvme%u: Enabling Controller...\n", [esi + pcidev.num]
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
or dword [edi + NVME_MMIO.CC], 1 ; CC.EN = 1
|
|
|
|
; Wait for controller to be brought into active state, CSTS.RDY should be set to 1 when this happens
|
|
.wait:
|
|
test dword [edi + NVME_MMIO.CSTS], CSTS_RDY
|
|
jz .wait
|
|
DEBUGF DBG_INFO, "nvme%u: Successfully enabled controller\n", [esi + pcidev.num]
|
|
pop edi esi
|
|
ret
|
|
|
|
endp
|
|
|
|
; Polls until the device's spinlock is released, or until an arbitrary
; timeout is reached. The lock should be released by the interrupt handler
; once all of the commands have been completed.
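; Typical usage (see nvme_readwrite and nvme_init): set pcidev.spinlock to 1,
; submit the command, then call nvme_poll; the IRQ handler clears the lock when
; the completion arrives. Returns 1 if the lock was released, 0 on timeout.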
|
|
proc nvme_poll stdcall, pci:dword
|
|
|
|
push esi
|
|
mov esi, [pci]
|
|
xor ecx, ecx
|
|
|
|
@@:
|
|
inc ecx
|
|
cmp ecx, 0x10000000
|
|
je @f
|
|
xor eax, eax
|
|
inc eax
|
|
xchg eax, dword [esi + pcidev.spinlock]
|
|
test eax, eax
|
|
jnz @b
|
|
|
|
; lock was released, return 1
|
|
pop esi
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
@@:
|
|
; timeout: lock wasn't released, return 0
|
|
pop esi
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
|
|
; Writes to completion queue 'y' head doorbell. 'cqh' should
|
|
; be the new head value that will be stored in the register.
|
|
proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword
|
|
|
|
push esi edi
|
|
mov esi, [pci]
|
|
|
|
; 1000h + ((2y + 1) * (4 << CAP.DSTRD))
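; e.g. with CAP.DSTRD = 0 (4-byte doorbell stride) and y = 1:
; 1000h + (3 * 4) = 100Ch, the CQ1 head doorbell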
|
|
mov eax, [y]
|
|
shl al, 1
|
|
inc al
|
|
mov edx, 4
|
|
mov cl, byte [esi + pcidev.dstrd]
|
|
shl dx, cl
|
|
imul dx, ax
|
|
add dx, 0x1000
|
|
mov ecx, [y]
|
|
shl ecx, SIZEOF_NVM_QUEUE_ENTRY
|
|
mov edi, dword [esi + pcidev.queue_entries]
|
|
lea edi, dword [edi + ecx]
|
|
mov eax, [cqh]
|
|
DEBUGF DBG_INFO, "nvme%u: Writing to CQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax
|
|
mov esi, dword [esi + pcidev.io_addr]
|
|
mov word [esi + edx], ax ; Write to CQyHDBL
|
|
mov word [edi + NVM_QUEUE_ENTRY.head], ax
|
|
|
|
; Unlock the mutex now that the command is complete
|
|
;mov edi, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr]
|
|
;mov ecx, [cqh]
|
|
;shl ecx, SIZEOF_NVMQCMD
|
|
;add edi, ecx
|
|
;mov ecx, dword [edi + NVMQCMD.mutex_ptr]
|
|
;invoke MutexUnlock
|
|
|
|
pop edi esi
|
|
ret
|
|
|
|
endp
|
|
|
|
; Writes to submission queue 'y' tail doorbell. 'cmd' should
|
|
; be a pointer to the submission queue struct.
|
|
proc sqytdbl_write stdcall, pci:dword, y:word, cmd:dword
|
|
|
|
push ebx esi edi
|
|
mov edi, [pci]
|
|
mov edi, dword [edi + pcidev.queue_entries]
|
|
movzx ebx, [y]
|
|
shl ebx, SIZEOF_NVM_QUEUE_ENTRY
|
|
lea edi, [edi + ebx]
|
|
;mov eax, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr]
|
|
mov edx, dword [edi + NVM_QUEUE_ENTRY.sq_ptr]
|
|
mov esi, [cmd]
|
|
mov ecx, dword [esi + SQ_ENTRY.cdw0]
|
|
shr ecx, 16 ; Get CID
|
|
mov ebx, ecx
|
|
shl ebx, SIZEOF_NVM_QUEUE_ENTRY
|
|
add ebx, eax
|
|
shl ecx, SIZEOF_SQ_ENTRY
|
|
lea edx, [edx + ecx]
|
|
stdcall memcpyd, edx, esi, sizeof.SQ_ENTRY / 4
|
|
;mov ecx, dword [ebx + NVMQCMD.mutex_ptr]
|
|
;invoke MutexLock
|
|
|
|
mov esi, [pci]
|
|
mov ax, word [edi + NVM_QUEUE_ENTRY.tail]
|
|
inc ax
|
|
cmp ax, NVM_ASQS
|
|
jbe @f
|
|
xor ax, ax
|
|
|
|
@@:
|
|
; 1000h + (2y * (4 << CAP.DSTRD))
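; e.g. with CAP.DSTRD = 0 and y = 1: 1000h + (2 * 4) = 1008h, the SQ1 tail doorbell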
|
|
movzx ebx, [y]
|
|
shl ebx, 1
|
|
mov edx, 4
|
|
mov cl, byte [esi + pcidev.dstrd]
|
|
shl edx, cl
|
|
imul edx, ebx
|
|
add edx, 0x1000
|
|
DEBUGF DBG_INFO, "nvme%u: Writing to SQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax
|
|
mov word [edi + NVM_QUEUE_ENTRY.tail], ax
|
|
mov esi, dword [esi + pcidev.io_addr]
|
|
mov word [esi + edx], ax
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
proc is_queue_full stdcall, tail:word, head:word
|
|
|
|
push bx
|
|
mov ax, [tail]
|
|
mov bx, [head]
|
|
cmp ax, bx
|
|
je .not_full
|
|
test bx, bx
|
|
jnz @f
|
|
cmp ax, NVM_ASQS
|
|
jne @f
|
|
pop bx
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
@@:
|
|
cmp ax, bx
|
|
jae .not_full
|
|
sub ax, bx
|
|
cmp ax, 1
|
|
jne .not_full
|
|
pop bx
|
|
xor eax, eax
|
|
inc eax
|
|
ret
|
|
|
|
.not_full:
|
|
pop bx
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; Notifies the controller that all the commands of the respective queue
|
|
; have been acknowledged as completed (if any).
|
|
proc consume_cq_entries stdcall, pci:dword, queue:dword
|
|
|
|
push esi edi
|
|
mov esi, [pci]
|
|
mov ecx, [queue]
|
|
shl ecx, SIZEOF_NVM_QUEUE_ENTRY
|
|
mov esi, dword [esi + pcidev.queue_entries]
|
|
lea esi, [esi + ecx]
|
|
movzx ecx, word [esi + NVM_QUEUE_ENTRY.head]
|
|
cmp cx, word [esi + NVM_QUEUE_ENTRY.tail]
|
|
je .end
|
|
inc ecx
|
|
cmp ecx, NVM_ACQS
|
|
jbe @f
|
|
xor ecx, ecx
|
|
mov word [esi + NVM_QUEUE_ENTRY.head], cx
|
|
|
|
@@:
|
|
stdcall cqyhdbl_write, [pci], [queue], ecx
|
|
|
|
.end:
|
|
pop edi esi
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; Our interrupt handler. Once the controller finishes a command,
; it should generate an interrupt (assuming that no fatal error
; occurred). If an interrupt isn't being generated when it is expected
; to be, check the CSTS register to make sure the fatal error bit isn't
; set; the controller doesn't generate any interrupts in such cases.
;
; Once a command has completed (successfully or not), the controller adds
; a new completion queue entry, and it is the interrupt handler's
; responsibility to write the updated head value to the appropriate
; completion queue's head doorbell register; otherwise the controller will
; keep generating interrupts (in my experience, the most common cause of
; freezes with this driver).
|
|
proc irq_handler
|
|
|
|
push ebx esi edi
|
|
mov edi, dword [p_nvme_devices]
|
|
mov esi, edi
|
|
sub esi, sizeof.pcidev
|
|
mov ebx, dword [pcidevs_len]
|
|
xor ecx, ecx
|
|
|
|
.check_who_raised_irq:
|
|
add esi, sizeof.pcidev
|
|
inc ecx
|
|
cmp ecx, ebx
|
|
; TODO: Apply solution given by @punk_joker of checking which device
|
|
; generated an interrupt.
|
|
ja .not_our_irq
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
mov dword [edi + NVME_MMIO.INTMS], 0x3
|
|
stdcall consume_cq_entries, esi, ADMIN_QUEUE
|
|
stdcall consume_cq_entries, esi, 1
|
|
|
|
; Interrupt handled by driver, return 1
|
|
mov dword [edi + NVME_MMIO.INTMC], 0x3
|
|
xor eax, eax
|
|
xchg eax, dword [esi + pcidev.spinlock] ; unlock spinlock
|
|
pop edi esi ebx
|
|
mov eax, 1
|
|
ret
|
|
|
|
.not_our_irq:
|
|
; Interrupt not handled by driver, return 0
|
|
pop edi esi ebx
|
|
xor eax, eax
|
|
ret
|
|
|
|
endp
|
|
|
|
; Deletes the allocated I/O queues for all of the NVMe devices,
|
|
; and shuts down all of the controllers. See page 295-297 of
|
|
; the NVMe 1.4.0 spec for details on how shutdown processing
|
|
; should occur.
|
|
;
|
|
; Currently shutdown still has problems on VMWare.
|
|
; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/5
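; (Per the spec, a normal shutdown is requested by writing 01b to CC.SHN and is
; finished once CSTS.SHST reads 10b, "shutdown processing complete".)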
|
|
proc nvme_cleanup
|
|
|
|
DEBUGF DBG_INFO, "nvme: Cleaning up...\n"
|
|
push ebx esi edi
|
|
mov esi, dword [p_nvme_devices]
|
|
test esi, esi
|
|
jnz @f
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
@@:
|
|
sub esi, sizeof.pcidev
|
|
xor ebx, ebx
|
|
|
|
.get_pcidev:
|
|
add esi, sizeof.pcidev
|
|
|
|
; Free the queues
|
|
mov edi, dword [esi + pcidev.queue_entries]
|
|
test edi, edi
|
|
jz .ret
|
|
sub edi, sizeof.NVM_QUEUE_ENTRY
|
|
push ebx
|
|
xor ebx, ebx
|
|
|
|
.get_queue:
|
|
add edi, sizeof.NVM_QUEUE_ENTRY
|
|
|
|
; TODO: Check if I/O completion and submission queue exist
|
|
; before deleting?
|
|
test ebx, ebx
|
|
jz @f ; we don't want to delete the admin queue
|
|
stdcall delete_io_submission_queue, esi, ebx
|
|
stdcall delete_io_completion_queue, esi, ebx
|
|
|
|
@@:
|
|
inc ebx
|
|
cmp ebx, LAST_QUEUE_ID
|
|
jbe .get_queue
|
|
pop ebx
|
|
inc ebx
|
|
cmp ebx, dword [pcidevs_len]
|
|
jne .get_pcidev
|
|
|
|
; NOTE: This code has a bug! It only shuts down the last
|
|
; controller, not all of them. Move this inside the loop
|
|
; and check if the device is actually valid.
|
|
; Shutdown the controller
|
|
mov edi, dword [esi + pcidev.io_addr]
|
|
mov eax, dword [edi + NVME_MMIO.CC]
|
|
and eax, not CC_SHN
|
|
or eax, CC_SHN_NORMAL_SHUTDOWN
|
|
mov dword [edi + NVME_MMIO.CC], eax
|
|
stdcall nvme_disable_ctrl, esi
|
|
|
|
; Wait for shutdown processing to complete
|
|
@@:
|
|
test byte [edi + NVME_MMIO.CSTS], CSTS_SHST_SHUTDOWN_COMPLETE
|
|
jnz @b
|
|
|
|
.ret:
|
|
pop edi esi ebx
|
|
ret
|
|
|
|
endp
|
|
|
|
;all initialized data is placed here
|
|
align 4
|
|
p_nvme_devices dd 0
|
|
pcidevs_len dd 0
|
|
my_service db "nvme",0 ;max 16 chars include zero
|
|
disk_functions:
|
|
dd disk_functions.end - disk_functions
|
|
dd 0 ; no close function
|
|
dd 0 ; no closemedia function
|
|
dd nvme_query_media
|
|
dd nvme_read
|
|
dd nvme_write
|
|
dd 0 ; no flush function
|
|
dd 0 ; use default cache size
|
|
.end:
|
|
if __DEBUG__
|
|
include_debug_strings
|
|
end if
|
|
|
|
align 4
|
|
data fixups
|
|
end data
|
|
|
|
; vim: syntax=fasm
|