diff --git a/drivers/nvme/command.inc b/drivers/nvme/command.inc
new file mode 100644
index 0000000000..48f93b1bc0
--- /dev/null
+++ b/drivers/nvme/command.inc
@@ -0,0 +1,268 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                                 ;;
+;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
+;; Distributed under terms of the GNU General Public License       ;;
+;;                                                                 ;;
+;;                  GNU GENERAL PUBLIC LICENSE                     ;;
+;;                     Version 2, June 1991                        ;;
+;;                                                                 ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+proc set_cdw0 stdcall, pci:dword, y:dword, opcode:byte
+
+        stdcall get_new_cid, [pci], [y]
+        shl eax, 16
+        or al, [opcode]
+        ret
+
+endp
+
+; See pages 161-205 of the NVMe 1.4 specification for reference
+proc nvme_identify stdcall, pci:dword, nsid:dword, prp1:dword, cns:byte
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+
+        mov eax, [nsid]
+        mov dword [esp + SQ_ENTRY.nsid], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_IDENTIFY
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov al, [cns]
+        mov byte [esp + SQ_ENTRY.cdw10], al
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+; See page 101 of the NVMe 1.4 specification for reference
+proc create_io_completion_queue stdcall, pci:dword, prp1:dword, qid:dword, ien:byte
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_CRE_IO_COMPLETION_QUEUE
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        mov eax, (CQ_ENTRIES - 1) shl 16 ; CDW10.QSIZE (a 0's based value)
+        or eax, [qid] ; CDW10.QID
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        movzx eax, [ien] ; CDW11.IEN
+        or eax, 0x1 ; CDW11.PC
+        ; Don't set CDW11.IV since we're not using MSI-X or MSI vector
+        mov dword [esp + SQ_ENTRY.cdw11], eax
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+; See pages 103-104 of the NVMe 1.4 specification for reference
+proc create_io_submission_queue stdcall, pci:dword, prp1:dword, qid:dword, cqid:word
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_CRE_IO_SUBMISSION_QUEUE
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        mov eax, (SQ_ENTRIES - 1) shl 16 ; CDW10.QSIZE (a 0's based value)
+        or eax, [qid]
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        movzx eax, [cqid]
+        shl eax, 16 ; CDW11.CQID
+        or eax, 0x1 ; CDW11.PC (always set this to 1 as some devices may not support non-contiguous pages)
+        ; TODO: Set CDW10.QPRIO
+        mov dword [esp + SQ_ENTRY.cdw11], eax
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+; See pages 95-96 of the NVMe 1.4 specification for reference
+proc abort stdcall, pci:dword, cid:word, sqid:word
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_ABORT
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        movzx eax, [cid]
+        shl eax, 16
+        or ax, word [sqid] ; 16-bit OR so the operand sizes match; the CID stays in the upper word
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+
+; See page 205 of the NVMe 1.4 specification for reference
+proc set_features stdcall, pci:dword, prp1:dword, fid:byte, cdw11:dword
+
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_SET_FEATURES
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        movzx eax, [fid]
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        mov eax, [cdw11]
+        mov dword [esp + SQ_ENTRY.cdw11], eax
+        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        ret
+
+endp
+
+; See page 105 of the NVMe 1.4 specification for reference
+proc delete_io_completion_queue stdcall, pci:dword, qid:word
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_DEL_IO_COMPLETION_QUEUE
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov ax, [qid]
+        mov word [esp + SQ_ENTRY.cdw10], ax
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+; See pages 114-116 of the NVMe 1.4 specification for reference
+proc get_features stdcall, pci:dword, prp1:dword, sel:byte, fid:byte
+
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_FEATURES
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        movzx eax, [sel]
+        and eax, 111b
+        shl eax, 8 ; CDW10.SEL
+        or al, byte [fid] ; CDW10.FID (8-bit OR so the operand sizes match)
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        ; TODO: Implement CDW14.UUID?
+        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        ret
+
+endp
+
+; See pages 105-106 of the NVMe 1.4 specification for reference
+proc delete_io_submission_queue stdcall, pci:dword, qid:word
+
+        push esi
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_DEL_IO_SUBMISSION_QUEUE
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov ax, [qid]
+        mov word [esp + SQ_ENTRY.cdw10], ax
+        stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp
+        add esp, sizeof.SQ_ENTRY
+        stdcall nvme_poll, esi
+        pop esi
+        ret
+
+endp
+
+; See pages 117-118 of the NVMe 1.4 specification for reference
+; INCOMPLETE: builds the command but does not submit it yet
+proc get_log_page stdcall, pci:dword, prp1:dword, lid:byte
+
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_LOG_PAGE
+        mov dword [esp + SQ_ENTRY.cdw0], eax
+        mov eax, [prp1]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        add esp, sizeof.SQ_ENTRY
+        ret
+
+endp
+
+; See pages 348-349 of the NVMe 1.4 specification for information on creating namespaces
+; INCOMPLETE
+proc create_namespace stdcall, pci:dword, cid:word
+
+        push esi
+        invoke AllocPage
+        test eax, eax
+        jz .fail
+        invoke GetPhysAddr
+        stdcall nvme_identify, [pci], 0xffffffff, eax, CNS_IDNS
+        test eax, eax
+        jz .fail
+
+.fail:
+        pop esi
+        ret
+
+endp
+
+; See pages 258-261 (read) and 269-271 (write) of the NVMe 1.4 specification for reference
+proc nvme_io_rw stdcall, pci:dword, qid:word, nsid:dword, prps:qword, slba:qword, nlb:dword, opcode:dword
+
+        ; TODO: Use IDENTC.NOIOB to construct read/write commands that don't
+        ; cross the I/O boundary to achieve optimal performance
+        ;
+        ; TODO: Read AWUN/NAWUN
+        sub esp, sizeof.SQ_ENTRY
+        stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4
+        movzx ecx, [qid]
+        stdcall set_cdw0, [pci], ecx, [opcode]
+        mov dword [esp + SQ_ENTRY.cdw0], eax ; CDW0
+        mov eax, dword [prps]
+        mov dword [esp + SQ_ENTRY.prp1], eax
+        mov eax, dword [prps + 4]
+        mov dword [esp + SQ_ENTRY.prp2], eax
+        mov eax, [nsid]
+        mov dword [esp + SQ_ENTRY.nsid], eax
+        mov eax, dword [slba] ; slba_lo
+        mov dword [esp + SQ_ENTRY.cdw10], eax
+        mov eax, dword [slba + 4] ; slba_hi
+        mov dword [esp + SQ_ENTRY.cdw11], eax
+        mov eax, [nlb]
+        mov word [esp + SQ_ENTRY.cdw12], ax
+        movzx ecx, [qid]
+        stdcall sqytdbl_write, [pci], ecx, esp
+        add esp, sizeof.SQ_ENTRY
+        ret
+
+endp
+
+; vim: syntax=fasm
diff --git a/drivers/nvme/lib.inc b/drivers/nvme/lib.inc
new file mode 100644
index 0000000000..233a33492e
--- /dev/null
+++ b/drivers/nvme/lib.inc
@@ -0,0 +1,35 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                                 ;;
+;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
+;; Distributed under terms of the GNU General Public License       ;;
+;;                                                                 ;;
+;;                  GNU GENERAL PUBLIC LICENSE                     ;;
+;;                     Version 2, June 1991                        ;;
+;;                                                                 ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Zeroes sz DWORDs (not bytes) starting at dest
+proc memsetdz stdcall, dest:dword, sz:dword
+
+        push edi
+        mov edi, [dest]
+        mov ecx, [sz]
+        xor eax, eax
+        rep stosd
+        pop edi
+        ret
+
+endp
+
+; Copies sz DWORDs (not bytes) from src to dest
+proc memcpyd stdcall, dest:dword, src:dword, sz:dword
+
+        push esi edi
+        mov esi, [src]
+        mov edi, [dest]
+        mov ecx, [sz]
+        rep movsd
+        pop edi esi
+        ret
+
+endp
+
+; vim: syntax=fasm
diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm
new file mode 100644
index 0000000000..563774e58e
--- /dev/null
+++ b/drivers/nvme/nvme.asm
@@ -0,0 +1,1435 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                                 ;;
+;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
+;; Distributed under terms of the GNU General Public License       ;;
+;;                                                                 ;;
+;;                  GNU GENERAL PUBLIC LICENSE                     ;;
+;;                     Version 2, June 1991                        ;;
+;;                                                                 ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+format PE DLL native 0.05
+entry START
+
+API_VERSION = 0 ;debug
+SRV_GETVERSION = 0
+__DEBUG__ = 1
+__DEBUG_LEVEL__ = 1
+DRIVER_VERSION = 1
+DBG_INFO = 1
+NULLPTR = 0
+FALSE = 0
+TRUE = 1
+
+section ".flat" code readable writable executable
+include "../proc32.inc"
+include "../struct.inc"
+include "../macros.inc"
+include "../fdo.inc"
+include "../pci.inc"
+include "../peimport.inc"
+include "nvme.inc"
+include "lib.inc"
+include "command.inc"
+
+struct DISKMEDIAINFO
+        flags dd ?
+        sectorsize dd ?
+        capacity dq ?
+ends
+
+proc START c, reason:dword, cmdline:dword
+local AnythingLoadedSuccessfully db 0
+
+        push ebx esi edi
+        cmp [reason], DRV_ENTRY
+        jne .err
+
+.entry:
+        DEBUGF DBG_INFO, "Detecting NVMe device...\n"
+        call detect_nvme
+        test eax, eax
+        jz .err
+        xor ebx, ebx
+        mov esi, dword [p_nvme_devices]
+        test esi, esi
+        jz .err
+        sub esi, sizeof.pcidev
+
+.loop:
+        add esi, sizeof.pcidev
+        push ebx esi
+        stdcall device_is_compat, esi
+        test eax, eax
+        jz .pop
+        stdcall nvme_init, esi
+        test eax, eax
+        jz .pop
+        pop esi ebx
+        stdcall add_nvme_disk, esi
+        jmp .next
+
+.pop:
+        pop esi ebx
+
+.next:
+        ; remember if at least one disk registered successfully, without
+        ; letting a later failure clear the flag
+        test eax, eax
+        jz @f
+        mov [AnythingLoadedSuccessfully], 1
+
+@@:
+        inc ebx
+        cmp ebx, dword [num_pcidevs]
+        jne .loop
+        cmp [AnythingLoadedSuccessfully], 0
+        jz .err
+        invoke RegService, my_service, service_proc
+        pop edi esi ebx
+        ret
+
+.err:
+        call nvme_cleanup
+        pop edi esi ebx
+        ret
+
+endp
+
+proc service_proc stdcall, ioctl:dword
+
+        mov esi, [ioctl]
+        mov eax, [esi + IOCTL.io_code]
+        cmp eax, SRV_GETVERSION
+        jne .ret
+
+        mov eax, [esi + IOCTL.output]
+        cmp [esi + IOCTL.out_size], 4
+        jne .ret
+        mov dword [eax], API_VERSION
+        xor eax, eax
+        ret
+
+.ret:
+        or eax, -1
+        ret
+
+endp
+
+; Registers the NVMe disk into KolibriOS. This requires that the
+; device was successfully initialized by nvme_init, otherwise this
+; will have undefined behavior.
+proc add_nvme_disk stdcall, pci:dword
+
+        push esi
+        mov esi, [pci]
+
+        ; NOTE: If pcidev.num or pcidev.nsid is more than 9 then
+        ; this fails to build the string correctly. Ignoring this issue
+        ; for now since who has more than 9 NVMe SSDs on a desktop computer,
+        ; and an NSID bigger than 9 is also unlikely.
+        ;
+        ; Still, will address this problem in the future.
+        push 0 ; null terminator
+        movzx eax, byte [esi + pcidev.nsid]
+        add al, "0"
+        mov byte [esp], al
+        dec esp
+        mov byte [esp], "n"
+        dec esp
+        movzx eax, byte [esi + pcidev.num]
+        add al, "0"
+        mov byte [esp], al
+        push "nvme"
+        mov eax, esp
+        invoke DiskAdd, disk_functions, eax, [esi + pcidev.nsinfo], 0
+        add esp, 10
+        test eax, eax
+        jz @f
+        invoke DiskMediaChanged, eax, 1
+        DEBUGF DBG_INFO, "nvme%un%u: Successfully registered disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
+        xor eax, eax
+        inc eax
+        pop esi
+        ret
+
+@@:
+        DEBUGF DBG_INFO, "nvme%un%u: Failed to register disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
+        xor eax, eax
+        pop esi
+        ret
+
+endp
+
+proc nvme_query_media stdcall, userdata:dword, info:dword
+
+        push ebx esi edi
+        mov esi, [userdata]
+        mov ebx, dword [esi + NSINFO.pci]
+        mov edi, [info]
+        mov dword [edi + DISKMEDIAINFO.flags], 0
+        mov cl, byte [esi + NSINFO.lbads]
+        xor eax, eax
+        inc eax
+        shl eax, cl
+        DEBUGF DBG_INFO, "nvme%un%u (Query Media): Sector size = %u\n", [ebx + pcidev.num], [esi + NSINFO.nsid], eax
+        mov dword [edi + DISKMEDIAINFO.sectorsize], eax
+        mov eax, dword [esi + NSINFO.capacity]
+        mov dword [edi + DISKMEDIAINFO.capacity], eax
+        mov eax, dword [esi + NSINFO.capacity + 4]
+        mov dword [edi + DISKMEDIAINFO.capacity + 4], eax
+        DEBUGF DBG_INFO, "nvme%un%u (Query Media): Capacity = %u + %u sectors\n", [ebx + pcidev.num], [esi + NSINFO.nsid], [esi + NSINFO.capacity], [esi + NSINFO.capacity + 4]
+        xor eax, eax
+        pop edi esi ebx
+        ret
+
+endp
+
+; Returns 1 if the given NSID is an active NSID, returns
+; 0 otherwise
+proc is_active_namespace stdcall, pci:dword, nsid:dword
+
+        push esi edi
+        invoke KernelAlloc, 0x1000
+        test eax, eax
+        jnz @f
+        pop edi esi
+        ret
+
+@@:
+        mov esi, eax
+        invoke GetPhysAddr
+        stdcall nvme_identify, [pci], [nsid], eax, CNS_IDNS
+        test eax, eax
+        jz .not_active_nsid
+        xor ecx, ecx
+
+@@:
+        mov eax, dword [esi + ecx * 4]
+        test eax, eax
+        jnz .is_active_nsid
+        inc ecx
+        cmp ecx, 0x1000 / 4
+        jne @b
+
+.not_active_nsid:
+        invoke KernelFree, esi
+        pop edi esi
+        xor eax, eax
+        ret
+
+.is_active_nsid:
+        invoke KernelFree, esi
+        pop edi esi
+        xor eax, eax
+        inc eax
+        ret
+
+endp
+
+; See page 248 of the NVMe 1.4 specification for reference.
+; Scans NSIDs 1 through NN and returns the first active NSID
+; found in EAX, or 0 if no active namespace exists. (Note that
+; this is not a count: EAX = 5 only means that NSID 5 is the
+; first active namespace.)
+proc determine_active_nsids stdcall, pci:dword
+
+        push ebx esi
+        mov esi, [pci]
+        xor ebx, ebx
+        xor ecx, ecx
+        inc ecx
+
+.loop:
+        cmp ecx, dword [esi + pcidev.nn]
+        ja .ret
+        push ecx
+        stdcall is_active_namespace, [pci], ecx
+        pop ecx
+        test eax, eax
+        jz .not_active_namespace
+        mov ebx, ecx
+        jmp .ret
+
+.not_active_namespace:
+        inc ecx
+        jmp .loop
+
+.ret:
+        mov eax, ebx
+        pop esi ebx ; restore the registers we actually pushed
+        ret
+
+endp
+
+; Allocates prp_list_ptr and creates a PRP list there. nprps should
+; be set appropriately to the number of PRPs the caller wants to create.
+;
+; This function should only be called if the conditions for building
+; a PRP list are met (see page 68 of the NVMe 1.4.0 spec).
+;
+; TODO: Currently the code for building recursive PRP lists is untested.
+; If you want to test it, do a read/write with a sector count equivalent
+; to more than 4MiB. Will test in the future.
+proc build_prp_list stdcall, nprps:dword, buf:dword, prp_list_ptr:dword
+
+        push esi ebx edi
+        sub esp, 4
+
+        ; stack:
+        ; [esp]: virtual pointer to first PRP list
+        ; here, we store the pointer to the very first
+        ; PRP list so that free_prp_list can free the
+        ; entire PRP list if something goes wrong, it
+        ; also serves as our return value placeholder
+        mov dword [esp], 0
+
+        xor edi, edi
+        xor esi, esi
+        mov ecx, [nprps]
+        shl ecx, 3 ; multiply by 8 since each PRP pointer is a QWORD
+
+        ; we'll store consecutive PRP list buffers here, for example
+        ; given 2 PRP lists, we allocate 2 continuous pages
+        push ecx
+        invoke KernelAlloc, ecx ; store pointers to the PRP entries here
+        pop ecx
+        test eax, eax
+        jz .err
+        mov dword [esp], eax
+        mov edi, eax
+        mov eax, [prp_list_ptr]
+        mov dword [eax], edi
+        shr ecx, 2 ; memsetdz takes a DWORD count, so convert the byte count
+        stdcall memsetdz, edi, ecx
+
+        ; note we assume buf is page-aligned
+        mov esi, [buf]
+
+.build_prp_list:
+        ; ensure we don't cross a page boundary
+        mov ebx, [nprps]
+        cmp ebx, PAGE_SIZE / 8
+        jb @f
+        mov ebx, PAGE_SIZE / 8
+        sub [nprps], ebx
+
+@@:
+        xor ecx, ecx
+        cmp dword [esp], edi
+        je .loop
+
+        ; we need to store the pointer of the next
+        ; PRP list to the previous PRP list last entry
+        mov eax, edi
+        invoke GetPhysAddr
+        mov dword [edi - 8], eax
+        mov dword [edi - 4], 0
+
+.loop:
+        mov eax, esi
+        invoke GetPhysAddr
+        mov dword [edi + ecx * 8], eax
+        mov dword [edi + ecx * 8 + 4], 0 ; zero the upper half of this 64-bit entry
+        add esi, PAGE_SIZE
+        inc ecx
+        cmp ecx, ebx
+        jne .loop
+
+        ; check if we need to build another PRP list
+        add edi, PAGE_SIZE
+        cmp ebx, PAGE_SIZE / 8
+        je .build_prp_list
+
+        ; PRP list successfully created
+        mov eax, dword [esp]
+        invoke GetPhysAddr
+        add esp, 4
+        pop edi ebx esi
+        ret
+
+.err:
+        add esp, 4
+        pop edi ebx esi
+        xor eax, eax
+        ret
+
+endp
+
+; Allocates PRP1/PRP2. Note that it is not required to call this function
+; unless you're doing reads and writes with an arbitrary buffer that the
+; kernel passes to the driver. In most other cases, it's better to just
+; allocate a page-aligned buffer.
+;
+; ns: Pointer to the device's respective namespace struct
+;
+; prps_ptr: should be a pointer to at least 2 DWORDs (PRP1 and PRP2 respectively);
+; the caller need not initialize PRP1, however PRP2 should explicitly be
+; initialized to 0.
+;
+; prp_list_ptr: pointer to 1 DWORD, the caller must initialize this value to 0.
+; If a PRP list is allocated, then prp_list_ptr shall contain the pointer to
+; the PRP list. The caller is required to free the allocated memory afterwards.
+;
+; buf: Pointer to the buffer
+;
+; On success, the function will return 1 and the PRPs will be initialized. If an
+; error occurs (most likely due to memory allocation), the function returns 0.
+proc alloc_dptr stdcall, ns:dword, prps_ptr:dword, numsectors:dword, prp_list_ptr:dword, buf:dword
+
+        push ebx esi edi
+        mov esi, [ns]
+        mov edi, [prps_ptr]
+        mov eax, [buf]
+        invoke GetPhysAddr
+        mov dword [edi], eax
+        mov cl, byte [esi + NSINFO.lbads]
+        mov ebx, PAGE_SIZE
+        shr ebx, cl ; EBX = sectors per memory page
+        mov edx, [numsectors]
+
+        ; is the buffer offset portion equal to 0?
+        mov eax, [buf]
+        mov ecx, eax
+        and eax, PAGE_SIZE - 1
+        mov eax, ebx ; (MOV preserves flags, so ZF still reflects the AND above)
+        jnz @f
+
+        ; is the number of sectors less than or equal to one memory page?
+        cmp edx, ebx
+        jbe .success
+        shl ebx, 1 ; it is page aligned, so set ebx to 2 memory pages
+
+@@:
+        ; is the number of sectors greater than one or two memory pages?
+        cmp edx, ebx
+        ja .build_prp_list
+
+        ; set PRP2
+        mov eax, ecx
+        and eax, not (PAGE_SIZE - 1)
+        add eax, PAGE_SIZE
+        invoke GetPhysAddr
+        mov dword [edi + 4], eax
+        jmp .success
+
+.build_prp_list:
+        mov ebx, ecx
+        mov ecx, eax
+        and ebx, not (PAGE_SIZE - 1)
+        add ebx, PAGE_SIZE
+        mov eax, [numsectors]
+        xor edx, edx
+        div ecx
+        stdcall build_prp_list, eax, ebx, [prp_list_ptr]
+        test eax, eax
+        jz .err
+        mov dword [edi + 4], eax
+
+.success:
+        xor eax, eax
+        inc eax
+        pop edi esi ebx
+        ret
+
+.err:
+        xor eax, eax
+        pop edi esi ebx
+        ret
+
+endp
+
+nvme_read:
+        mov edx, NVM_CMD_READ
+        jmp nvme_readwrite
+
+nvme_write:
+        mov edx, NVM_CMD_WRITE
+
+; Reads from/writes to the disk
+proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors_ptr:dword
+
+        push ebx esi edi
+        sub esp, 20
+
+        ; TODO: check if numsectors exceeds IDENTC.MDTS?
+
+        ; stack:
+        ; [esp] - PRP1
+        ; [esp + 4] - PRP2
+        ; [esp + 8] - command type (read or write)
+        ; [esp + 12] - original numsectors value
+        ; [esp + 16] - virtual pointer to PRP2 PRP list (if allocated, 0 if not)
+        mov ebx, esp
+
+        mov esi, [ns]
+        mov edi, [buf]
+
+        mov eax, [numsectors_ptr]
+        mov eax, dword [eax]
+        mov dword [ebx + 4], 0 ; PRP2 entry (0 by default)
+        mov dword [ebx + 8], edx ; command type (read or write)
+        mov dword [ebx + 12], eax ; save original numsectors value
+        mov dword [ebx + 16], 0 ; virtual pointer to PRP2 PRP list (not allocated by default)
+
+        mov ecx, ebx
+        add ecx, 16
+
+        ; Note that [esp] will contain the value of PRP1 and [esp + 4] will
+        ; contain the value of PRP2. If PRP2 is a PRP list, then [esp + 16] will point
+        ; to the allocated PRP list (after this call, only if it completes successfully)
+        stdcall alloc_dptr, esi, ebx, eax, ecx, [buf]
+        test eax, eax
+        jz .fail
+
+        mov eax, dword [start_sector]
+
+        ; According to the NVMe specification, the NLB field in the I/O read and write
+        ; commands is a 0-based value (i.e., 0 is equivalent to 1, 1 is equivalent to 2, ...)
+        ; As far as I know, KolibriOS doesn't follow this convention, so let's just
+        ; decrement the value and it should have the same effect.
+        mov ecx, dword [ebx + 12]
+        dec ecx
+
+        ; TODO: add non-blocking mechanisms later on
+        push eax
+        mov eax, dword [esi + NSINFO.pci]
+        mov dword [eax + pcidev.spinlock], 1
+        pop eax
+        stdcall nvme_io_rw, [esi + NSINFO.pci], \
+                1, \
+                [esi + NSINFO.nsid], \
+                dword [ebx], \
+                dword [ebx + 4], \
+                eax, \
+                dword [start_sector + 4], \
+                ecx, \
+                dword [ebx + 8]
+
+        ; TODO: add non-blocking mechanisms later on
+        stdcall nvme_poll, [esi + NSINFO.pci]
+        test eax, eax
+        jz .fail
+
+        ; free PRP list (if allocated)
+        mov eax, dword [ebx + 16]
+        test eax, eax
+        jz @f
+        invoke KernelFree, eax
+
+@@:
+        xor eax, eax
+        add esp, 20
+        pop edi esi ebx
+        ret
+
+.fail:
+        ; free PRP list (if allocated)
+        mov eax, dword [ebx + 16]
+        test eax, eax
+        jz @f
+        invoke KernelFree, eax
+
+@@:
+        mov ebx, [numsectors_ptr]
+        mov dword [ebx], 0
+        add esp, 20
+        pop edi esi ebx
+        or eax, -1 ; generic disk error
+        ret
+
+endp
+
+; Detects NVMe devices on the PCI bus and stores them into
+; [p_nvme_devices], setting [num_pcidevs] according to how many
+; NVMe devices there are.
+proc detect_nvme
+
+        invoke GetPCIList
+        mov esi, eax
+        mov ebx, eax
+
+.check_dev:
+        mov eax, dword [esi + PCIDEV.class]
+        and eax, 0x00ffff00 ; retrieve class/subclass code only
+        cmp eax, 0x00010800 ; Mass Storage Controller - Non-Volatile Memory Controller
+        je .found_dev
+
+.next_dev:
+        mov esi, dword [esi + PCIDEV.fd]
+        cmp esi, ebx
+        jne .check_dev
+
+.exit_success:
+        xor eax, eax
+        inc eax
+        ret
+
+.found_dev:
+        ; skip PCIDEV.owner check if the PCI device pointer has already been
+        ; allocated (without this check, more than 1 NVMe device cannot be
+        ; registered)
+        mov eax, dword [p_nvme_devices]
+        test eax, eax
+        jnz @f
+        cmp dword [esi + PCIDEV.owner], 0
+        jnz .err
+
+@@:
+        cmp dword [num_pcidevs], TOTAL_PCIDEVS
+        jne @f
+        DEBUGF DBG_INFO, "Can't add any more NVMe devices...\n"
+        jmp .exit_success
+
+@@:
+        inc dword [num_pcidevs]
+        add dword [num_pcidevs_sz], sizeof.pcidev
+        cmp dword [p_nvme_devices], 0
+        jnz @f ; was the pointer already allocated?
+        invoke KernelAlloc, sizeof.pcidev * TOTAL_PCIDEVS
+        test eax, eax
+        jz .err
+        mov dword [p_nvme_devices], eax
+        mov dword [esi + PCIDEV.owner], eax
+        DEBUGF DBG_INFO, "nvme: Allocated memory for PCI devices at: 0x%x\n", eax
+
+@@:
+        mov ecx, dword [num_pcidevs]
+        dec ecx
+        mov edi, dword [p_nvme_devices]
+        mov edx, ecx
+        imul edx, sizeof.pcidev
+        lea edi, [edi + edx]
+
+        movzx eax, byte [esi + PCIDEV.bus]
+        mov byte [edi + pcidev.bus], al
+        movzx eax, byte [esi + PCIDEV.devfn]
+        mov byte [edi + pcidev.devfn], al
+        mov dword [edi + pcidev.num], ecx
+
+        jmp .next_dev
+
+.err:
+        xor eax, eax
+        ret
+
+endp
+
+; Returns 1 if the NVMe device is compatible, 0 otherwise. In practice, the
+; driver should be compatible with most compliant controllers. This proc also
+; performs some early initialization (mapping the controller's registers), a
+; leftover of early design decisions; since the code works, I haven't felt
+; inclined to change it.
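+; (Note: the 0x2000 bytes of BAR0 mapped below cover the 4 KiB register page
+; plus one page of doorbell registers; assuming a small CAP.DSTRD, that is
+; enough for the admin queue and the single I/O queue pair this driver uses.)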
+proc device_is_compat stdcall, pci:dword
+
+        push esi edx ecx
+        mov esi, [pci]
+        invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_line
+        mov byte [esi + pcidev.iline], al
+        invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.base_addr_0
+        and eax, 0xfffffff0
+        test eax, eax
+        jz .failure
+        mov edx, eax
+
+        invoke MapIoMem, eax, 0x2000, PG_SW+PG_NOCACHE
+        test eax, eax
+        jz .failure
+        mov dword [esi + pcidev.io_addr], eax
+        mov eax, dword [eax + NVME_MMIO.CAP + 4]
+        and eax, CAP_DSTRD
+        mov byte [esi + pcidev.dstrd], al
+        mov eax, dword [esi + pcidev.io_addr]
+        mov eax, dword [eax + NVME_MMIO.VS]
+        DEBUGF DBG_INFO, "nvme%u: Controller version: 0x%x\n", [esi + pcidev.num], eax
+        mov dword [esi + pcidev.version], eax
+        pop ecx edx esi
+        xor eax, eax
+        inc eax
+        ret
+
+.failure:
+        DEBUGF DBG_INFO, "nvme%u: something went wrong checking NVMe device compatibility\n", [esi + pcidev.num]
+        pop ecx edx esi
+        xor eax, eax
+        ret
+
+endp
+
+; nvme_init: Initializes the NVMe controller, I/O queues, and namespaces.
+proc nvme_init stdcall, pci:dword
+
+        push ebx esi edi
+        mov esi, dword [pci]
+
+        ; Check the PCI header to see if interrupts are disabled, if so
+        ; we have to re-enable them
+        invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command
+        and eax, not (1 shl 10)
+        ; Enable Bus Master bit, memory space access, and I/O space access. QEMU automatically sets the
+        ; bus master bit, but Virtualbox does not. Not sure about the other bits though, but let's set them
+        ; to 1 anyway just to be extra cautious.
+        ; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/1#issuecomment-467
+        or eax, (1 shl 2) or (1 shl 1) or 1
+        invoke PciWrite16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command, eax
+
+        ; Check if the device has a pointer to the capabilities list (status register bit 4 set to 1)
+        ; though this check is probably unnecessary since all PCIe devices should have this bit set to 1
+        invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.status
+        test ax, (1 shl 4)
+        jz .exit_fail
+
+        invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.cap_ptr
+        and eax, 0xfc ; bottom two bits are reserved, so mask them before we access the configuration space
+        mov edi, eax
+        DEBUGF DBG_INFO, "nvme%u: Checking capabilities...\n", [esi + pcidev.num]
+
+; We need to check if there are any MSI/MSI-X capabilities, and if so, make sure they're disabled since
+; we're using old fashioned pin-based interrupts (for now)
+.read_cap:
+        invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+        add edi, 2
+        cmp al, MSICAP_CID
+        je .got_msi_cap
+        cmp al, MSIXCAP_CID
+        je .got_msix_cap
+        movzx edi, ah
+        test edi, edi
+        jnz .read_cap
+        DEBUGF DBG_INFO, "nvme%u: MSI/MSI-X capability not found\n", [esi + pcidev.num]
+        jmp .end_cap_parse
+
+.got_msi_cap:
+        DEBUGF DBG_INFO, "nvme%u: Found MSI capability\n", [esi + pcidev.num]
+        invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+        and eax, not MSICAP_MSIE
+        invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
+        jmp .end_cap_parse
+
+.got_msix_cap:
+        DEBUGF DBG_INFO, "nvme%u: Found MSI-X capability\n", [esi + pcidev.num]
+        invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+        and eax, not MSIXCAP_MXE
+        invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
+
+.end_cap_parse:
+        mov edi, dword [esi + pcidev.io_addr]
+
+        ; check maximum queue entries supported (CAP.MQES is a 0's based value)
+        mov eax, dword [edi + NVME_MMIO.CAP]
+        DEBUGF DBG_INFO, "nvme%u: Maximum queue entries available is %u (required: %u)\n", [esi + pcidev.num], ax, SQ_ENTRIES
+        cmp ax, SQ_ENTRIES - 1
+        jb .exit_fail
+
+        if __DEBUG__
+        test eax, CAP_CQR
+        setnz al
+        DEBUGF DBG_INFO, "nvme%u: Contiguous queues required: %u\n", [esi + pcidev.num], al
+        end if
+
+        ; Check if NVM command set is supported
+        mov eax, dword [edi + NVME_MMIO.CAP + 4]
+        DEBUGF DBG_INFO, "nvme%u: Checking if NVM command set is supported...\n", [esi + pcidev.num]
+        test eax, CAP_CSS_NVM_CMDSET
+        jz .exit_fail
+        DEBUGF DBG_INFO, "nvme%u: OK... NVM command set supported\n", [esi + pcidev.num]
+
+        stdcall nvme_disable_ctrl, esi
+        DEBUGF DBG_INFO, "nvme%u: Checking if memory page size is supported...\n", [esi + pcidev.num]
+        mov eax, dword [edi + NVME_MMIO.CAP + 4]
+        mov edx, eax
+        and edx, CAP_MPSMIN
+        shr edx, 16
+        cmp edx, NVM_MPS
+        ja .exit_fail
+        and eax, CAP_MPSMAX
+        shr eax, 20
+        cmp eax, NVM_MPS
+        jb .exit_fail
+        DEBUGF DBG_INFO, "nvme%u: OK... memory page size supported\n", [esi + pcidev.num]
+
+        ; Configure IOSQES, IOCQES, AMS, MPS, CSS
+        ; CSS = 0 (NVM Command Set)
+        ; AMS = 0 (Round Robin)
+        ; MPS = 0 (4KiB Pages)
+        ; IOSQES = 6 (64B)
+        ; IOCQES = 4 (16B)
+        xor eax, eax
+        or eax, CC_DEFAULT_IOSQES or CC_DEFAULT_IOCQES
+        mov dword [edi + NVME_MMIO.CC], eax
+        DEBUGF DBG_INFO, "nvme%u: OK... controller is configured to appropriate settings\n", [esi + pcidev.num]
+
+        ; Configure Admin Queue Attributes (ASQS/ACQS are 0's based values)
+        xor eax, eax
+        or eax, (NVM_ASQS - 1) or ((NVM_ACQS - 1) shl 16)
+        mov dword [edi + NVME_MMIO.AQA], eax
+        DEBUGF DBG_INFO, "nvme%u: Admin queue attributes: 0x%x\n", [esi + pcidev.num], eax
+
+        ; Allocate list of queues
+        DEBUGF DBG_INFO, "nvme%u: Allocating Administrator and I/O queues...\n", [esi + pcidev.num]
+        invoke KernelAlloc, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1)
+        test eax, eax
+        jz .exit_fail
+        mov dword [esi + pcidev.queue_entries], eax
+        mov edi, eax
+        stdcall memsetdz, eax, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1) / 4
+
+        ; Allocate submission/completion queue pointers
+        xor ebx, ebx
+
+.init_queues:
+        invoke KernelAlloc, QUEUE_ALLOC_SIZE
+        test eax, eax
+        jz .exit_fail
+        DEBUGF DBG_INFO, "nvme%u: Allocated queue at offset %u: 0x%x\n", [esi + pcidev.num], ebx, eax
+        mov dword [edi + ebx + NVM_QUEUE_ENTRY.cq_ptr], eax
+        mov edx, eax
+        add eax, CQ_ALLOC_SIZE
+        mov dword [edi + ebx + NVM_QUEUE_ENTRY.sq_ptr], eax
+        stdcall memsetdz, edx, QUEUE_ALLOC_SIZE / 4
+
+        ; Initialize command entries
+        invoke KernelAlloc, sizeof.NVMQCMD * CQ_ENTRIES
+        test eax, eax
+        jz .exit_fail
+        mov dword [edi + ebx + NVM_QUEUE_ENTRY.cmd_ptr], eax
+        push ebx esi
+        mov esi, eax
+        xor ebx, ebx
+
+.init_cmd_entries:
+        invoke KernelAlloc, sizeof.MUTEX
+        test eax, eax
+        jz .exit_fail_cleanup
+        mov dword [esi + NVMQCMD.mutex_ptr], eax
+        mov dword [esi + NVMQCMD.cid], ebx
+        mov ecx, eax
+        invoke MutexInit
+        inc ebx
+        add esi, sizeof.NVMQCMD
+        cmp ebx, CQ_ENTRIES
+        jne .init_cmd_entries
+
+        pop esi ebx
+        add ebx, sizeof.NVM_QUEUE_ENTRY
+        cmp ebx, (LAST_QUEUE_ID + 1) * sizeof.NVM_QUEUE_ENTRY
+        jne .init_queues
+
+        ; Configure Admin Completion Queue Base Address
+        mov esi, [pci]
+        mov esi, dword [esi + pcidev.io_addr]
+        mov eax, dword [edi + NVM_QUEUE_ENTRY.cq_ptr]
+        invoke GetPhysAddr
+        mov dword [esi + NVME_MMIO.ACQ], eax
+        mov dword [esi + NVME_MMIO.ACQ + 4], 0
+        if __DEBUG__
+        push esi
+        mov esi, [pci]
+        DEBUGF DBG_INFO, "nvme%u: Admin completion queue base address: 0x%x\n", [esi + pcidev.num], eax
+        pop esi
+        end if
+
+        ; Configure Admin Submission Queue Base Address
+        mov eax, dword [edi + NVM_QUEUE_ENTRY.sq_ptr]
+        invoke GetPhysAddr
+        mov dword [esi + NVME_MMIO.ASQ], eax
+        mov dword [esi + NVME_MMIO.ASQ + 4], 0
+        if __DEBUG__
+        push esi
+        mov esi, [pci]
+        DEBUGF DBG_INFO, "nvme%u: Admin submission queue base address: 0x%x\n", [esi + pcidev.num], eax
+        pop esi
+        end if
+
+        ; Attach interrupt handler
+        mov esi, [pci]
+        movzx eax, byte [esi + pcidev.iline]
+        DEBUGF DBG_INFO, "nvme%u: Attaching interrupt handler to IRQ %u\n", [esi + pcidev.num], eax
+        invoke AttachIntHandler, eax, irq_handler, 0
+        test eax, eax
+        jz .exit_fail
+        DEBUGF DBG_INFO, "nvme%u: Successfully attached interrupt handler\n", [esi + pcidev.num]
+
+        ; Restart the controller
+        stdcall nvme_enable_ctrl, esi
+
+        invoke KernelAlloc, 0x1000
+        test eax, eax
+        jz .exit_fail
+        mov edi, eax
+        invoke GetPhysAddr
+        ; pci:dword, nsid:dword, dptr:dword, cns:byte
+        stdcall nvme_identify, [pci], 0, eax, CNS_IDCS
+        test eax, eax
+        jz .exit_fail
+        mov eax, dword [edi + IDENTC.nn]
+        mov dword [esi + pcidev.nn], eax
+        DEBUGF DBG_INFO, "nvme%u: Namespace Count: %u\n", [esi + pcidev.num], eax
+
+        ; Note that the specification only allows ASCII strings that contain code
+        ; values between 0x20 (' ') and 0x7E ('~'). Strings are left justified and
+        ; padded with spaces (at least according to the 1.4.0 spec) which means there
+        ; is no null terminator anywhere. To prevent garbage or repeated values from
+        ; being printed to the debug log, I have inserted a 0 byte at the end of each
+        ; string.
+        lea ebx, byte [edi + IDENTC.sn]
+        mov byte [ebx + 19], 0
+        DEBUGF DBG_INFO, "nvme%u: Serial Number: %s\n", [esi + pcidev.num], ebx
+        add ebx, 20
+        mov byte [ebx + 39], 0
+        DEBUGF DBG_INFO, "nvme%u: Model Number: %s\n", [esi + pcidev.num], ebx
+        add ebx, 40
+        mov byte [ebx + 7], 0
+        DEBUGF DBG_INFO, "nvme%u: Firmware Revision: %s\n", [esi + pcidev.num], ebx
+        mov edx, dword [esi + pcidev.version]
+
+        cmp edx, VS140
+        jb @f
+        ; This is a reserved field in pre-1.4 controllers
+        mov al, byte [edi + IDENTC.cntrltype]
+        cmp al, CNTRLTYPE_IO_CONTROLLER
+        jne .exit_fail
+        ;DEBUGF DBG_INFO, "nvme%u: I/O controller detected...\n", [esi + pcidev.num]
+
+@@:
+        ; TODO: check IDENTC.AVSCC
+        mov al, byte [edi + IDENTC.sqes]
+        and al, 11110000b
+        DEBUGF DBG_INFO, "nvme%u: IDENTC.SQES = %u\n", [esi + pcidev.num], al
+        cmp al, 0x60 ; maximum submission queue entry size should be at least 64 bytes
+        jb .exit_fail
+        mov al, byte [edi + IDENTC.cqes]
+        and al, 11110000b
+        DEBUGF DBG_INFO, "nvme%u: IDENTC.CQES = %u\n", [esi + pcidev.num], al
+        cmp al, 0x40 ; maximum completion queue entry size should be at least 16 bytes
+        jb .exit_fail
+        invoke KernelFree, edi
+
+        mov eax, 1 or (1 shl 16) ; CDW11 (set the number of queues we want)
+        mov esi, [pci]
+        mov dword [esi + pcidev.spinlock], 1
+        stdcall set_features, [pci], NULLPTR, FID_NUMBER_OF_QUEUES, eax
+        stdcall nvme_poll, esi
+        test eax, eax
+        jz .exit_fail
+        mov esi, dword [esi + pcidev.queue_entries]
+        mov esi, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
+        mov eax, dword [esi + sizeof.CQ_ENTRY + CQ_ENTRY.cdw0]
+        ;DEBUGF DBG_INFO, "nvme%u: Set Features CDW0: 0x%x\n", [esi + pcidev.num], eax
+        test ax, ax ; Number of I/O Submission Queues allocated
+        jz .exit_fail
+        shr eax, 16 ; (SHR, not SHL: bring the upper word down into AX)
+        test ax, ax ; Number of I/O Completion Queues allocated
+        jz .exit_fail
+
+        ; Create I/O Queues
+        ; (TODO: create N queue pairs for N CPU cores, see page 8 of NVMe 1.4 spec for an explanation)
+        mov esi, [pci]
+        mov edi, esi
+        mov esi, dword [esi + pcidev.queue_entries]
+        add esi, sizeof.NVM_QUEUE_ENTRY
+        mov eax, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
+        invoke GetPhysAddr
+        stdcall create_io_completion_queue, [pci], eax, 1, IEN_ON
+        test eax, eax
+        jz .exit_fail
+        ;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O completion queue 1\n", [edi + pcidev.num]
+        mov eax, dword [esi + NVM_QUEUE_ENTRY.sq_ptr]
+        invoke GetPhysAddr
+        stdcall create_io_submission_queue, [pci], eax, 1, 1
+        test eax, eax ; (don't rely on stale flags surviving the call)
+        jz .exit_fail
+        ;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O submission queue 1\n", [edi + pcidev.num]
+
+        ; TODO: This only registers a single namespace. Add support for more
+        stdcall determine_active_nsids, [pci]
+        test eax, eax
+        jz .exit_fail ; No active NSIDs
+        mov esi, [pci]
+        mov dword [esi + pcidev.nsid], eax
+        DEBUGF DBG_INFO, "nvme%u: Found active NSID: %u\n", [esi + pcidev.num], eax
+
+        invoke KernelAlloc, 0x1000
+        test eax, eax
+        jz .exit_fail
+        mov edi, eax
+        invoke GetPhysAddr
+        stdcall nvme_identify, [pci], [esi + pcidev.nsid], eax, CNS_IDNS
+        test eax, eax
+        jz .exit_fail
+        invoke KernelAlloc, sizeof.NSINFO
+        test eax, eax
+        jz .exit_fail
+        mov ebx, eax
+        mov dword [esi + pcidev.nsinfo], eax
+        mov al, byte [edi + IDENTN.nsfeat]
+        mov byte [ebx + NSINFO.features], al
+        ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Features: 0x%x\n", [esi + pcidev.num], [esi + pcidev.nsid], al
+        mov eax, dword [esi + pcidev.nsid]
+        mov dword [ebx + NSINFO.nsid], eax
+        mov dword [ebx + NSINFO.pci], esi
+        mov eax, dword [edi + IDENTN.nsze]
+        mov dword [ebx + NSINFO.size], eax
+        mov eax, dword [edi + IDENTN.nsze + 4]
+        mov dword [ebx + NSINFO.size + 4], eax
+        mov eax, dword [edi + IDENTN.ncap]
+        mov dword [ebx + NSINFO.capacity], eax
+        mov eax, dword [edi + IDENTN.ncap + 4]
+        mov dword [ebx + NSINFO.capacity + 4], eax
+        ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Size: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.nsze], [edi + IDENTN.nsze + 4]
+        ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Capacity: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.ncap], [edi + IDENTN.ncap + 4]
+        mov eax, dword [edi + IDENTN.lbaf0]
+        shr eax, 16 ; Get LBADS
+
+        ; KolibriOS only supports a logical block size of 512 bytes (LBADS = 9),
+        ; so if this namespace is formatted with a different LBADS we have to
+        ; ignore it
+        cmp al, SUPPORTED_LBADS
+        jne .exit_fail
+
+        mov byte [ebx + NSINFO.lbads], al
+        invoke KernelFree, edi
+        if 0
+        invoke KernelAlloc, 0x6000
+        test eax, eax
+        jz .exit_fail
+        mov edi, eax
+        invoke KernelAlloc, 0x8
+        test eax, eax
+        jz .exit_fail
+        mov edx, NVM_CMD_READ
+        mov dword [eax], 6
+        add edi, 0x5
+        mov dword [esi + pcidev.spinlock], 1
+        stdcall nvme_readwrite, [esi + pcidev.nsinfo], edi, 0x0, 0, eax
+        stdcall nvme_poll, esi
+        test eax, eax
+        jz .exit_fail
+        DEBUGF DBG_INFO, "STRING: %s\n", edi
+        add edi, 0x2000
+        DEBUGF DBG_INFO, "STRING: %s\n", edi
+        end if
+        DEBUGF DBG_INFO, "nvme%u: Successfully initialized driver\n", [esi + pcidev.num]
+        xor eax, eax
+        inc eax
+        pop edi esi ebx
+        ret
+
+.exit_fail_cleanup:
+        add esp, 8 ; discard the EBX/ESI pushed in .init_queues
+
+.exit_fail:
+        mov esi, [pci]
+        DEBUGF DBG_INFO, "nvme%u: Failed to initialize controller\n", [esi + pcidev.num]
+        mov edi, dword [esi + pcidev.io_addr]
+        mov eax, dword [edi + NVME_MMIO.CSTS]
+        test eax, CSTS_CFS
+        jz @f
+        DEBUGF DBG_INFO, "nvme%u: A fatal controller error has occurred\n", [esi + pcidev.num]
+
+@@:
+        xor eax, eax
+        pop edi esi ebx
+        ret
+
+endp
+
+; Returns a new CID for queue #y
+proc get_new_cid stdcall, pci:dword, y:dword
+
+        mov eax, [pci]
+        mov eax, dword [eax + pcidev.queue_entries]
+        mov ecx, [y]
+        shl ecx, SIZEOF_NVM_QUEUE_ENTRY
+        movzx eax, word [eax + ecx + NVM_QUEUE_ENTRY.head]
+        ;DEBUGF DBG_INFO, "get_new_cid: %u\n", eax
+        ret
+
+endp
+
+proc nvme_disable_ctrl stdcall, pci:dword
+
+        ; TODO: Add timeout of CAP.TO seconds
+        push esi edi
+        mov esi, [pci]
+        DEBUGF DBG_INFO, "nvme%u: Disabling Controller...\n", [esi + pcidev.num]
+        mov edi, dword [esi + pcidev.io_addr]
+        and dword [edi + NVME_MMIO.CC], 0xfffffffe ; CC.EN = 0
+
+; Wait for controller to be brought to idle state, CSTS.RDY should be cleared to 0 when this happens
+.wait:
+        test dword [edi + NVME_MMIO.CSTS], CSTS_RDY
+        jnz .wait
+        DEBUGF DBG_INFO, "nvme%u: Successfully disabled controller\n", [esi + pcidev.num]
+        pop edi esi
+        ret
+
+endp
+
+proc nvme_enable_ctrl stdcall, pci:dword
+
+        ; TODO: Add timeout of CAP.TO seconds
+        push esi edi
+        mov esi, [pci]
+        DEBUGF DBG_INFO, "nvme%u: Enabling Controller...\n", [esi + pcidev.num]
+        mov edi, dword [esi + pcidev.io_addr]
+        or dword [edi + NVME_MMIO.CC], 1 ; CC.EN = 1
+
+; Wait for controller to be brought into active state, CSTS.RDY should be set to 1 when this happens
+.wait:
+        test dword [edi + NVME_MMIO.CSTS], CSTS_RDY
+        jz .wait
+        DEBUGF DBG_INFO, "nvme%u: Successfully enabled controller\n", [esi + pcidev.num]
+        pop edi esi
+        ret
+
+endp
+
+; Polls until the device's spinlock is released (or until a generous
+; timeout is reached). The lock is released by the interrupt handler
+; once all outstanding commands have been completed.
+proc nvme_poll stdcall, pci:dword
+
+        push esi
+        mov esi, [pci]
+        xor ecx, ecx
+
+@@:
+        inc ecx
+        cmp ecx, 0x10000000
+        je @f
+        xor eax, eax
+        inc eax
+        xchg eax, dword [esi + pcidev.spinlock]
+        test eax, eax
+        jnz @b
+
+        ; lock was released, return 1
+        pop esi
+        xor eax, eax
+        inc eax
+        ret
+
+@@:
+        ; timeout: lock wasn't released, return 0
+        pop esi
+        xor eax, eax
+        ret
+
+endp
+
+
+; Writes to completion queue 'y' head doorbell. 'cqh' should
+; be the new head value that will be stored in the register.
+proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword
+
+        push esi edi
+        mov esi, [pci]
+
+        ; 1000h + ((2y + 1) * (4 << CAP.DSTRD))
+        mov eax, [y]
+        shl al, 1
+        inc al
+        mov edx, 4
+        mov cl, byte [esi + pcidev.dstrd]
+        shl dx, cl
+        imul dx, ax
+        add dx, 0x1000
+        mov ecx, [y]
+        shl ecx, SIZEOF_NVM_QUEUE_ENTRY
+        mov edi, dword [esi + pcidev.queue_entries]
+        lea edi, dword [edi + ecx]
+        mov eax, [cqh]
+        mov esi, dword [esi + pcidev.io_addr]
+        mov word [esi + edx], ax ; Write to CQyHDBL
+        mov word [edi + NVM_QUEUE_ENTRY.head], ax
+
+        ; NOTE: Currently commented out since we're just using
+        ; plain spinlocks for notifying when a command has been
+        ; completed, but this will be uncommented later and use
+        ; semaphores instead of mutexes once the polling code
+        ; has been replaced with the asynchronous API.
+
+        ; Unlock the mutex now that the command is complete
+        ;mov edi, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr]
+        ;mov ecx, [cqh]
+        ;shl ecx, SIZEOF_NVMQCMD
+        ;add edi, ecx
+        ;mov ecx, dword [edi + NVMQCMD.mutex_ptr]
+        ;invoke MutexUnlock
+
+        pop edi esi
+        ret
+
+endp
+
+; Writes to submission queue 'y' tail doorbell. 'cmd' should
+; be a pointer to the submission queue entry to submit.
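+; As a worked example of the doorbell arithmetic used here and in
+; cqyhdbl_write: assuming CAP.DSTRD = 0 and queue y = 1,
+;   SQ1 tail doorbell = 1000h + (2 * 1 * (4 << 0))       = 1008h
+;   CQ1 head doorbell = 1000h + ((2 * 1 + 1) * (4 << 0)) = 100Ch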
+proc sqytdbl_write stdcall, pci:dword, y:word, cmd:dword
+
+        push ebx esi edi
+        mov edi, [pci]
+        mov edi, dword [edi + pcidev.queue_entries]
+        movzx ebx, [y]
+        shl ebx, SIZEOF_NVM_QUEUE_ENTRY
+        lea edi, [edi + ebx]
+        ;mov eax, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr]
+        mov edx, dword [edi + NVM_QUEUE_ENTRY.sq_ptr]
+        mov esi, [cmd]
+        mov ecx, dword [esi + SQ_ENTRY.cdw0]
+        shr ecx, 16 ; Get CID
+        ; (The next three instructions belong to the disabled mutex path below:
+        ; they computed the NVMQCMD slot for this CID from cmd_ptr in EAX, so
+        ; they are commented out together with it.)
+        ;mov ebx, ecx
+        ;shl ebx, SIZEOF_NVMQCMD
+        ;add ebx, eax
+        shl ecx, SIZEOF_SQ_ENTRY
+        lea edx, [edx + ecx]
+        stdcall memcpyd, edx, esi, sizeof.SQ_ENTRY / 4
+        ;mov ecx, dword [ebx + NVMQCMD.mutex_ptr]
+        ;invoke MutexLock
+
+        mov esi, [pci]
+        mov ax, word [edi + NVM_QUEUE_ENTRY.tail]
+        inc ax
+        cmp ax, NVM_ASQS ; wrap around once we step past the last entry
+        jb @f
+        xor ax, ax
+
+@@:
+        ; 1000h + (2y * (4 << CAP.DSTRD))
+        movzx ebx, [y]
+        shl ebx, 1
+        mov edx, 4
+        mov cl, byte [esi + pcidev.dstrd]
+        shl edx, cl
+        imul edx, ebx
+        add edx, 0x1000
+        mov word [edi + NVM_QUEUE_ENTRY.tail], ax
+        mov esi, dword [esi + pcidev.io_addr]
+        mov word [esi + edx], ax
+        pop edi esi ebx
+        ret
+
+endp
+
+proc is_queue_full stdcall, tail:word, head:word
+
+        push bx
+        mov ax, [tail]
+        mov bx, [head]
+        cmp ax, bx
+        je .not_full
+        test bx, bx
+        jnz @f
+        cmp ax, NVM_ASQS - 1
+        jne @f
+        pop bx
+        xor eax, eax
+        inc eax
+        ret
+
+@@:
+        cmp ax, bx
+        jae .not_full
+        sub bx, ax ; the queue is full if head - tail == 1
+        cmp bx, 1
+        jne .not_full
+        pop bx
+        xor eax, eax
+        inc eax
+        ret
+
+.not_full:
+        pop bx
+        xor eax, eax
+        ret
+
+endp
+
+; Notifies the controller that all the commands of the respective queue
+; have been acknowledged as completed (if any).
+proc consume_cq_entries stdcall, pci:dword, queue:dword
+
+        push esi edi
+        mov esi, [pci]
+        mov ecx, [queue]
+        shl ecx, SIZEOF_NVM_QUEUE_ENTRY
+        mov esi, dword [esi + pcidev.queue_entries]
+        lea esi, [esi + ecx]
+        movzx ecx, word [esi + NVM_QUEUE_ENTRY.head]
+        cmp cx, word [esi + NVM_QUEUE_ENTRY.tail]
+        je .end
+        inc ecx
+        cmp ecx, NVM_ACQS ; wrap around once we step past the last entry
+        jb @f
+        xor ecx, ecx
+        mov word [esi + NVM_QUEUE_ENTRY.head], cx
+
+@@:
+        stdcall cqyhdbl_write, [pci], [queue], ecx
+
+.end:
+        pop edi esi
+        xor eax, eax
+        ret
+
+endp
+
+; Our interrupt handler. Once the controller finishes a command,
+; it should generate an interrupt (assuming that no fatal error
+; occurred). If an interrupt isn't being generated when one is expected,
+; check the CSTS register to make sure that the error bit isn't
+; set; the controller doesn't generate any interrupts in such cases.
+;
+; Once a command has completed (successfully or not), the controller will
+; add a new completion queue entry, and it is the interrupt handler's
+; responsibility to write to the appropriate completion queue's head doorbell
+; register and update it correctly, otherwise the controller will continue
+; to generate interrupts (the most common cause of freezes with this driver,
+; in my experience).
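+; The flow below: determine which device raised the IRQ, mask its interrupt
+; vectors via INTMS (the 0x3 mask covers vectors 0 and 1, i.e. the admin and
+; I/O completion queues), acknowledge any completions, unmask via INTMC, and
+; finally release the spinlock that nvme_poll is spinning on.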
+proc irq_handler
+
+        push ebx esi edi
+        mov esi, dword [p_nvme_devices]
+        mov ebx, dword [num_pcidevs_sz]
+        add ebx, esi ; EBX = one past the end of the device array
+
+.check_who_raised_irq:
+        stdcall device_generated_interrupt, esi
+        test eax, eax
+        jnz @f
+        add esi, sizeof.pcidev
+        cmp esi, ebx
+        jb .check_who_raised_irq ; JB, not JBE: EBX points one past the last device
+
+        ; Interrupt not handled by driver, return 0
+        pop edi esi ebx
+        xor eax, eax
+        ret
+
+@@:
+        mov edi, dword [esi + pcidev.io_addr]
+        mov dword [edi + NVME_MMIO.INTMS], 0x3
+        stdcall consume_cq_entries, esi, ADMIN_QUEUE
+        stdcall consume_cq_entries, esi, 1
+
+        ; Interrupt handled by driver, return 1
+        mov dword [edi + NVME_MMIO.INTMC], 0x3
+        xor eax, eax
+        xchg eax, dword [esi + pcidev.spinlock] ; unlock spinlock
+        pop edi esi ebx
+        mov eax, 1
+        ret
+
+endp
+
+proc device_generated_interrupt stdcall, pci:dword
+
+        mov edx, [pci]
+        mov edx, dword [edx + pcidev.queue_entries]
+        xor ecx, ecx
+
+@@:
+        mov ax, word [edx + ecx + NVM_QUEUE_ENTRY.head]
+        cmp ax, word [edx + ecx + NVM_QUEUE_ENTRY.tail]
+        jne @f
+        add ecx, sizeof.NVM_QUEUE_ENTRY
+        cmp ecx, LAST_QUEUE_ID * sizeof.NVM_QUEUE_ENTRY
+        jbe @b
+        xor eax, eax
+        ret
+
+@@:
+        mov eax, 1
+        ret
+
+endp
+
+; Deletes the allocated I/O queues for all of the NVMe devices,
+; and shuts down all of the controllers. See pages 295-297 of
+; the NVMe 1.4.0 spec for details on how shutdown processing
+; should occur.
+;
+; Currently shutdown still has problems on VMWare.
+; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/5
+proc nvme_cleanup
+
+        DEBUGF DBG_INFO, "nvme: Cleaning up...\n"
+        push ebx esi edi
+        mov esi, dword [p_nvme_devices]
+        test esi, esi
+        jnz @f
+        pop edi esi ebx
+        ret
+
+@@:
+        sub esi, sizeof.pcidev
+        xor ebx, ebx
+
+.get_pcidev:
+        add esi, sizeof.pcidev
+
+        ; Free the queues
+        mov edi, dword [esi + pcidev.queue_entries]
+        test edi, edi
+        jz .ret
+        sub edi, sizeof.NVM_QUEUE_ENTRY
+        push ebx
+        xor ebx, ebx
+
+.get_queue:
+        add edi, sizeof.NVM_QUEUE_ENTRY
+
+        ; TODO: Check if I/O completion and submission queue exist
+        ; before deleting?
+        test ebx, ebx
+        jz @f ; we don't want to delete the admin queue
+        stdcall delete_io_submission_queue, esi, ebx
+        stdcall delete_io_completion_queue, esi, ebx
+
+@@:
+        inc ebx
+        cmp ebx, LAST_QUEUE_ID
+        jbe .get_queue
+        pop ebx
+
+        ; Shutdown the controller
+        mov edi, dword [esi + pcidev.io_addr]
+        mov eax, dword [edi + NVME_MMIO.CC]
+        and eax, not CC_SHN
+        or eax, CC_SHN_NORMAL_SHUTDOWN
+        mov dword [edi + NVME_MMIO.CC], eax
+        stdcall nvme_disable_ctrl, esi
+
+; Wait for shutdown processing to complete, i.e. until CSTS.SHST reads 10b
+; (TODO: bound this wait with CAP.TO, see the VMWare issue linked above)
+@@:
+        test byte [edi + NVME_MMIO.CSTS], CSTS_SHST_SHUTDOWN_COMPLETE
+        jz @b
+
+        inc ebx
+        cmp ebx, dword [num_pcidevs]
+        jne .get_pcidev
+
+.ret:
+        pop edi esi ebx
+        ret
+
+endp
+
+;all initialized data place here
+align 4
+p_nvme_devices dd 0 ; Pointer to array of NVMe devices
+num_pcidevs dd 0 ; Number of NVMe devices
+num_pcidevs_sz dd 0 ; Size of the NVMe device array in bytes
+my_service db "nvme",0 ;max 16 chars including zero
+disk_functions:
+        dd disk_functions.end - disk_functions
+        dd 0 ; no close function
+        dd 0 ; no closemedia function
+        dd nvme_query_media
+        dd nvme_read
+        dd nvme_write
+        dd 0 ; no flush function
+        dd 0 ; use default cache size
+.end:
+        if __DEBUG__
+        include_debug_strings
+        end if
+
+align 4
+data fixups
+end data
+
+; vim: syntax=fasm
diff --git a/drivers/nvme/nvme.inc b/drivers/nvme/nvme.inc
new file mode 100644
index 0000000000..3b312b39ec
--- /dev/null
+++ b/drivers/nvme/nvme.inc
@@ -0,0 +1,591 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                                 ;;
+;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
+;; Distributed under terms of the GNU General Public License       ;;
+;;                                                                 ;;
+;;                  GNU GENERAL PUBLIC LICENSE                     ;;
+;;                     Version 2, June 1991                        ;;
+;;                                                                 ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; NVMe Controller Versions
+VS100 = 0x00010000 ; (v1.0.0)
+VS110 = 0x00010100 ; (v1.1.0)
+VS120 = 0x00010200 ; (v1.2.0)
+VS121 = 0x00010201 ; (v1.2.1)
+VS130 = 0x00010300 ; (v1.3.0)
+VS140 = 0x00010400 ; (v1.4.0)
+
+NVM_MPS = 0 ; Memory Page Size (2 ^ (12 + MPS))
+NVM_ASQS = 64 ; Admin Submission Queue Size
+NVM_ACQS = NVM_ASQS ; Admin Completion Queue Size
+LAST_QUEUE_ID = 1 ; Index of the last queue
+SQ_ENTRIES = NVM_ASQS ; I/O and Admin Submission Queue Size
+CQ_ENTRIES = NVM_ACQS ; I/O and Admin Completion Queue Size
+PAGE_SIZE = 4096 shl NVM_MPS ; Use 4KiB pages
+SUPPORTED_LBADS = 9 ; KolibriOS only supports a 512-byte logical block size (LBADS = 9); later on we may remove this restriction
+SQ_ALLOC_SIZE = 0x1000
+CQ_ALLOC_SIZE = 0x1000
+QUEUE_ALLOC_SIZE = SQ_ALLOC_SIZE + CQ_ALLOC_SIZE
+SIZEOF_SQ_ENTRY = 6 ; log2(sizeof.SQ_ENTRY)
+SIZEOF_CQ_ENTRY = 4 ; log2(sizeof.CQ_ENTRY)
+SIZEOF_NVM_QUEUE_ENTRY = 4 ; log2(sizeof.NVM_QUEUE_ENTRY)
+SIZEOF_NVMQCMD = 4 ; log2(sizeof.NVMQCMD)
+
+MSIXCAP_CID = 0x11
+MSIXCAP_MXE = 1 shl 15 ; MSI-X Enable bit
+MSICAP_CID = 0x05
+MSICAP_MSIE = 1 ; MSI Enable bit
+
+ADMIN_QUEUE = 0 ; Admin Queue ID
+
+IEN_ON = 2
+IEN_OFF = 0
+
+; Opcodes for NVM commands
+NVM_CMD_FLUSH = 0x00
+NVM_CMD_WRITE = 0x01
+NVM_CMD_READ = 0x02
+NVM_CMD_WRITE_UNCORRECTABLE = 0x04
+NVM_CMD_COMPARE = 0x05
+NVM_CMD_WRITE_ZEROES = 0x08
+NVM_CMD_DATASET_MANAGEMENT = 0x09
+NVM_CMD_VERIFY = 0x0C
+NVM_CMD_RESERVATION_REG = 0x0D
+NVM_CMD_RESERVATION_REPORT = 0x0E
+NVM_CMD_RESERVATION_ACQUIRE = 0x11
+NVM_CMD_RESERVATION_RELEASE = 0x15
+NVM_CMD_COPY = 0x19
+
+; Opcodes for admin commands (Page 94 of NVMe 1.4 spec)
+ADM_CMD_DEL_IO_SUBMISSION_QUEUE = 0x00
+ADM_CMD_CRE_IO_SUBMISSION_QUEUE = 0x01
+ADM_CMD_GET_LOG_PAGE = 0x02
+ADM_CMD_DEL_IO_COMPLETION_QUEUE = 0x04
+ADM_CMD_CRE_IO_COMPLETION_QUEUE = 0x05
+ADM_CMD_IDENTIFY = 0x06
+ADM_CMD_ABORT = 0x08
+ADM_CMD_SET_FEATURES = 0x09
+ADM_CMD_GET_FEATURES = 0x0A
+
+; fuse (fused operation): In a fused operation, a complex command is created by 'fusing' together
+; two simpler commands. This field specifies whether this command is part
+; of a fused operation, and if so, which command it is in the sequence:
+; 00b -> Normal operation
+; 01b -> Fused operation, first command
+; 10b -> Fused operation, second command
+; 11b -> Reserved
+NO_FUSE = 0
+FUSE_OP_FIRST_CMD = 1 shl 8
+FUSE_OP_SECOND_CMD = 2 shl 8
+
+; sel (PRP or SGL for data transfer): This field specifies whether PRPs or SGLs are used for any
+; data transfer associated with the command. PRPs shall be
+; used for all Admin commands for NVMe over PCIe implementations.
+; SGLs shall be used for all Admin and I/O commands for NVMe over
+; Fabrics implementations (i.e., field set to 01b):
+; 00b -> PRPs are used for this transfer
+; 01b -> SGLs are used for this transfer, MPTR will contain address of
+;        a single contiguous physical buffer that is byte aligned
+; 10b -> SGLs are used for this transfer. MPTR will contain address of
+;        an SGL segment containing exactly one SGL descriptor that is
+;        QWORD aligned
+; 11b -> Reserved
+SEL_PRP = 0
+SEL_SGL = 1 shl 14
+
+; Controller or Namespace Structure (CNS) specifies the information to be returned to the host.
+CNS_IDNS = 0x0 ; Namespace data structure (NSID)
+CNS_IDCS = 0x1 ; Controller data structure
+CNS_ANIDL = 0x2 ; Active namespace ID list (NSID)
+CNS_NIDL = 0x3 ; Namespace identification descriptor list (NSID)
+CNS_NVM_SL = 0x4 ; NVM Set List
+
+; Optional Admin Command Support (OACS) values
+OACS_SEC_SEN_RECV_SUPPORTED = 1 shl 0
+OACS_FMT_NVM_SUPPORTED = 1 shl 1
+OACS_FIRM_COMDL_SUPPORTED = 1 shl 2
+OACS_NSMAN_SUPPORTED = 1 shl 3
+
+; scope is all attached namespaces or all namespaces in NVM subsystem
+NSID_BROADCAST = 0xFFFFFFFF
+
+NSSRC_RESET = 0x4E564D65 ; "NVMe" (initiates an NVMe subsystem reset)
+
+; NVMe Capabilities (masks within the low/high DWORDs of CAP; fields above
+; bit 31 are offset by -32, e.g. MPSMIN at CAP bits 51:48 is bits 19:16 of
+; the high DWORD)
+CAP_MQES = 0xffff
+CAP_CQR = 1 shl 16
+CAP_AMS = (1 shl 17) or (1 shl 18)
+CAP_TO = 0xff000000
+CAP_DSTRD = 1 or (1 shl 1) or (1 shl 2) or (1 shl 3)
+CAP_NSSRS = 1 shl 4
+CAP_CSS_NVM_CMDSET = 1 shl 5
+CAP_CSS_NOIO = 1 shl 12
+CAP_BPS = 1 shl 13
+CAP_CPS_COSCOP = 1 shl 14
+CAP_CPS_DOSCOP = 1 shl 15
+CAP_CPS_NVMSCOP = CAP_CPS_COSCOP or CAP_CPS_DOSCOP
+CAP_MPSMIN = (1 shl 16) or (1 shl 17) or (1 shl 18) or (1 shl 19)
+CAP_MPSMAX = (1 shl 20) or (1 shl 21) or (1 shl 22) or (1 shl 23)
+CAP_PMRS = 1 shl 24
+CAP_CMBS = 1 shl 25
+CAP_NSSS = 1 shl 26
+CAP_CRMS_CRWMS = 1 shl 27
+CAP_CRMS_CRIMS = 1 shl 28
+
+; Controller Configuration Bits
+CC_EN = 1
+CC_CSS = (1 shl 4) or (1 shl 5) or (1 shl 6)
+CC_MPS = (1 shl 7) or (1 shl 8) or (1 shl 9) or (1 shl 10)
+CC_AMS = (1 shl 11) or (1 shl 12) or (1 shl 13)
+CC_SHN = (1 shl 14) or (1 shl 15)
+CC_IOSQES = (1 shl 16) or (1 shl 17) or (1 shl 18) or (1 shl 19)
+CC_IOCQES = (1 shl 20) or (1 shl 21) or (1 shl 22) or (1 shl 23)
+CC_CRIME = 1 shl 24
+
+CC_SHN_NORMAL_SHUTDOWN = 1 shl 14
+CC_SHN_ABRUPT_SHUTDOWN = 1 shl 15
+
+CC_DEFAULT_IOSQES = SIZEOF_SQ_ENTRY shl 16
+CC_DEFAULT_IOCQES = SIZEOF_CQ_ENTRY shl 20
+
+; Completion Queue Entry Status Field Values
+CQ_PHASE_TAG = 1 shl 0
+CQ_STATUS_SC = 0x1fe ; Status Code occupies bits 8:1 of the status word
+CQ_STATUS_SCT = (1 shl 9) or (1 shl 10) or (1 shl 11)
+CQ_STATUS_CRD = (1 shl 12) or (1 shl 13)
+CQ_STATUS_M = 1 shl 14
+CQ_STATUS_DNR = 1 shl 15
+
+; Completion Queue Entry Status Field - Status Code Type Values
+CQ_STATUS_SCT_GCS = 0x0 ; Generic Command Status
+CQ_STATUS_SCT_CSS = 0x1 ; Command Specific Status
+CQ_STATUS_SCT_MADIE = 0x2 ; Media and Data Integrity Errors
+CQ_STATUS_SCT_PRS = 0x3 ; Path Related Status
+
+; Completion Queue Entry Status Field - Status Code Generic Command Values
+CQ_STATUS_SC_GCS_SUCCESS = 0x00 ; Successful Completion
+CQ_STATUS_SC_GCS_ICOP = 0x01 ; Invalid Command Opcode
+CQ_STATUS_SC_GCS_IFIC = 0x02 ; Invalid Field in Command
+CQ_STATUS_SC_GCS_CIDC = 0x03 ; Command ID Conflict
+CQ_STATUS_SC_GCS_DTE = 0x04 ; Data Transfer Error
+CQ_STATUS_SC_GCS_CAPLN = 0x05 ; Commands Aborted due to Power Loss Notification
+CQ_STATUS_SC_GCS_INERR = 0x06 ; Internal Error
+CQ_STATUS_SC_GCS_CAR = 0x07 ; Command Abort Requested
+CQ_STATUS_SC_GCS_CASQD = 0x08 ; Command Aborted due to SQ Deletion
+CQ_STATUS_SC_GCS_CAFFC = 0x09 ; Command Aborted due to Failed Fused Command
+CQ_STATUS_SC_GCS_CAMFC = 0x0A ; Command Aborted due to Missing Fused Command
+CQ_STATUS_SC_GCS_INNOF = 0x0B ; Invalid Namespace or Format
+CQ_STATUS_SC_GCS_CSE = 0x0C ; Command Sequence Error
+CQ_STATUS_SC_GCS_INSGL = 0x0D ; Invalid SGL Segment Descriptor
+CQ_STATUS_SC_GCS_INNSGL = 0x0E ; Invalid Number of SGL Descriptors
+CQ_STATUS_SC_GCS_OPDEN = 0x15 ; Operation Denied
+CQ_STATUS_SC_GCS_NSIWP = 0x20 ; Namespace is Write Protected
+CQ_STATUS_SC_GCS_CINT = 0x21 ; Command Interrupted
+CQ_STATUS_SC_GCS_TTE = 0x22 ; Transient Transport Error
+
+; Completion Queue Entry Status Field - Status Code Media and Data Integrity Errors
+CQ_STATUS_SC_MADIE_WF = 0x80 ; Write Fault
+CQ_STATUS_SC_MADIE_URE = 0x81 ; Unrecovered Read Error
+CQ_STATUS_SC_MADIE_ACDEN = 0x86 ; Access Denied
+CQ_STATUS_SC_MADIE_DOULB = 0x87 ; Deallocated or Unwritten Logical Block
+
+; Controller Status (CSTS) Values
+CSTS_RDY = 1
+CSTS_CFS = 1 shl 1
+CSTS_SHST = (1 shl 2) or (1 shl 3)
+CSTS_NSSRO = 1 shl 4
+CSTS_PP = 1 shl 5
+CSTS_SHST_SHUTDOWN_OCCURRING = 1 shl 2
+CSTS_SHST_SHUTDOWN_COMPLETE = 1 shl 3
+
+; Admin Queue Attributes (AQA) Values
+AQA_ASQS = 0xfff
+AQA_ACQS = 0xfff shl 16
+
+; CDW10.SEL Values (Page 115 of NVMe 1.4 specification)
+CDW10_SEL_CURRENT = 000b
+CDW10_SEL_DEFAULT = 001b
+CDW10_SEL_SAVED = 010b
+CDW10_SEL_SUPPORTED_CAPABILITIES = 011b
+
+; Feature Identifiers (FID) Values (Page 206 of NVMe 1.4 specification)
+; Used in Get/Set Features Commands
+FID_ARBITRATION = 0x01
+FID_POWER_MANAGEMENT = 0x02
+FID_LBA_RANGE_TYPE = 0x03
+FID_TEMPERATURE_THRESHOLD = 0x04
+FID_ERROR_RECOVERY = 0x05
+FID_VOLATILE_WRITE_CACHE = 0x06
+FID_NUMBER_OF_QUEUES = 0x07
+FID_INTERRUPT_COALESCING = 0x08
+FID_INTERRUPT_VECTOR_CONFIGURATION = 0x09
+FID_WRITE_ATOMICITY_NORMAL = 0x0A
+FID_ASYNCHRONOUS_EVENT_CONFIGURATION = 0x0B
+FID_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C
+FID_HOST_MEMORY_BUFFER = 0x0D
+FID_TIMESTAMP = 0x0E
+FID_KEEP_ALIVE_TIMER = 0x0F
+FID_HOST_CONTROLLED_THERMAL_MANAGEMENT = 0x10
+FID_NON_OPERATIONAL_POWER_STATE_CONFIG = 0x11
+FID_READ_RECOVERY_LEVEL_CONFIG = 0x12
+FID_PREDICTABLE_LATENCY_MODE_CONFIG = 0x13
+FID_PREDICTABLE_LATENCY_MODE_WINDOW = 0x14
+FID_LBA_STATUS_INFORMATION_REPORT_INTERVAL = 0x15
+FID_HOST_BEHAVIOR_SUPPORT = 0x16
+FID_SANITIZE_CONFIG = 0x17
+FID_ENDURANCE_GROUP_EVENT_CONFIGURATION = 0x18
+; NVM Command Set Specific - FID
+FID_SOFTWARE_PROGRESS_MARKER = 0x80
+FID_HOST_IDENTIFIER = 0x81
+FID_RESERVATION_NOTIFICATION_MASK = 0x82
+FID_RESERVATION_PERSISTENCE = 0x83
+FID_NAMESPACE_WRITE_PROTECTION_CONFIG = 0x84
+
+; Get Log Page - Log Page Identifiers (Page 118-119 of NVMe 1.4 specification)
+LID_ERROR_INFORMATION = 0x01
+LID_SMARTHEALTH_INFORMATION = 0x02
+LID_FIRMWARE_SLOT_INFORMATION = 0x03
+LID_CHANGED_NAMESPACE_LIST = 0x04
+LID_COMMANDS_SUPPORTED_AND_EFFECTS = 0x05
+LID_DEVICE_SELF_TEST = 0x06
+LID_TELEMETRY_HOST_INITIATED = 0x07
+LID_TELEMETRY_CONTROLLER_INITIATED = 0x08
+LID_ENDURANCE_GROUP_INFORMATION = 0x09
+LID_PREDICTABLE_LATENCY_PER_NVM_SET = 0x0A
+LID_PREDICTABLE_LATENCY_EVENT_AGGREGATE = 0x0B
+LID_ASYMMETRIC_NAMESPACE_ACCESS = 0x0C
+LID_PERSISTENT_EVENT_LOG = 0x0D
+LID_LBA_STATUS_INFORMATION = 0x0E
+LID_ENDURANCE_GROUP_EVENT_AGGREGATE = 0x0F
+; I/O Command Set Specific - Log Page Identifiers
+LID_RESERVATION_NOTIFICATION = 0x80
+LID_SANITIZE_STATUS = 0x81
+
+; Controller Type Values
+CNTRLTYPE_IO_CONTROLLER = 0x1
+CNTRLTYPE_DISCOVERY_CONTROLLER = 0x2
+CNTRLTYPE_ADMIN_CONTROLLER = 0x3
+
+struct NVME_MMIO
+        CAP dq ? ; Controller Capabilities
+        VS dd ? ; Version
+        INTMS dd ? ; Interrupt Mask Set
+        INTMC dd ? ; Interrupt Mask Clear
+        CC dd ? ; Controller Configuration
+        rd 1 ; Reserved
+        CSTS dd ? ; Controller Status
+        NSSR dd ? ; NVM Subsystem Reset
+        AQA dd ? ; Admin Queue Attributes
+        ASQ dq ? ; Admin Submission Queue Base Address
+        ACQ dq ? ; Admin Completion Queue Base Address
+        CMBLOC dd ? ; Controller Memory Buffer Location
+        CMBSZ dd ? ; Controller Memory Buffer Size
+        BPINFO dd ? ; Boot Partition Information
+        BPRSEL dd ? ; Boot Partition Read Select
+        BPMBL dq ? ; Boot Partition Memory Buffer Location
+        CMBMSC dq ? ; Controller Memory Buffer Memory Space Control (a 64-bit register, so the PMR block below lands at offset 0xE00)
+        CMBSTS dd ? ; Controller Memory Buffer Status
+        rb 3492 ; Reserved
+        PMRCAP dd ? ; Persistent Memory Capabilities
+        PMRCTL dd ? ; Persistent Memory Region Control
+        PMRSTS dd ? ; Persistent Memory Region Status
+        PMREBS dd ? ; Persistent Memory Region Elasticity Buffer Size
+        PMRSWTP dd ? ; Persistent Memory Region Sustained Write Throughput
+        PMRMSC dq ? ; Persistent Memory Region Controller Memory Space Control
+        rb 484 ; Reserved
+        SQ0TDBL dd ? ; Submission Queue 0 Tail Doorbell (Admin)
+ends
+
+
+; Submission Queue Entry (64 bytes)
+struct SQ_ENTRY
+        cdw0 dd ?
+        nsid dd ?
+        cdw2 dd ?
+        cdw3 dd ?
+        mptr dq ?
+        prp1 dq ?
+        prp2 dq ?
+        cdw10 dd ?
+        cdw11 dd ?
+        cdw12 dd ?
+        cdw13 dd ?
+        cdw14 dd ?
+        cdw15 dd ?
+ends
+
+; Completion Queue Entry (16 bytes) - See page 77 of the NVMe 1.4 spec
+struct CQ_ENTRY
+        cdw0 dd ?
+        rd 1 ; reserved
+        sqhd dw ?
+        sqid dw ?
+        cid dw ?
+        status dw ?
+ends
+
+struct NSINFO
+        capacity dq ?
+        size dq ?
+        nsid dd ?
+        pci dd ?
+        lbads db ?
+        features db ?
+ends
+
+struct pcidev
+        bus db ?
+        devfn db ?
+        ipin db ?
+        iline db ?
+        num dd ?
+        io_addr dd ?
+        queue_entries dd ?
+        version dd ?
+        nsid dd ?
+        spinlock dd ?
+        nsinfo dd ?
+        nn dd ?
+        dstrd db ?
+        rb 3 ; align
+ends
+TOTAL_PCIDEVS = 4
+TOTAL_PCIDEVS_MALLOC_SZ = TOTAL_PCIDEVS * sizeof.pcidev
+
+struct NVMQCMD
+        cid dd ?
+        mutex_ptr MUTEX
+ends
+
+struct NVM_QUEUE_ENTRY
+        tail dw ?
+        head dw ?
+        sq_ptr dd ?
+        cq_ptr dd ?
+        cmd_ptr dd ?
+ends
+
+; Identify Controller Data Structure
+struct IDENTC
+
+        vid dw ?
+        ssvid dw ?
+        sn dt ?, ?
+        mn rt 4
+        fr dq ?
+        rab db ?
+        ieee db ?, ?, ?
+        cmic db ?
+        mdts db ?
+        cntlid dw ?
+        ver dd ?
+        rtd3r dd ?
+        rtd3e dd ?
+        oaes dd ?
+        ctratt dd ?
+        rrls dw ?
+        rb 9 ; reserved
+        cntrltype db ?
+        fguid dq ?, ?
+        crdt1 dw ?
+        crdt2 dw ?
+        crdt3 dw ?
+        rb 106 ; reserved
+        rb 16 ; reserved (NVMMI)
+        oacs dw ?
+        acl db ?
+        aerl db ?
+        frmw db ?
+        lpa db ?
+        elpe db ?
+        npss db ?
+        avscc db ?
+        apsta db ?
+        wctemp dw ?
+        cctemp dw ?
+        mtfa dw ?
+        hmpre dd ?
+        hmmin dd ?
+        tnvmcap dq ?, ?
+        unvmcap dq ?, ?
+        rpmbs dd ?
+        edstt dw ?
+        dsto db ?
+        fwug db ?
+        kas dw ?
+        hctma dw ?
+        mntmt dw ?
+        mxtmt dw ?
+        sanicap dd ?
+        hmminds dd ?
+        hmmaxd dw ?
+        nsetidmax dw ?
+        endgidmax dw ?
+        anatt db ?
+        anacap db ?
+        anagrpmax dd ?
+        nanagrpid dd ?
+        pels dd ?
+        rb 156
+        sqes db ?
+        cqes db ?
+        maxcmd dw ?
+        nn dd ?
+        oncs dw ?
+        fuses dw ?
+        fna db ?
+        vwc db ?
+        awun dw ?
+        awupf dw ?
+        nvscc db ?
+        nwpc db ?
+        acwu dw ?
+        rb 2
+        sgls dd ?
+        mnan dd ?
+        rb 224
+        subnqn rq 32
+        rb 768
+        rb 256
+        psd0 rq 4
+        psd1 rq 4
+        psd2 rq 4
+        psd3 rq 4
+        psd4 rq 4
+        psd5 rq 4
+        psd6 rq 4
+        psd7 rq 4
+        psd8 rq 4
+        psd9 rq 4
+        psd10 rq 4
+        psd11 rq 4
+        psd12 rq 4
+        psd13 rq 4
+        psd14 rq 4
+        psd15 rq 4
+        psd16 rq 4
+        psd17 rq 4
+        psd18 rq 4
+        psd19 rq 4
+        psd20 rq 4
+        psd21 rq 4
+        psd22 rq 4
+        psd23 rq 4
+        psd24 rq 4
+        psd25 rq 4
+        psd26 rq 4
+        psd27 rq 4
+        psd28 rq 4
+        psd29 rq 4
+        psd30 rq 4
+        psd31 rq 4
+        rb 1024
+ends
+
+; Identify Namespace Data Structure
+struct IDENTN
+        nsze dq ?
+        ncap dq ?
+        nuse dq ?
+        nsfeat db ?
+        nlbaf db ?
+        flbas db ?
+        mc db ?
+        dpc db ?
+        dps db ?
+        nmic db ?
+        rescap db ?
+        fpi db ?
+        dlfeat db ?
+        nawun dw ?
+        nawupf dw ?
+        nacwu dw ?
+        nabsn dw ?
+        nabo dw ?
+        nabspf dw ?
+        noiob dw ?
+        nvmcap dq ?
+        dq ?
+        npwg dw ?
+        npwa dw ?
+        npdg dw ?
+        npda dw ?
+        nows dw ?
+        rb 18
+        anagrpid dd ?
+        rb 3
+        nsattr db ?
+        nvmsetid dw ?
+        endgid dw ?
+        nguid dq ?
+        dq ?
+        eui64 dq ?
+        lbaf0 dd ?
+        lbaf1 dd ?
+        lbaf2 dd ?
+        lbaf3 dd ?
+        lbaf4 dd ?
+        lbaf5 dd ?
+        lbaf6 dd ?
+        lbaf7 dd ?
+        lbaf8 dd ?
+        lbaf9 dd ?
+        lbaf10 dd ?
+        lbaf11 dd ?
+        lbaf12 dd ?
+        lbaf13 dd ?
+        lbaf14 dd ?
+        lbaf15 dd ?
+        rb 3904
+ends
+
+; Namespace Granularity List (CNS 16h - Page 199 of NVMe specification 1.4)
+struct NSGRANLS
+
+        nga dd ?
+        nod db ?
+        rb 27 ; reserved
+        ngd0 dq ?, ?
+        ngd1 dq ?, ?
+        ngd2 dq ?, ?
+        ngd3 dq ?, ?
+        ngd4 dq ?, ?
+        ngd5 dq ?, ?
+        ngd6 dq ?, ?
+        ngd7 dq ?, ?
+        ngd8 dq ?, ?
+        ngd9 dq ?, ?
+        ngd10 dq ?, ?
+        ngd11 dq ?, ?
+        ngd12 dq ?, ?
+        ngd13 dq ?, ?
+        ngd14 dq ?, ?
+        ngd15 dq ?, ?
+
+ends
+
+assert NVM_ASQS = NVM_ACQS
+assert SQ_ENTRIES = NVM_ASQS
+assert CQ_ENTRIES = NVM_ACQS
+assert NVM_MPS = 0
+assert PAGE_SIZE = 0x1000
+assert sizeof.NVME_MMIO = 4100 ; the 4 KiB register page plus the admin SQ tail doorbell
+assert sizeof.SQ_ENTRY = 64
+assert sizeof.CQ_ENTRY = 16
+assert sizeof.IDENTC = 4096
+assert sizeof.IDENTN = 4096
+assert sizeof.NSGRANLS = 288
+assert sizeof.NVMQCMD = 16
+assert SIZEOF_SQ_ENTRY = 6
+assert SIZEOF_CQ_ENTRY = 4
+assert SIZEOF_SQ_ENTRY = CC_DEFAULT_IOSQES shr 16
+assert SIZEOF_CQ_ENTRY = CC_DEFAULT_IOCQES shr 20
+
+; NOTE: DO NOT CHANGE THIS ASSERTION!
+; If you do decide to change it, you'll have
+; to modify the source code manually since it
+; uses bit shifts to multiply by the struct size
+assert sizeof.NVM_QUEUE_ENTRY = 16
+assert SIZEOF_NVM_QUEUE_ENTRY = 4
+; vim: syntax=fasm