From d4441724ce6b98b018439a28b5b861f8798c8866 Mon Sep 17 00:00:00 2001 From: Abdur-Rahman Mansoor Date: Wed, 21 Aug 2024 11:05:20 -0400 Subject: [PATCH 1/5] feat: add NVMe driver --- drivers/nvme/command.inc | 269 ++++++++ drivers/nvme/lib.inc | 35 + drivers/nvme/macros.inc | 30 + drivers/nvme/nvme.asm | 1414 ++++++++++++++++++++++++++++++++++++++ drivers/nvme/nvme.inc | 591 ++++++++++++++++ 5 files changed, 2339 insertions(+) create mode 100644 drivers/nvme/command.inc create mode 100644 drivers/nvme/lib.inc create mode 100644 drivers/nvme/macros.inc create mode 100644 drivers/nvme/nvme.asm create mode 100644 drivers/nvme/nvme.inc diff --git a/drivers/nvme/command.inc b/drivers/nvme/command.inc new file mode 100644 index 0000000000..e40c264889 --- /dev/null +++ b/drivers/nvme/command.inc @@ -0,0 +1,269 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; +;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;; +;; Distributed under terms of the GNU General Public License ;; +;; ;; +;; GNU GENERAL PUBLIC LICENSE ;; +;; Version 2, June 1991 ;; +;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +proc set_cdw0 stdcall, pci:dword, y:dword, opcode:byte + + stdcall get_new_cid, [pci], [y] + shl eax, 16 + or al, [opcode] + ret + +endp + +; See pages 161-205 of the NVMe 1.4 specification for reference +proc nvme_identify stdcall, pci:dword, nsid:dword, prp1:dword, cns:byte + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + + mov eax, [nsid] + mov dword [esp + SQ_ENTRY.nsid], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_IDENTIFY + mov dword [esp + SQ_ENTRY.cdw0], eax + mov al, [cns] + mov byte [esp + SQ_ENTRY.cdw10], al + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + +; See page 101 of the NVMe 1.4 specification for reference +proc create_io_completion_queue stdcall, pci:dword, prp1:dword, qid:dword, ien:byte + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_CRE_IO_COMPLETION_QUEUE + mov dword [esp + SQ_ENTRY.cdw0], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + mov eax, CQ_ENTRIES shl 16 ; CDW10.QSIZE + or eax, [qid] ; CDW10.QID + mov dword [esp + SQ_ENTRY.cdw10], eax + movzx eax, [ien] ; CDW11.IEN + or eax, 0x1 ; CDW11.PC + ; Don't set CDW11.IV since we're not using MSI-X or MSI vector + mov dword [esp + SQ_ENTRY.cdw11], eax + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + +; See page 103-104 of the NVMe 1.4 specification for reference +proc create_io_submission_queue stdcall, pci:dword, prp1:dword, qid:dword, cqid:word + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_CRE_IO_SUBMISSION_QUEUE + mov dword [esp + SQ_ENTRY.cdw0], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + mov eax, SQ_ENTRIES shl 16 ; CDW10.QSIZE + or eax, [qid] + mov dword [esp + SQ_ENTRY.cdw10], eax + movzx eax, [cqid] + shl eax, 16 ; CDW11.CQID + or eax, 0x1 ; CDW11.PC (always set this to 1 as some devices may not support non-contiguous pages) + ; TODO: Set 
CDW10.QPRIO + mov dword [esp + SQ_ENTRY.cdw11], eax + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + +; See page 95-96 of the NVMe 1.4 specification for reference +proc abort stdcall, pci:dword, cid:word, sqid:word + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_ABORT + mov dword [esp + SQ_ENTRY.cdw0], eax + movzx eax, [cid] + shl eax, 16 + or eax, word [sqid] + mov dword [esp + SQ_ENTRY.cdw10], eax + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + + +; See page 205 of the NVMe 1.4 specification for reference +proc set_features stdcall, pci:dword, prp1:dword, fid:byte, cdw11:dword + + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_SET_FEATURES + mov dword [esp + SQ_ENTRY.cdw0], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + movzx eax, [fid] + ;or eax, 1 shl 31 ; CDW10.SV + mov dword [esp + SQ_ENTRY.cdw10], eax + mov eax, [cdw11] + mov dword [esp + SQ_ENTRY.cdw11], eax + stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + ret + +endp + +; See page 105 of the NVMe 1.4 specification for reference +proc delete_io_completion_queue stdcall, pci:dword, qid:word + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_DEL_IO_COMPLETION_QUEUE + mov dword [esp + SQ_ENTRY.cdw0], eax + mov ax, [qid] + mov word [esp + SQ_ENTRY.cdw10], ax + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + +; See page 114-116 of the NVMe 1.4 specification for reference +proc get_features stdcall, pci:dword, prp1:dword, sel:byte, fid:byte + + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_FEATURES + mov dword [esp + SQ_ENTRY.cdw0], eax + movzx eax, [sel] + and eax, 111b + shl eax, 8 ; CDW10.SEL + or eax, byte [fid] ; CDW10.FID + mov dword [esp + SQ_ENTRY.cdw10], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + ; TODO: Implement CDW14.UUID? 
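+ ; As a purely illustrative example (this exact request is not issued
+ ; anywhere in the driver): asking for the current value of the
+ ; Volatile Write Cache feature would pack CDW10 as
+ ;     (CDW10_SEL_CURRENT shl 8) or FID_VOLATILE_WRITE_CACHE = 0x00000006
+ ; and the feature's attributes would come back in CQ_ENTRY.cdw0 of the
+ ; corresponding completion entry.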
+ stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + ret + +endp + +; See page 105-106 of the NVMe 1.4 specification for reference +proc delete_io_submission_queue stdcall, pci:dword, qid:word + + push esi + mov esi, [pci] + mov dword [esi + pcidev.spinlock], 1 + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, esi, ADMIN_QUEUE, ADM_CMD_DEL_IO_SUBMISSION_QUEUE + mov dword [esp + SQ_ENTRY.cdw0], eax + mov ax, [qid] + mov word [esp + SQ_ENTRY.cdw10], ax + stdcall sqytdbl_write, esi, ADMIN_QUEUE, esp + add esp, sizeof.SQ_ENTRY + stdcall nvme_poll, esi + pop esi + ret + +endp + +; See page 117-118 of the NVMe 1.4 specification for reference +; INCOMPLETE +proc get_log_page stdcall, pci:dword, prp1:dword, lid:byte + + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_LOG_PAGE + mov dword [esp + SQ_ENTRY.cdw0], eax + mov eax, [prp1] + mov dword [esp + SQ_ENTRY.prp1], eax + add esp, sizeof.SQ_ENTRY + ret + +endp + +; See pages 348-349 of the NVMe 1.4 specification for information on creating namespaces +proc create_namespace stdcall, pci:dword, cid:word + + push esi + invoke AllocPage + test eax, eax + jz .fail + invoke GetPhysAddr + stdcall nvme_identify, [pci], 0xffffffff, eax, CNS_IDNS + test eax, eax + jz .fail + +.fail: + pop esi + ret + +endp + +; See page 258-261 (read) and 269-271 (write) of the NVMe 1.4 specification for reference +proc nvme_io_rw stdcall, pci:dword, qid:word, nsid:dword, prps:qword, slba:qword, nlb:dword, opcode:dword + + ; TODO: Use IDENTC.NOIOB to construct read/write commands that don't + ; cross the I/O boundary to achieve optimal performance + ; + ; TODO: Read AWUN/NAWUN + sub esp, sizeof.SQ_ENTRY + stdcall memsetdz, esp, sizeof.SQ_ENTRY / 4 + movzx ecx, [qid] + stdcall set_cdw0, [pci], ecx, [opcode] + mov dword [esp + SQ_ENTRY.cdw0], eax ; CDW0 + mov eax, dword [prps] + mov dword [esp + SQ_ENTRY.prp1], eax + mov eax, dword [prps + 4] + mov dword [esp + SQ_ENTRY.prp2], eax + mov eax, [nsid] + mov dword [esp + SQ_ENTRY.nsid], eax + mov eax, dword [slba] ; slba_lo + mov dword [esp + SQ_ENTRY.cdw10], eax + mov eax, dword [slba + 4] ; slba_hi + mov dword [esp + SQ_ENTRY.cdw11], eax + mov eax, [nlb] + mov word [esp + SQ_ENTRY.cdw12], ax + movzx ecx, [qid] + stdcall sqytdbl_write, [pci], ecx, esp + add esp, sizeof.SQ_ENTRY + ret + +endp + +; vim: syntax=fasm diff --git a/drivers/nvme/lib.inc b/drivers/nvme/lib.inc new file mode 100644 index 0000000000..233a33492e --- /dev/null +++ b/drivers/nvme/lib.inc @@ -0,0 +1,35 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; +;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. 
;; +;; Distributed under terms of the GNU General Public License ;; +;; ;; +;; GNU GENERAL PUBLIC LICENSE ;; +;; Version 2, June 1991 ;; +;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +proc memsetdz stdcall, dest:dword, sz:dword + + push edi + mov edi, [dest] + mov ecx, [sz] + xor eax, eax + rep stosd + pop edi + ret + +endp + +proc memcpyd stdcall, dest:dword, src:dword, sz:dword + + push esi edi + mov esi, [src] + mov edi, [dest] + mov ecx, [sz] + rep movsd + pop edi esi + ret + +endp + +; vim: syntax=fasm diff --git a/drivers/nvme/macros.inc b/drivers/nvme/macros.inc new file mode 100644 index 0000000000..e08e9f347b --- /dev/null +++ b/drivers/nvme/macros.inc @@ -0,0 +1,30 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; +;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;; +;; Distributed under terms of the GNU General Public License ;; +;; ;; +;; GNU GENERAL PUBLIC LICENSE ;; +;; Version 2, June 1991 ;; +;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +macro PDEBUGF _level*, _fmt*, _bus*, _devfn*, [_args] { + common + if __DEBUG__ + sub esp, 12 + push ebx + movzx ebx, _bus + mov dword [esp + 4], ebx + movzx ebx, _devfn + shr ebx, 3 ; get rid of 3 lowest bits (function code), the rest bits is device code + mov dword [esp + 8], ebx + movzx ebx, _devfn + and ebx, 00000111b ; get only 3 lowest bits (function code) + mov dword [esp + 12], ebx + pop ebx + DEBUGF _level, _fmt, [esp], [esp + 4], [esp + 8], _args + add esp, 12 + end if +} + +; vim: syntax=fasm diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm new file mode 100644 index 0000000000..5da60d2521 --- /dev/null +++ b/drivers/nvme/nvme.asm @@ -0,0 +1,1414 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; +;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. ;; +;; Distributed under terms of the GNU General Public License ;; +;; ;; +;; GNU GENERAL PUBLIC LICENSE ;; +;; Version 2, June 1991 ;; +;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +format PE DLL native +entry START + +API_VERSION = 0 ;debug +SRV_GETVERSION = 0 +__DEBUG__ = 1 +__DEBUG_LEVEL__ = 1 +DRIVER_VERSION = 1 +DBG_INFO = 1 +NULLPTR = 0 +FALSE = 0 +TRUE = 1 + +section ".flat" code readable writable executable +include "../proc32.inc" +include "../struct.inc" +include "../macros.inc" +include "../fdo.inc" +include "../pci.inc" +include "../peimport.inc" +include "nvme.inc" +include "macros.inc" +include "lib.inc" +include "command.inc" + +struct DISKMEDIAINFO + flags dd ? + sectorsize dd ? + capacity dq ? 
+ends
+
+proc START c, reason:dword, cmdline:dword
+local AnythingLoadedSuccessfully db 0
+
+ push ebx esi edi
+ cmp [reason], DRV_ENTRY
+ jne .err
+
+.entry:
+ DEBUGF DBG_INFO, "Detecting NVMe device...\n"
+ call detect_nvme
+ test eax, eax
+ jz .err
+ xor ebx, ebx
+ mov esi, dword [p_nvme_devices]
+ test esi, esi
+ jz .err
+ sub esi, sizeof.pcidev
+
+.loop:
+ add esi, sizeof.pcidev
+ push ebx esi
+ stdcall device_is_compat, esi
+ test eax, eax
+ jz .pop
+ stdcall nvme_init, esi
+ test eax, eax
+ jz .pop
+ pop esi ebx
+ stdcall add_nvme_disk, esi
+ jmp .next
+
+.pop:
+ pop esi ebx
+
+.next:
+ test eax, eax
+ setne [AnythingLoadedSuccessfully]
+ inc ebx
+ cmp ebx, dword [pcidevs_len]
+ jne .loop
+ cmp [AnythingLoadedSuccessfully], 0
+ jz .err
+ invoke RegService, my_service, service_proc
+ pop edi esi ebx
+ ret
+
+.err:
+ call nvme_cleanup
+ pop edi esi ebx
+ ret
+
+endp
+
+proc service_proc stdcall, ioctl:dword
+
+ mov esi, [ioctl]
+ mov eax, [esi + IOCTL.io_code]
+ cmp eax, SRV_GETVERSION
+ jne .ret
+
+ mov eax, [esi + IOCTL.output]
+ cmp [esi + IOCTL.out_size], 4
+ jne .ret
+ mov dword [eax], API_VERSION
+ xor eax, eax
+ ret
+
+.ret:
+ or eax, -1
+ ret
+
+endp
+
+; Registers the NVMe disk into KolibriOS. This requires that the
+; device was successfully initialized by nvme_init; otherwise the
+; behavior is undefined.
+proc add_nvme_disk stdcall, pci:dword
+
+ push esi
+ mov esi, [pci]
+
+ ; NOTE: If the pcidev.num or pcidev.nsid is more than 9 then
+ ; this fails to build the string correctly. Ignoring this issue
+ ; for now, since a desktop computer rarely has more than 9 NVMe
+ ; SSDs, and an NSID bigger than 9 is also unlikely.
+ ;
+ ; Still, will address this problem in the future.
+ push 0 ; null terminator
+ movzx eax, byte [esi + pcidev.nsid]
+ add al, "0"
+ mov byte [esp], al
+ dec esp
+ mov byte [esp], "n"
+ dec esp
+ movzx eax, byte [esi + pcidev.num]
+ add al, "0"
+ mov byte [esp], al
+ push "nvme"
+ mov eax, esp
+ invoke DiskAdd, disk_functions, eax, [esi + pcidev.nsinfo], 0
+ add esp, 10
+ test eax, eax
+ jz @f
+ invoke DiskMediaChanged, eax, 1
+ DEBUGF DBG_INFO, "nvme%un%u: Successfully registered disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
+ xor eax, eax
+ inc eax
+ pop esi
+ ret
+
+@@:
+ DEBUGF DBG_INFO, "nvme%un%u: Failed to register disk\n", [esi + pcidev.num], [esi + pcidev.nsid]
+ xor eax, eax
+ pop esi
+ ret
+
+endp
+
+proc nvme_query_media stdcall, userdata:dword, info:dword
+
+ push ebx esi edi
+ mov esi, [userdata]
+ mov ebx, dword [esi + NSINFO.pci]
+ mov edi, [info]
+ mov dword [edi + DISKMEDIAINFO.flags], 0
+ mov cl, byte [esi + NSINFO.lbads]
+ xor eax, eax
+ inc eax
+ shl eax, cl
+ DEBUGF DBG_INFO, "nvme%un%u (Query Media): Sector size = %u\n", [ebx + pcidev.num], [esi + NSINFO.nsid], eax
+ mov dword [edi + DISKMEDIAINFO.sectorsize], eax
+ mov eax, dword [esi + NSINFO.capacity]
+ mov dword [edi + DISKMEDIAINFO.capacity], eax
+ mov eax, dword [esi + NSINFO.capacity + 4]
+ mov dword [edi + DISKMEDIAINFO.capacity + 4], eax
+ DEBUGF DBG_INFO, "nvme%un%u (Query Media): Capacity = %u + %u sectors\n", [ebx + pcidev.num], [esi + NSINFO.nsid], [esi + NSINFO.capacity], [esi + NSINFO.capacity + 4]
+ xor eax, eax
+ pop edi esi ebx
+ ret
+
+endp
+
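+; A minimal sketch (kept out of the build with "if 0", the same guard this
+; file already uses for dead code) of the sector-size math nvme_query_media
+; performs above: DISKMEDIAINFO.sectorsize is 1 shl LBADS, e.g. LBADS = 9
+; gives 512-byte sectors. The code assumes esi holds a valid pcidev pointer.
+if 0
+ mov esi, [esi + pcidev.nsinfo]
+ mov cl, byte [esi + NSINFO.lbads]
+ xor eax, eax
+ inc eax
+ shl eax, cl ; eax = sector size in bytes
+end if
+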
+; Returns 1 if the given NSID is an active NSID, returns
+; 0 otherwise
+proc is_active_namespace stdcall, pci:dword, nsid:dword
+
+ push esi edi
+ invoke KernelAlloc, 0x1000
+ test eax, eax
+ jnz @f
+ pop edi esi
+ ret
+
+@@:
+ mov esi, eax
+ invoke GetPhysAddr
+ stdcall nvme_identify, [pci], [nsid], eax, CNS_IDNS
+ test eax, eax
+ jz .not_active_nsid
+ xor ecx, ecx
+
+@@:
+ mov eax, dword [esi + ecx * 4]
+ test eax, eax
+ jnz .is_active_nsid
+ inc ecx
+ cmp ecx, 0x1000 / 4
+ jne @b
+
+.not_active_nsid:
+ invoke KernelFree, esi
+ pop edi esi
+ xor eax, eax
+ ret
+
+.is_active_nsid:
+ invoke KernelFree, esi
+ pop edi esi
+ xor eax, eax
+ inc eax
+ ret
+
+endp
+
+; See page 248 of the NVMe 1.4 specification for reference
+; Scans NSIDs 1 through [pci + pcidev.nn] (pcidev.nn is filled in from
+; IDENTC.nn during nvme_init) and returns the first active NSID found,
+; or 0 if none of the namespaces are active.
+proc determine_active_nsids stdcall, pci:dword
+
+ push ebx esi
+ mov esi, [pci]
+ xor ebx, ebx
+ xor ecx, ecx
+ inc ecx
+
+.loop:
+ cmp ecx, dword [esi + pcidev.nn]
+ ja .ret
+ push ecx
+ stdcall is_active_namespace, [pci], ecx
+ pop ecx
+ test eax, eax
+ jz .not_active_namespace
+ mov ebx, ecx
+ jmp .ret
+
+.not_active_namespace:
+ inc ecx
+ jmp .loop
+
+.ret:
+ pop esi ebx
+ mov eax, ebx
+ ret
+
+endp
+
+; Allocates prp_list_ptr and creates a PRP list there. nprps should
+; be set appropriately to the number of PRPs the caller wants to create.
+;
+; This function should only be called if the conditions for building
+; a PRP list are met (see page 68 of the NVMe 1.4.0 spec).
+;
+; TODO: Currently the code for building recursive PRP lists is untested.
+; If you want to test it, do a read/write with a sector count equivalent
+; to more than 4MiB. Will test in the future.
+proc build_prp_list stdcall, nprps:dword, buf:dword, prp_list_ptr:dword
+
+ push esi ebx edi
+ sub esp, 4
+
+ ; stack:
+ ; [esp]: virtual pointer to first PRP list
+ ; here, we store the pointer to the very first
+ ; PRP list so that free_prp_list can free the
+ ; entire PRP list if something goes wrong, it
+ ; also serves as our return value placeholder
+ mov dword [esp], 0
+
+ xor edi, edi
+ xor esi, esi
+ mov ecx, [nprps]
+ shl ecx, 3 ; multiply by 8 since each PRP pointer is a QWORD
+
+ ; we'll store consecutive PRP list buffers here, for example
+ ; given 2 PRP lists, we allocate 2 continuous pages
+ push ecx
+ invoke KernelAlloc, ecx ; store pointers to the PRP entries here
+ pop ecx
+ test eax, eax
+ jz .err
+ mov dword [esp], eax
+ mov edi, eax
+ mov eax, [prp_list_ptr]
+ mov dword [eax], edi
+ shr ecx, 2 ; convert the byte count into the dword count memsetdz expects
+ stdcall memsetdz, edi, ecx
+
+ ; note we assume buf is page-aligned
+ mov esi, [buf]
+
+.build_prp_list:
+ ; ensure we don't cross a page boundary
+ mov ebx, [nprps]
+ cmp ebx, PAGE_SIZE / 8
+ jb @f
+ mov ebx, PAGE_SIZE / 8
+ sub [nprps], ebx
+
+@@:
+ xor ecx, ecx
+ cmp dword [esp], edi
+ je .loop
+
+ ; we need to store the pointer of the next
+ ; PRP list to the previous PRP list last entry
+ mov eax, edi
+ invoke GetPhysAddr
+ mov dword [edi - 8], eax
+ mov dword [edi - 4], 0
+
+.loop:
+ mov eax, esi
+ invoke GetPhysAddr
+ mov dword [edi + ecx * 8], eax
+ mov dword [edi + ecx * 8 + 4], 0 ; zero the upper dword of this PRP entry
+ add esi, PAGE_SIZE
+ inc ecx
+ cmp ecx, ebx
+ jne .loop
+
+ ; check if we need to build another PRP list
+ add edi, PAGE_SIZE
+ cmp ebx, PAGE_SIZE / 8
+ je .build_prp_list
+
+ ; PRP list successfully created
+ mov eax, dword [esp]
+ invoke GetPhysAddr
+ add esp, 4
+ pop edi ebx esi
+ ret
+
+.err:
+ add esp, 4
+ pop edi ebx esi
+ xor eax, eax
+ ret
+
+endp
+
+; Allocates PRP1/PRP2. Note that it is not required to call this function
+; unless you're doing reads and writes with an arbitrary buffer that the
+; kernel passes to the driver. 
In most other cases, it's better to just allocate a +; page-aligned buffer. +; +; ns: Pointer to the device's respective namespace struct +; +; prps_ptr: should be a pointer to at least 2 DWORDS (PRP1 and PRP2 respectively), +; the caller is allowed to not initialize PRP1, however PRP2 should explicitly be +; initialized to 0. +; +; prp_list_ptr: pointer to 1 DWORD, the caller must initialize this value to 0. +; If a PRP list is allocated, then prp_list_ptr shall contain the pointer to +; the PRP list. The caller is required to free the allocated memory afterwards. +; +; buf: Pointer to the buffer +; +; On success, the function will return 1 and the PRPs will be initialized. If an +; error occurs (most likely due to memory allocation), the function returns 0. +proc alloc_dptr stdcall, ns:dword, prps_ptr:dword, numsectors:dword, prp_list_ptr:dword, buf:dword + + push ebx esi edi + mov esi, [ns] + mov edi, [prps_ptr] + mov eax, [buf] + invoke GetPhysAddr + mov dword [edi], eax + mov cl, byte [esi + NSINFO.lbads] + mov ebx, PAGE_SIZE + shr ebx, cl + mov edx, [numsectors] + + ; is the buffer offset portion equal to 0? + mov eax, [buf] + mov ecx, eax + and eax, PAGE_SIZE - 1 + mov eax, ebx + jnz @f + + ; is the number of sectors less than or equal to one memory page? + cmp edx, ebx + jbe .success + shl ebx, 1 ; it is page aligned, so set ebx to 2 memory pages + +@@: + ; is the number of sectors greater than one or two memory pages? + cmp edx, ebx + ja .build_prp_list + + ; set PRP2 + mov eax, ecx + and eax, not (PAGE_SIZE - 1) + add eax, PAGE_SIZE + invoke GetPhysAddr + mov dword [edi + 4], eax + jmp .success + +.build_prp_list: + mov ebx, ecx + mov ecx, eax + and ebx, not (PAGE_SIZE - 1) + add ebx, PAGE_SIZE + mov eax, [numsectors] + xor edx, edx + div ecx + stdcall build_prp_list, eax, ebx, [prp_list_ptr] + test eax, eax + jz .err + mov dword [edi + 4], eax + +.success: + xor eax, eax + inc eax + pop edi esi ebx + ret + +.err: + xor eax, eax + pop edi esi ebx + ret + +endp + +nvme_read: + mov edx, NVM_CMD_READ + jmp nvme_readwrite + +nvme_write: + mov edx, NVM_CMD_WRITE + +; Reads from/writes to the disk +proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors_ptr:dword + + push ebx esi edi + sub esp, 20 + + ; TODO: check if numsectors exceeds IDENTC.MDTS? + + ; stack: + ; [esp] - PRP1 + ; [esp + 4] - PRP2 + ; [esp + 8] - command type (read or write) + ; [esp + 12] - original numsectors value + ; [esp + 16] - virtual pointer to PRP2 PRP list (if allocated, 0 if not) + mov ebx, esp + + mov esi, [ns] + mov edi, [buf] + + mov eax, [numsectors_ptr] + mov eax, dword [eax] + DEBUGF DBG_INFO, "buf: %x, start_sector: %u:%u, numsectors: %u\n", [buf], [start_sector + 4], [start_sector], eax + mov dword [ebx + 4], 0 ; PRP2 entry (0 by default) + mov dword [ebx + 8], edx ; command type (read or write) + mov dword [ebx + 12], eax ; save original numsectors value + mov dword [ebx + 16], 0 ; virtual pointer to PRP2 PRP list (not allocated by default) + + mov ecx, ebx + add ecx, 16 + + ; Note that [esp] will contain the value of PRP1 and [esp + 4] will + ; contain the value of PRP2. 
If PRP2 is a PRP list, then [esp + 16] will point
+ ; to the allocated PRP list (after this call, only if it completes successfully)
+ stdcall alloc_dptr, esi, ebx, eax, ecx, [buf]
+ test eax, eax
+ jz .fail
+
+ DEBUGF DBG_INFO, "PRP1: %x, PRP2: %x\n", [ebx], [ebx + 4]
+ mov eax, dword [start_sector]
+
+ ; According to the NVMe specification, the NLB field in the I/O read and write
+ ; commands is a 0-based value (i.e., 0 is equivalent to 1, 1 is equivalent to 2, ...)
+ ; As far as I know, KolibriOS doesn't follow this mechanism, so let's just decrement the
+ ; value and it should have the same effect.
+ mov ecx, dword [ebx + 12]
+ dec ecx
+
+ ; TODO: add non-blocking mechanisms later on
+ push eax
+ mov eax, dword [esi + NSINFO.pci]
+ mov dword [eax + pcidev.spinlock], 1
+ pop eax
+ stdcall nvme_io_rw, [esi + NSINFO.pci], \
+ 1, \
+ [esi + NSINFO.nsid], \
+ dword [ebx], \
+ dword [ebx + 4], \
+ eax, \
+ dword [start_sector + 4], \
+ ecx, \
+ dword [ebx + 8]
+
+ ; TODO: add non-blocking mechanisms later on
+ stdcall nvme_poll, [esi + NSINFO.pci]
+ test eax, eax
+ jz .fail
+
+ ; free PRP list (if allocated)
+ mov eax, dword [ebx + 16]
+ test eax, eax
+ jz @f
+ invoke KernelFree, eax
+
+@@:
+ xor eax, eax
+ add esp, 20
+ pop edi esi ebx
+ ret
+
+.fail:
+ ; free PRP list (if allocated)
+ mov eax, dword [ebx + 16]
+ test eax, eax
+ jz @f
+ invoke KernelFree, eax
+
+@@:
+ mov ebx, [numsectors_ptr]
+ mov dword [ebx], 0
+ add esp, 20
+ pop edi esi ebx
+ or eax, -1 ; generic disk error
+ ret
+
+endp
+
+; Detects NVMe devices on the PCI bus and stores them into
+; [p_nvme_devices] and sets [pcidevs_len] to the appropriate
+; size based on how many NVMe devices there are.
+proc detect_nvme
+
+ invoke GetPCIList
+ mov esi, eax
+ mov ebx, eax
+
+.check_dev:
+ mov eax, dword [esi + PCIDEV.class]
+ and eax, 0x00ffff00 ; retrieve class/subclass code only
+ cmp eax, 0x00010800 ; Mass Storage Controller - Non-Volatile Memory Controller
+ je .found_dev
+
+.next_dev:
+ mov esi, dword [esi + PCIDEV.fd]
+ cmp esi, ebx
+ jne .check_dev
+
+.exit_success:
+ xor eax, eax
+ inc eax
+ ret
+
+.found_dev:
+ ; skip PCIDEV.owner check if the PCI device pointer has already been
+ ; allocated (without this check, more than 1 NVMe device cannot be
+ ; registered)
+ mov eax, dword [p_nvme_devices]
+ test eax, eax
+ jnz @f
+ cmp dword [esi + PCIDEV.owner], 0
+ jnz .err
+
+@@:
+ PDEBUGF DBG_INFO, "PCI(%u.%u.%u): Detected NVMe device...\n", [esi + PCIDEV.bus], [esi + PCIDEV.devfn]
+ cmp dword [pcidevs_len], TOTAL_PCIDEVS
+ jne @f
+ DEBUGF DBG_INFO, "Can't add any more NVMe devices...\n"
+ jmp .exit_success
+
+@@:
+ inc dword [pcidevs_len]
+ cmp dword [p_nvme_devices], 0
+ jnz @f ; was the pointer already allocated?
+ invoke KernelAlloc, sizeof.pcidev * TOTAL_PCIDEVS
+ test eax, eax
+ jz .err
+ mov dword [p_nvme_devices], eax
+ mov dword [esi + PCIDEV.owner], eax
+ DEBUGF DBG_INFO, "nvme: Allocated memory for PCI devices at: 0x%x\n", eax
+
+@@:
+ mov ecx, dword [pcidevs_len]
+ dec ecx
+ mov edi, dword [p_nvme_devices]
+ mov edx, ecx
+ imul edx, sizeof.pcidev
+ lea edi, [edi + edx]
+
+ movzx eax, byte [esi + PCIDEV.bus]
+ mov byte [edi + pcidev.bus], al
+ movzx eax, byte [esi + PCIDEV.devfn]
+ mov byte [edi + pcidev.devfn], al
+ mov dword [edi + pcidev.num], ecx
+
+ jmp .next_dev
+
+.err:
+ xor eax, eax
+ ret
+
+endp
+
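+; For reference, the class-code match in detect_nvme decodes as follows:
+; PCIDEV.class packs (base class shl 16) or (subclass shl 8) or (prog-if),
+; so masking with 0x00ffff00 keeps base class 0x01 (Mass Storage Controller)
+; and subclass 0x08 (Non-Volatile Memory Controller) while ignoring the
+; programming interface byte (0x02 would indicate the NVM Express interface,
+; which the driver does not check here).
+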
+; Returns 1 if the NVMe device is compatible, 0 otherwise. In practice, the driver
+; is compatible with (hopefully) most compliant controllers. This also does some
+; initialization, due to bad design decisions made in the beginning; since the
+; code works, I haven't felt inclined to change it.
+proc device_is_compat stdcall, pci:dword
+
+ push esi edx ecx
+ mov esi, [pci]
+ invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_line
+ mov byte [esi + pcidev.iline], al
+ invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.base_addr_0
+ and eax, 0xfffffff0
+ test eax, eax
+ jz .failure
+ mov edx, eax
+
+ invoke MapIoMem, eax, 0x2000, PG_SW+PG_NOCACHE
+ test eax, eax
+ jz .failure
+ mov dword [esi + pcidev.io_addr], eax
+ mov eax, dword [eax + NVME_MMIO.CAP + 4]
+ and eax, CAP_DSTRD
+ mov byte [esi + pcidev.dstrd], al
+ mov eax, dword [esi + pcidev.io_addr]
+ mov eax, dword [eax + NVME_MMIO.VS]
+ DEBUGF DBG_INFO, "nvme%u: Controller version: 0x%x\n", [esi + pcidev.num], eax
+ mov dword [esi + pcidev.version], eax
+ pop ecx edx esi
+ xor eax, eax
+ inc eax
+ ret
+
+.failure:
+ PDEBUGF DBG_INFO, "PCI(%u.%u.%u): something went wrong checking NVMe device compatibility\n", byte [esi + pcidev.bus], byte [esi + pcidev.devfn]
+ pop ecx edx esi
+ xor eax, eax
+ ret
+
+endp
+
+; nvme_init: Initializes the NVMe controller, I/O queues, and namespaces.
+proc nvme_init stdcall, pci:dword
+
+ push ebx esi edi
+ mov esi, dword [pci]
+
+ ; Check the PCI header to see if interrupts are disabled; if so,
+ ; we have to re-enable them
+ invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command
+ and eax, not (1 shl 10)
+ ; Enable the Bus Master bit, memory space access, and I/O space access. QEMU automatically sets the
+ ; bus master bit, but VirtualBox does not. Not sure about the other bits, but let's set them
+ ; to 1 anyway just to be extra cautious. 
+ ; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/1#issuecomment-467
+ or eax, (1 shl 2) or (1 shl 1) or 1
+ invoke PciWrite16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.command, eax
+
+ ; Check if the device has a pointer to the capabilities list (status register bit 4 set to 1),
+ ; though this check is probably unnecessary since all PCIe devices should have this bit set to 1
+ invoke PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.status
+ test ax, (1 shl 4)
+ jz .exit_fail
+
+ invoke PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.cap_ptr
+ and eax, 0xfc ; bottom two bits are reserved, so mask them before we access the configuration space
+ mov edi, eax
+ DEBUGF DBG_INFO, "nvme%u: Checking capabilities...\n", [esi + pcidev.num]
+
+; We need to check if there are any MSI/MSI-X capabilities, and if so, make sure they're disabled since
+; we're using old-fashioned pin-based interrupts (for now)
+.read_cap:
+ invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+ add edi, 2
+ cmp al, MSICAP_CID
+ je .got_msi_cap
+ cmp al, MSIXCAP_CID
+ je .got_msix_cap
+ movzx edi, ah
+ test edi, edi
+ jnz .read_cap
+ DEBUGF DBG_INFO, "nvme%u: MSI/MSI-X capability not found\n", [esi + pcidev.num]
+ jmp .end_cap_parse
+
+.got_msi_cap:
+ DEBUGF DBG_INFO, "nvme%u: Found MSI capability\n", [esi + pcidev.num]
+ invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+ and eax, not MSICAP_MSIE
+ invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
+ jmp .end_cap_parse
+
+.got_msix_cap:
+ DEBUGF DBG_INFO, "nvme%u: Found MSI-X capability\n", [esi + pcidev.num]
+ invoke PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi
+ and eax, not MSIXCAP_MXE
+ invoke PciWrite32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], edi, eax
+
+.end_cap_parse:
+ mov edi, dword [esi + pcidev.io_addr]
+
+ ; check maximum queue entries supported
+ mov eax, dword [edi + NVME_MMIO.CAP]
+ DEBUGF DBG_INFO, "nvme%u: Maximum queue entries available is %u (required: %u)\n", [esi + pcidev.num], ax, SQ_ENTRIES
+ cmp ax, SQ_ENTRIES
+ jb .exit_fail
+
+ if __DEBUG__
+ test eax, CAP_CQR
+ setnz al
+ DEBUGF DBG_INFO, "nvme%u: Contiguous queues required: %u\n", [esi + pcidev.num], al
+ end if
+
+ ; Check if NVM command set is supported
+ mov eax, dword [edi + NVME_MMIO.CAP + 4]
+ DEBUGF DBG_INFO, "nvme%u: Checking if NVM command set is supported...\n", [esi + pcidev.num]
+ test eax, CAP_CSS_NVM_CMDSET
+ jz .exit_fail
+ DEBUGF DBG_INFO, "nvme%u: OK... NVM command set supported\n", [esi + pcidev.num]
+
+ stdcall nvme_disable_ctrl, esi
+ DEBUGF DBG_INFO, "nvme%u: Checking if memory page size is supported...\n", [esi + pcidev.num]
+ mov eax, dword [edi + NVME_MMIO.CAP + 4]
+ mov edx, eax
+ and edx, CAP_MPSMIN
+ shr edx, 16
+ cmp edx, NVM_MPS
+ ja .exit_fail
+ and eax, CAP_MPSMAX
+ shr eax, 20
+ cmp eax, NVM_MPS
+ jb .exit_fail
+ DEBUGF DBG_INFO, "nvme%u: OK... memory page size supported\n", [esi + pcidev.num]
+
+ ; Configure IOSQES, IOCQES, AMS, MPS, CSS
+ ; CSS = 0 (NVM Command Set)
+ ; AMS = 0 (Round Robin)
+ ; MPS = 0 (4KiB Pages)
+ ; IOSQES = 6 (64B)
+ ; IOCQES = 4 (16B)
+ xor eax, eax
+ or eax, CC_DEFAULT_IOSQES or CC_DEFAULT_IOCQES
+ mov dword [edi + NVME_MMIO.CC], eax
+ DEBUGF DBG_INFO, "nvme%u: OK... 
controller is configured to appropriate settings\n", [esi + pcidev.num] + + ; Configure Admin Queue Attributes + xor eax, eax + or eax, NVM_ASQS or (NVM_ACQS shl 16) + mov dword [edi + NVME_MMIO.AQA], eax + DEBUGF DBG_INFO, "nvme%u: Admin queue attributes: 0x%x\n", [esi + pcidev.num], eax + + ; Allocate list of queues + DEBUGF DBG_INFO, "nvme%u: Allocating Administrator and I/O queues...\n",, [esi + pcidev.num] + invoke KernelAlloc, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1) + test eax, eax + jz .exit_fail + mov dword [esi + pcidev.queue_entries], eax + mov edi, eax + stdcall memsetdz, eax, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1) / 4 + + ; Allocate submission/completion queue pointers + xor ebx, ebx + +.init_queues: + invoke KernelAlloc, QUEUE_ALLOC_SIZE + test eax, eax + jz .exit_fail + DEBUGF DBG_INFO, "nvme%u: Allocated queue at offset %u: 0x%x\n", [esi + pcidev.num], ebx, eax + mov dword [edi + ebx + NVM_QUEUE_ENTRY.cq_ptr], eax + mov edx, eax + add eax, CQ_ALLOC_SIZE + mov dword [edi + ebx + NVM_QUEUE_ENTRY.sq_ptr], eax + stdcall memsetdz, edx, QUEUE_ALLOC_SIZE / 4 + + ; Initialize command entries + invoke KernelAlloc, sizeof.NVMQCMD * CQ_ENTRIES + test eax, eax + jz .exit_fail + mov dword [edi + ebx + NVM_QUEUE_ENTRY.cmd_ptr], eax + push ebx esi + mov esi, eax + xor ebx, ebx + +.init_cmd_entries: + invoke KernelAlloc, sizeof.MUTEX + test eax, eax + jz .exit_fail_cleanup + mov dword [esi + NVMQCMD.mutex_ptr], eax + mov dword [esi + NVMQCMD.cid], ebx + mov ecx, eax + invoke MutexInit + inc ebx + add esi, sizeof.NVMQCMD + cmp ebx, CQ_ENTRIES + jne .init_cmd_entries + + pop esi ebx + add ebx, sizeof.NVM_QUEUE_ENTRY + cmp ebx, (LAST_QUEUE_ID + 1) * sizeof.NVM_QUEUE_ENTRY + jne .init_queues + + ; Configure Admin Completion Queue Base Address + mov esi, [pci] + mov esi, dword [esi + pcidev.io_addr] + mov eax, dword [edi + NVM_QUEUE_ENTRY.cq_ptr] + invoke GetPhysAddr + mov dword [esi + NVME_MMIO.ACQ], eax + mov dword [esi + NVME_MMIO.ACQ + 4], 0 + if __DEBUG__ + push esi + mov esi, [pci] + DEBUGF DBG_INFO, "nvme%u: Admin completion queue base address: 0x%x\n", [esi + pcidev.num], eax + pop esi + end if + + ; Configure Admin Submission Queue Base Address + mov eax, dword [edi + NVM_QUEUE_ENTRY.sq_ptr] + invoke GetPhysAddr + mov dword [esi + NVME_MMIO.ASQ], eax + mov dword [esi + NVME_MMIO.ASQ + 4], 0 + if __DEBUG__ + push esi + mov esi, [pci] + DEBUGF DBG_INFO, "nvme%u: Admin submission queue base address: 0x%x\n", [esi + pcidev.num], eax + pop esi + end if + + ; Attach interrupt handler + mov esi, [pci] + movzx eax, byte [esi + pcidev.iline] + DEBUGF DBG_INFO, "nvme%u: Attaching interrupt handler to IRQ %u\n", [esi + pcidev.num], eax + invoke AttachIntHandler, eax, irq_handler, 0 + test eax, eax + jz .exit_fail + DEBUGF DBG_INFO, "nvme%u: Successfully attached interrupt handler\n", [esi + pcidev.num] + + ; Restart the controller + stdcall nvme_enable_ctrl, esi + + invoke KernelAlloc, 0x1000 + test eax, eax + jz .exit_fail + mov edi, eax + invoke GetPhysAddr + ; pci:dword, nsid:dword, dptr:dword, cns:byte + stdcall nvme_identify, [pci], 0, eax, CNS_IDCS + test eax, eax + jz .exit_fail + mov eax, dword [edi + IDENTC.nn] + mov dword [esi + pcidev.nn], eax + DEBUGF DBG_INFO, "nvme%u: Namespace Count: %u\n", [esi + pcidev.num], eax + + ; Note that the specification only allows ASCII strings that contain code + ; values between 0x20 (' ') and 0x7E ('~'). 
Strings are left justified and
+ ; padded with spaces (at least according to the 1.4.0 spec), which means there
+ ; is no null terminator anywhere. To prevent garbage or repeated values from
+ ; being printed to the debug log, I have inserted a 0 byte at the end of each
+ ; string.
+ lea ebx, byte [edi + IDENTC.sn]
+ mov byte [ebx + 19], 0
+ DEBUGF DBG_INFO, "nvme%u: Serial Number: %s\n", [esi + pcidev.num], ebx
+ add ebx, 20
+ mov byte [ebx + 39], 0
+ DEBUGF DBG_INFO, "nvme%u: Model Number: %s\n", [esi + pcidev.num], ebx
+ add ebx, 40
+ mov byte [ebx + 7], 0
+ DEBUGF DBG_INFO, "nvme%u: Firmware Revision: %s\n", [esi + pcidev.num], ebx
+ mov edx, dword [esi + pcidev.version]
+
+ cmp edx, VS140
+ jb @f
+ ; This is a reserved field in pre-1.4 controllers
+ mov al, byte [edi + IDENTC.cntrltype]
+ cmp al, CNTRLTYPE_IO_CONTROLLER
+ jne .exit_fail
+ ;DEBUGF DBG_INFO, "nvme%u: I/O controller detected...\n", [esi + pcidev.num]
+
+@@:
+ ; TODO: check IDENTC.AVSCC
+ mov al, byte [edi + IDENTC.sqes]
+ and al, 11110000b
+ DEBUGF DBG_INFO, "nvme%u: IDENTC.SQES = %u\n", [esi + pcidev.num], al
+ cmp al, 0x60 ; maximum submission queue entry size should be at least 64 bytes
+ jb .exit_fail
+ mov al, byte [edi + IDENTC.cqes]
+ and al, 11110000b
+ DEBUGF DBG_INFO, "nvme%u: IDENTC.CQES = %u\n", [esi + pcidev.num], al
+ cmp al, 0x40 ; maximum completion queue entry size should be at least 16 bytes
+ jb .exit_fail
+ invoke KernelFree, edi
+
+ mov eax, 1 or (1 shl 16) ; CDW11 (set the number of queues we want)
+ mov esi, [pci]
+ mov dword [esi + pcidev.spinlock], 1
+ stdcall set_features, [pci], NULLPTR, FID_NUMBER_OF_QUEUES, eax
+ stdcall nvme_poll, esi
+ test eax, eax
+ jz .exit_fail
+ mov esi, dword [esi + pcidev.queue_entries]
+ mov esi, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
+ mov eax, dword [esi + sizeof.CQ_ENTRY + CQ_ENTRY.cdw0]
+ ;DEBUGF DBG_INFO, "nvme%u: Set Features CDW0: 0x%x\n", [esi + pcidev.num], eax
+ test ax, ax ; Number of I/O Submission Queues allocated
+ jz .exit_fail
+ shr eax, 16
+ test ax, ax ; Number of I/O Completion Queues allocated
+ jz .exit_fail
+
+ ; Create I/O Queues
+ ; (TODO: create N queue pairs for N CPU cores, see page 8 of NVMe 1.4 spec for an explanation)
+ mov esi, [pci]
+ mov edi, esi
+ mov esi, dword [esi + pcidev.queue_entries]
+ add esi, sizeof.NVM_QUEUE_ENTRY
+ mov eax, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
+ invoke GetPhysAddr
+ stdcall create_io_completion_queue, [pci], eax, 1, IEN_ON
+ test eax, eax
+ jz .exit_fail
+ ;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O completion queue 1\n", [edi + pcidev.num]
+ mov eax, dword [esi + NVM_QUEUE_ENTRY.sq_ptr]
+ invoke GetPhysAddr
+ stdcall create_io_submission_queue, [pci], eax, 1, 1
+ test eax, eax
+ jz .exit_fail
+ ;DEBUGF DBG_INFO, "nvme%u: Successfully created I/O submission queue 1\n", [edi + pcidev.num]
+
+ ; TODO: This only registers a single namespace. 
Add support for more + stdcall determine_active_nsids, [pci] + test eax, eax + jz .exit_fail ; No active NSIDS + mov esi, [pci] + mov dword [esi + pcidev.nsid], eax + DEBUGF DBG_INFO, "nvme%u: Found active NSID: %u\n", [esi + pcidev.num], eax + + invoke KernelAlloc, 0x1000 + test eax, eax + jz .exit_fail + mov edi, eax + invoke GetPhysAddr + stdcall nvme_identify, [pci], [esi + pcidev.nsid], eax, CNS_IDNS + test eax, eax + jz .exit_fail + invoke KernelAlloc, sizeof.NSINFO + test eax, eax + jz .exit_fail + mov ebx, eax + mov dword [esi + pcidev.nsinfo], eax + mov al, byte [edi + IDENTN.nsfeat] + mov byte [ebx + NSINFO.features], al + ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Features: 0x%x\n", [esi + pcidev.num], [esi + pcidev.nsid], al + mov eax, dword [esi + pcidev.nsid] + mov dword [ebx + NSINFO.nsid], eax + mov dword [ebx + NSINFO.pci], esi + mov eax, dword [edi + IDENTN.nsze] + mov dword [ebx + NSINFO.size], eax + mov eax, dword [edi + IDENTN.nsze + 4] + mov dword [ebx + NSINFO.size + 4], eax + mov eax, dword [edi + IDENTN.ncap] + mov dword [ebx + NSINFO.capacity], eax + mov eax, dword [edi + IDENTN.ncap + 4] + mov dword [ebx + NSINFO.capacity + 4], eax + ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Size: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.nsze], [edi + IDENTN.nsze + 4] + ;DEBUGF DBG_INFO, "nvme%un%u: Namespace Capacity: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.ncap], [edi + IDENTN.ncap + 4] + mov eax, dword [edi + IDENTN.lbaf0] + shr eax, 16 ; Get LBADS + + ; KolibriOS only supports a LBADS of 512, so if it's a higher value then we + ; have to ignore this namespace + cmp al, SUPPORTED_LBADS + jne .exit_fail + + mov byte [ebx + NSINFO.lbads], al + invoke KernelFree, edi + if 0 + invoke KernelAlloc, 0x6000 + test eax, eax + jz .exit_fail + mov edi, eax + invoke KernelAlloc, 0x8 + test eax, eax + jz .exit_fail + mov edx, NVM_CMD_READ + mov dword [eax], 6 + add edi, 0x5 + mov dword [esi + pcidev.spinlock], 1 + stdcall nvme_readwrite, [esi + pcidev.nsinfo], edi, 0x0, 0, eax + stdcall nvme_poll, esi + test eax, eax + jz .exit_fail + DEBUGF DBG_INFO, "STRING: %s\n", edi + add edi, 0x2000 + DEBUGF DBG_INFO, "STRING: %s\n", edi + end if + DEBUGF DBG_INFO, "nvme%u: Successfully initialized driver\n", [esi + pcidev.num] + xor eax, eax + inc eax + pop edi esi ebx + ret + +.exit_fail_cleanup: + add esp, 8 + +.exit_fail: + mov esi, [pci] + DEBUGF DBG_INFO, "nvme%u: Failed to initialize controller\n", [esi + pcidev.num] + mov edi, dword [esi + pcidev.io_addr] + mov eax, dword [edi + NVME_MMIO.CSTS] + test eax, CSTS_CFS + jz @f + DEBUGF DBG_INFO, "nvme%u: A fatal controller error has occurred\n", [esi + pcidev.num] + +@@: + xor eax, eax + pop edi esi ebx + ret + +endp + +; Returns a new CID for queue #y +proc get_new_cid stdcall, pci:dword, y:dword + + mov eax, [pci] + mov eax, dword [eax + pcidev.queue_entries] + mov ecx, [y] + shl ecx, SIZEOF_NVM_QUEUE_ENTRY + movzx eax, word [eax + ecx + NVM_QUEUE_ENTRY.head] + ;DEBUGF DBG_INFO, "get_new_cid: %u\n", eax + ret + +endp + +proc nvme_disable_ctrl stdcall, pci:dword + + ; TODO: Add timeout of CAP.TO seconds + push esi edi + mov esi, [pci] + DEBUGF DBG_INFO, "nvme%u: Disabling Controller...\n", [esi + pcidev.num] + mov edi, dword [esi + pcidev.io_addr] + and dword [edi + NVME_MMIO.CC], 0xfffffffe ; CC.EN = 0 + +; Wait for controller to be brought to idle state, CSTS.RDY should be cleared to 0 when this happens +.wait: + test dword [edi + NVME_MMIO.CSTS], CSTS_RDY + jnz 
.wait + DEBUGF DBG_INFO, "nvme%u: Successfully disabled controller\n", [esi + pcidev.num] + pop edi esi + ret + +endp + +proc nvme_enable_ctrl stdcall, pci:dword + + ; TODO: Add timeout of CAP.TO seconds + push esi edi + mov esi, [pci] + DEBUGF DBG_INFO, "nvme%u: Enabling Controller...\n", [esi + pcidev.num] + mov edi, dword [esi + pcidev.io_addr] + or dword [edi + NVME_MMIO.CC], 1 ; CC.EN = 1 + +; Wait for controller to be brought into active state, CSTS.RDY should be set to 1 when this happens +.wait: + test dword [edi + NVME_MMIO.CSTS], CSTS_RDY + jz .wait + DEBUGF DBG_INFO, "nvme%u: Successfully enabled controller\n", [esi + pcidev.num] + pop edi esi + ret + +endp + +; Polls until the device's spinlock is unlocked. Unless +; the "bad timeout" is reached. The lock should be unlocked +; by the interrupt handler when all the commands have been +; completed. +proc nvme_poll stdcall, pci:dword + + push esi + mov esi, [pci] + xor ecx, ecx + +@@: + inc ecx + cmp ecx, 0x10000000 + je @f + xor eax, eax + inc eax + xchg eax, dword [esi + pcidev.spinlock] + test eax, eax + jnz @b + + ; lock was released, return 1 + pop esi + xor eax, eax + inc eax + ret + +@@: + ; timeout: lock wasn't released, return 0 + pop esi + xor eax, eax + ret + +endp + + +; Writes to completion queue 'y' head doorbell. 'cqh' should +; be the new head value that will be stored in the register. +proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword + + push esi edi + mov esi, [pci] + + ; 1000h + ((2y + 1) * (4 << CAP.DSTRD)) + mov eax, [y] + shl al, 1 + inc al + mov edx, 4 + mov cl, byte [esi + pcidev.dstrd] + shl dx, cl + imul dx, ax + add dx, 0x1000 + mov ecx, [y] + shl ecx, SIZEOF_NVM_QUEUE_ENTRY + mov edi, dword [esi + pcidev.queue_entries] + lea edi, dword [edi + ecx] + mov eax, [cqh] + DEBUGF DBG_INFO, "nvme%u: Writing to CQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax + mov esi, dword [esi + pcidev.io_addr] + mov word [esi + edx], ax ; Write to CQyHDBL + mov word [edi + NVM_QUEUE_ENTRY.head], ax + + ; Unlock the mutex now that the command is complete + ;mov edi, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr] + ;mov ecx, [cqh] + ;shl ecx, SIZEOF_NVMQCMD + ;add edi, ecx + ;mov ecx, dword [edi + NVMQCMD.mutex_ptr] + ;invoke MutexUnlock + + pop edi esi + ret + +endp + +; Writes to submission queue 'y' tail doorbell. 'cmd' should +; be a pointer to the submission queue struct. 
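+; As a worked example of the doorbell math (assuming the common case
+; CAP.DSTRD = 0, i.e. a stride of (4 shl 0) = 4 bytes): the tail doorbell of
+; submission queue y sits at 1000h + (2y * 4), so SQ0 (admin) is at 1000h and
+; SQ1 is at 1008h, while the completion queue head doorbells computed in
+; cqyhdbl_write above, 1000h + ((2y + 1) * 4), land at 1004h and 100Ch.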
+proc sqytdbl_write stdcall, pci:dword, y:word, cmd:dword + + push ebx esi edi + mov edi, [pci] + mov edi, dword [edi + pcidev.queue_entries] + movzx ebx, [y] + shl ebx, SIZEOF_NVM_QUEUE_ENTRY + lea edi, [edi + ebx] + ;mov eax, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr] + mov edx, dword [edi + NVM_QUEUE_ENTRY.sq_ptr] + mov esi, [cmd] + mov ecx, dword [esi + SQ_ENTRY.cdw0] + shr ecx, 16 ; Get CID + mov ebx, ecx + shl ebx, SIZEOF_NVM_QUEUE_ENTRY + add ebx, eax + shl ecx, SIZEOF_SQ_ENTRY + lea edx, [edx + ecx] + stdcall memcpyd, edx, esi, sizeof.SQ_ENTRY / 4 + ;mov ecx, dword [ebx + NVMQCMD.mutex_ptr] + ;invoke MutexLock + + mov esi, [pci] + mov ax, word [edi + NVM_QUEUE_ENTRY.tail] + inc ax + cmp ax, NVM_ASQS + jbe @f + xor ax, ax + +@@: + ; 1000h + (2y * (4 << CAP.DSTRD)) + movzx ebx, [y] + shl ebx, 1 + mov edx, 4 + mov cl, byte [esi + pcidev.dstrd] + shl edx, cl + imul edx, ebx + add edx, 0x1000 + DEBUGF DBG_INFO, "nvme%u: Writing to SQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax + mov word [edi + NVM_QUEUE_ENTRY.tail], ax + mov esi, dword [esi + pcidev.io_addr] + mov word [esi + edx], ax + pop edi esi ebx + ret + +endp + +proc is_queue_full stdcall, tail:word, head:word + + push bx + mov ax, [tail] + mov bx, [head] + cmp ax, bx + je .not_full + test bx, bx + jnz @f + cmp ax, NVM_ASQS + jne @f + pop bx + xor eax, eax + inc eax + ret + +@@: + cmp ax, bx + jae .not_full + sub ax, bx + cmp ax, 1 + jne .not_full + pop bx + xor eax, eax + inc eax + ret + +.not_full: + pop bx + xor eax, eax + ret + +endp + +; Notifies the controller that all the commands of the respective queue +; have been acknowledged as completed (if any). +proc consume_cq_entries stdcall, pci:dword, queue:dword + + push esi edi + mov esi, [pci] + mov ecx, [queue] + shl ecx, SIZEOF_NVM_QUEUE_ENTRY + mov esi, dword [esi + pcidev.queue_entries] + lea esi, [esi + ecx] + movzx ecx, word [esi + NVM_QUEUE_ENTRY.head] + cmp cx, word [esi + NVM_QUEUE_ENTRY.tail] + je .end + inc ecx + cmp ecx, NVM_ACQS + jbe @f + xor ecx, ecx + mov word [esi + NVM_QUEUE_ENTRY.head], cx + +@@: + stdcall cqyhdbl_write, [pci], [queue], ecx + +.end: + pop edi esi + xor eax, eax + ret + +endp + +; Our interrupt handler. Once the controller finishes a command, +; it should generate an interrupt (assuming that no fatal error +; occurred). If an interrupt isn't being generated when it is expected +; to, check the CSTS register to make sure that the error bit isn't being +; set. The controller doesn't generate any interrupts in such cases. +; +; Once a command has complete (successfully or not), the controller will +; add a new completion queue entry and it is the interrupt handler's +; responsibility to write to the appropriate completion queue's head doorbell +; register and update it correctly, otherwise the controller will continue +; to generate interrupts (the most common causes for freezes with the driver, +; in my experience). +proc irq_handler + + push ebx esi edi + mov edi, dword [p_nvme_devices] + mov esi, edi + sub esi, sizeof.pcidev + mov ebx, dword [pcidevs_len] + xor ecx, ecx + +.check_who_raised_irq: + add esi, sizeof.pcidev + inc ecx + cmp ecx, ebx + ; TODO: Apply solution given by @punk_joker of checking which device + ; generated an interrupt. 
+ ja .not_our_irq + mov edi, dword [esi + pcidev.io_addr] + mov dword [edi + NVME_MMIO.INTMS], 0x3 + stdcall consume_cq_entries, esi, ADMIN_QUEUE + stdcall consume_cq_entries, esi, 1 + + ; Interrupt handled by driver, return 1 + mov dword [edi + NVME_MMIO.INTMC], 0x3 + xor eax, eax + xchg eax, dword [esi + pcidev.spinlock] ; unlock spinlock + pop edi esi ebx + mov eax, 1 + ret + +.not_our_irq: + ; Interrupt not handled by driver, return 0 + pop edi esi ebx + xor eax, eax + ret + +endp + +; Deletes the allocated I/O queues for all of the NVMe devices, +; and shuts down all of the controllers. See page 295-297 of +; the NVMe 1.4.0 spec for details on how shutdown processing +; should occur. +; +; Currently shutdown still has problems on VMWare. +; See: https://git.kolibrios.org/GSoC/kolibrios-nvme-driver/issues/5 +proc nvme_cleanup + + DEBUGF DBG_INFO, "nvme: Cleaning up...\n" + push ebx esi edi + mov esi, dword [p_nvme_devices] + test esi, esi + jnz @f + pop edi esi ebx + ret + +@@: + sub esi, sizeof.pcidev + xor ebx, ebx + +.get_pcidev: + add esi, sizeof.pcidev + + ; Free the queues + mov edi, dword [esi + pcidev.queue_entries] + test edi, edi + jz .ret + sub edi, sizeof.NVM_QUEUE_ENTRY + push ebx + xor ebx, ebx + +.get_queue: + add edi, sizeof.NVM_QUEUE_ENTRY + + ; TODO: Check if I/O completion and submission queue exist + ; before deleting? + test ebx, ebx + jz @f ; we don't want to delete the admin queue + stdcall delete_io_submission_queue, esi, ebx + stdcall delete_io_completion_queue, esi, ebx + +@@: + inc ebx + cmp ebx, LAST_QUEUE_ID + jbe .get_queue + pop ebx + inc ebx + cmp ebx, dword [pcidevs_len] + jne .get_pcidev + + ; NOTE: This code has a bug! It only shuts down the last + ; controller, not all of them. Move this inside the loop + ; and check if the device is actually valid. + ; Shutdown the controller + mov edi, dword [esi + pcidev.io_addr] + mov eax, dword [edi + NVME_MMIO.CC] + and eax, not CC_SHN + or eax, CC_SHN_NORMAL_SHUTDOWN + mov dword [edi + NVME_MMIO.CC], eax + stdcall nvme_disable_ctrl, esi + +; Wait for shutdown processing to complete +@@: + test byte [edi + NVME_MMIO.CSTS], CSTS_SHST_SHUTDOWN_COMPLETE + jnz @b + +.ret: + pop edi esi ebx + ret + +endp + +;all initialized data place here +align 4 + p_nvme_devices dd 0 + pcidevs_len dd 0 + my_service db "nvme",0 ;max 16 chars include zero + disk_functions: + dd disk_functions.end - disk_functions + dd 0 ; no close function + dd 0 ; no closemedia function + dd nvme_query_media + dd nvme_read + dd nvme_write + dd 0 ; no flush function + dd 0 ; use default cache size + .end: + if __DEBUG__ + include_debug_strings + end if + +align 4 +data fixups +end data + +; vim: syntax=fasm diff --git a/drivers/nvme/nvme.inc b/drivers/nvme/nvme.inc new file mode 100644 index 0000000000..158650e55a --- /dev/null +++ b/drivers/nvme/nvme.inc @@ -0,0 +1,591 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; +;; Copyright (C) KolibriOS team 2004-2024. All rights reserved. 
;; +;; Distributed under terms of the GNU General Public License ;; +;; ;; +;; GNU GENERAL PUBLIC LICENSE ;; +;; Version 2, June 1991 ;; +;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NVMe Controller Versions +VS100 = 0x00010000 ; (v1.0.0) +VS110 = 0x00010100 ; (v1.1.0) +VS120 = 0x00010200 ; (V1.2.0) +VS121 = 0x00010201 ; (v1.2.1) +VS130 = 0x00010300 ; (v1.3.0) +VS140 = 0x00010400 ; (v1.4.0) + +NVM_MPS = 0 ; Memory Page Size (2 ^ (12 + MPS)) +NVM_ASQS = 64 ; Admin Submission Queue Size +NVM_ACQS = NVM_ASQS ; Admin Completion Queue Size +LAST_QUEUE_ID = 1 ; Index of the last queue +SQ_ENTRIES = NVM_ASQS ; I/O and Admin Submission Queue Size +CQ_ENTRIES = NVM_ACQS ; I/O and Admin Completion Queue Size +PAGE_SIZE = 4096 shl NVM_MPS ; Use 4KiB pages +SUPPORTED_LBADS = 9 ; KolibriOS only supports LBADS of 512, later on we may remove this restriction +SQ_ALLOC_SIZE = 0x1000 +CQ_ALLOC_SIZE = 0x1000 +QUEUE_ALLOC_SIZE = SQ_ALLOC_SIZE + CQ_ALLOC_SIZE +SIZEOF_SQ_ENTRY = 6 ; log2(sizeof.SQ_ENTRY) +SIZEOF_CQ_ENTRY = 4 ; log2(sizeof.CQ_ENTRY) +SIZEOF_NVM_QUEUE_ENTRY = 4 ; log2(sizeof.NVM_QUEUE_ENTRY) +SIZEOF_NVMQCMD = 4 ; log2(sizeof.NVMQCMD) + +MSIXCAP_CID = 0x11 +MSIXCAP_MXE = 1 shl 15 ; MSI-X Enable bit +MSICAP_CID = 0x05 +MSICAP_MSIE = 1 ; MSI Enable bit + +ADMIN_QUEUE = 0 ; Admin Queue ID + +IEN_ON = 2 +IEN_OFF = 0 + +; Opcodes for NVM commands +NVM_CMD_FLUSH = 0x00 +NVM_CMD_WRITE = 0x01 +NVM_CMD_READ = 0x02 +NVM_CMD_WRITE_UNCORRECTABLE = 0x04 +NVM_CMD_COMPARE = 0x05 +NVM_CMD_WRITE_ZEROES = 0x08 +NVM_CMD_DATASET_MANAGEMENT = 0x09 +NVM_CMD_VERIFY = 0x0C +NVM_CMD_RESERVATION_REG = 0x0D +NVM_CMD_RESERVATION_REPORT = 0x0E +NVM_CMD_RESERVATION_ACQUIRE = 0x11 +NVM_CMD_RESERVATION_RELEASE = 0x15 +NVM_CMD_COPY = 0x19 + +; Opcodes for admin commands (Page 94 of NVMe 1.4 spec) +ADM_CMD_DEL_IO_SUBMISSION_QUEUE = 0x00 +ADM_CMD_CRE_IO_SUBMISSION_QUEUE = 0x01 +ADM_CMD_GET_LOG_PAGE = 0x02 +ADM_CMD_DEL_IO_COMPLETION_QUEUE = 0x04 +ADM_CMD_CRE_IO_COMPLETION_QUEUE = 0x05 +ADM_CMD_IDENTIFY = 0x06 +ADM_CMD_ABORT = 0x08 +ADM_CMD_SET_FEATURES = 0x09 +ADM_CMD_GET_FEATURES = 0x0A + +; fuse (fused operation): In a fused operation, a complex command is created by 'fusing' together +; two simpler commands. This field specifies whether this command is part +; of a fused operation, and if so, which command it is in the sequence: +; 00b -> Normal operation +; 01b -> Fused operation, first command +; 10b -> Fused operation, second command +; 11b -> Reserved +NO_FUSE = 0 +FUSE_OP_FIRST_CMD = 1 shl 8 +FUSE_OP_SECOND_CMD = 2 shl 8 + +; sel (PRP or SGL for data transfer): This field specifies whether PRPs or SGLs are used for any +; data transfer associated with the command. PRPs shall be +; used for all Admin commands for NVMe over PCIe implementations. +; SGLs shall be used for all Admin and I/O commands for NVMe over +; Fabrics implementations (i.e., field set to 01b): +; 00b -> PRPs are used for this transfer +; 01b -> SGLs are used for this transfer, MPTR will contain address of +; a single contiguous physical buffer that is byte aligned +; 10b -> SGLs are used for this transfer. MPTR will contain address of +; an SGL segment containing exactly one SGL descriptor that is +; QWORD aligned +; 11b -> Reserved +SEL_PRP = 0 +SEL_SGL = 1 shl 14 + +; Controller or Namespace Structure (CNS) specifies the information to be returned to the host. 
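+;
+; For example, controller bring-up in nvme.asm issues
+;     stdcall nvme_identify, [pci], 0, <prp1>, CNS_IDCS
+; to fetch the 4 KiB IDENTC structure, and later
+;     stdcall nvme_identify, [pci], <nsid>, <prp1>, CNS_IDNS
+; to fetch IDENTN for the namespace it registers (the angle-bracketed
+; arguments are placeholders for a physical buffer address and an NSID).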
+CNS_IDNS = 0x0 ; Namespace data structure (NSID) +CNS_IDCS = 0x1 ; Controller data structure +CNS_ANIDL = 0x2 ; Active namespace ID list (NSID) +CNS_NIDL = 0x3 ; Namespace identification descriptor list (NSID) +CNS_NVM_SL = 0x4 ; NVM Set List + +; Optional Admin Command Support (OACS) values +OACS_SEC_SEN_RECV_SUPPORTED = 1 shl 0 +OACS_FMT_NVM_SUPPORTED = 1 shl 1 +OACS_FIRM_COMDL_SUPPORTED = 1 shl 2 +OACS_NSMAN_SUPPORTED = 1 shl 3 + +; scope is all attached namespaces or all namespaces in NVM subsystem +NSID_BROADCAST = 0xFFFFFFFF + +NSSRC_RESET = 0x4E564D65 ; "NVMe" (initiates a NVMe subsystem reset) + +; NVMe Capabilities +CAP_MQES = 0xff +CAP_CQR = 1 shl 16 +CAP_AMS = (1 shl 17) or (1 shl 18) +CAP_TO = 0xff000000 +CAP_DSTRD = 1 or (1 shl 1) or (1 shl 2) or (1 shl 3) +CAP_NSSRS = 1 shl 4 +CAP_CSS_NVM_CMDSET = 1 shl 5 +CAP_CSS_NOIO = 1 shl 12 +CAP_BPS = 1 shl 14 +CAP_CPS_COSCOP = 1 shl 15 +CAP_CPS_DOSCOP = 1 shl 16 +CAP_CPS_NVMSCOP = CAP_CPS_COSCOP or CAP_CPS_DOSCOP +CAP_MPSMIN = (1 shl 17) or (1 shl 18) or (1 shl 19) or (1 shl 20) +CAP_MPSMAX = (1 shl 21) or (1 shl 22) or (1 shl 23) or (1 shl 24) +CAP_PMRS = 1 shl 25 +CAP_CMBS = 1 shl 26 +CAP_NSSS = 1 shl 27 +CAP_CRMS_CRWMS = 1 shl 28 +CAP_CRMS_CRIMS = 1 shl 29 + +; Controller Configuration Bits +CC_EN = 1 +CC_CSS = (1 shl 4) or (1 shl 5) or (1 shl 6) +CC_MPS = (1 shl 7) or (1 shl 8) or (1 shl 9) or (1 shl 10) +CC_AMS = (1 shl 11) or (1 shl 12) or (1 shl 13) +CC_SHN = (1 shl 14) or (1 shl 15) +CC_IOSQES = (1 shl 16) or (1 shl 17) or (1 shl 18) or (1 shl 19) +CC_IOCQES = (1 shl 20) or (1 shl 21) or (1 shl 22) or (1 shl 23) +CC_CRIME = 1 shl 24 + +CC_SHN_NORMAL_SHUTDOWN = 1 shl 14 +CC_SHN_ABRUPT_SHUTDOWN = 1 shl 15 + +CC_DEFAULT_IOSQES = SIZEOF_SQ_ENTRY shl 16 +CC_DEFAULT_IOCQES = SIZEOF_CQ_ENTRY shl 20 + +; Completion Queue Entry Status Field Values +CQ_PHASE_TAG = 1 shl 0 +CQ_STATUS_SC = 0xfe +CQ_STATUS_SCT = (1 shl 9) or (1 shl 10) or (1 shl 11) +CQ_STATUS_CRD = (1 shl 12) or (1 shl 13) +CQ_STATUS_M = 1 shl 14 +CQ_STATUS_DNR = 1 shl 15 + +; Completion Queue Entry Status Field - Status Code Type Values +CQ_STATUS_SCT_GCS = 0x0 ; Generic Command Status +CQ_STATUS_SCT_CSS = 0x1 ; Command Specific Status +CQ_STATUS_SCT_MADIE = 0x2 ; Media and Data Integrity Errors +CQ_STATUS_SCT_PRS = 0x3 ; Path Related Status + +; Completion Queue Entry Status Field - Status Code Generic Command Values +CQ_STATUS_SC_GCS_SUCCESS = 0x00 ; Successful Completion +CQ_STATUS_SC_GCS_ICOP = 0x01 ; Invalid Command Opcode +CQ_STATUS_SC_GCS_IFIC = 0x02 ; Invalid Field in Command +CQ_STATUS_SC_GCS_CIDC = 0x03 ; Command ID Conflict +CQ_STATUS_SC_GCS_DTE = 0x04 ; Data Transfer Error +CQ_STATUS_SC_GCS_CAPLN = 0x05 ; Commands Aborted due to Power Loss Notification +CQ_STATUS_SC_GCS_INERR = 0x06 ; Internal Error +CQ_STATUS_SC_GCS_CAR = 0x07 ; Command Abort Requested +CQ_STATUS_SC_GCS_CASQD = 0x08 ; Command Aborted due to SQ Deletion +CQ_STATUS_SC_GCS_CAFFC = 0x09 ; Command Aborted due to Failed Fused Command +CQ_STATUS_SC_GCS_CAMFC = 0x0A ; Command Aborted due to Missing Fused Command +CQ_STATUS_SC_GCS_INNOF = 0x0B ; Invalid Namespace or Format +CQ_STATUS_SC_GCS_CSE = 0x0C ; Command Sequence Error +CQ_STATUS_SC_GCS_INSGL = 0x0D ; Invalid SGL Segment Descriptor +CQ_STATUS_SC_GCS_INNSGL = 0x0E ; Invalid Number of SGL Descriptors +CQ_STATUS_SC_GCS_OPDEN = 0x15 ; Operation Denied +CQ_STATUS_SC_GCS_NSIWP = 0x20 ; Namespace is Write Protected +CQ_STATUS_SC_GCS_CINT = 0x21 ; Command Interrupted +CQ_STATUS_SC_GCS_TTE = 0x22 ; Transient Transport Error + +; Completion Queue Entry 
Status Field - Status Code Media and Data Integrity Errors +CQ_STATUS_SC_MADIE_WF = 0x80 ; Write Fault +CQ_STATUS_SC_MADIE_URE = 0x81 ; Unrecovered Read Error +CQ_STATUS_SC_MADIE_ACDEN = 0x86 ; Access Denied +CQ_STATUS_SC_MADIE_DOULB = 0x87 ; Deallocated or Unwritten Logical Block + +; Controller Status (CSTS) Values +CSTS_RDY = 1 +CSTS_CFS = 1 shl 1 +CSTS_SHST = (1 shl 2) or (1 shl 3) +CSTS_NSSRO = 1 shl 4 +CSTS_PP = 1 shl 5 +CSTS_SHST_SHUTDOWN_OCCURRING = 1 shl 2 +CSTS_SHST_SHUTDOWN_COMPLETE = 1 shl 3 + +; Admin Queue Attributes (AQA) Values +AQA_ASQS = 0xfff +AQA_ACQS = 0xfff shl 16 + +; CDW10.SEL Values (Page 115 of NVMe 1.4 specification) +CDW10_SEL_CURRENT = 000b +CDW10_SEL_DEFAULT = 001b +CDW10_SEL_SAVED = 010b +CDW10_SEL_SUPPORTED_CAPABILITIES = 011b + +; Feature Identifiers (FID) Values (Page 206 of NVMe 1.4 specification) +; Used in Get/Set Features Commands +FID_ARBITRATION = 0x01 +FID_POWER_MANAGEMENT = 0x02 +FID_LBA_RANGE_TYPE = 0x03 +FID_TEMPERATURE_THRESHOLD = 0x04 +FID_ERROR_RECOVERY = 0x05 +FID_VOLATILE_WRITE_CACHE = 0x06 +FID_NUMBER_OF_QUEUES = 0x07 +FID_INTERRUPT_COALESCING = 0x08 +FID_INTERRUPT_VECTOR_CONFIGURATION = 0x09 +FID_WRITE_ATOMICITY_NORMAL = 0x0A +FID_ASYNCHRONOUS_EVENT_CONFIGURATION = 0x0B +FID_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C +FID_HOST_MEMORY_BUFFER = 0x0D +FID_TIMESTAMP = 0x0E +FID_KEEP_ALIVE_TIMER = 0x0F +FID_HOST_CONTROLLED_THERMAL_MANAGEMENT = 0x10 +FID_NON_OPERATIONAL_POWER_STATE_CONFIG = 0x11 +FID_READ_RECOVERY_LEVEL_CONFIG = 0x12 +FID_PREDICTABLE_LATENCY_MODE_CONFIG = 0x13 +FID_PREDICTABLE_LATENCY_MODE_WINDOW = 0x14 +FID_LBA_STATUS_INFORMATION_REPORT_INTERVAL = 0x15 +FID_HOST_BEHAVIOR_SUPPORT = 0x16 +FID_SANITIZE_CONFIG = 0x17 +FID_ENDURANCE_GROUP_EVENT_CONFIGURATION = 0x18 +; NVM Command Set Specific - FID +FID_SOFTWARE_PROGRESS_MARKER = 0x80 +FID_HOST_IDENTIFIER = 0x81 +FID_RESERVATION_NOTIFICATION_MASK = 0x82 +FID_RESERVATION_PERSISTENCE = 0x83 +FID_NAMESPACE_WRITE_PROTECTION_CONFIG = 0x84 + +; Get Log Page - Log Page Identifiers (Page 118-119 of NVMe 1.4 specification) +LID_ERROR_INFORMATION = 0x01 +LID_SMARTHEALTH_INFORMATION = 0x02 +LID_FIRMWARE_SLOT_INFORMATION = 0x03 +LID_CHANGED_NAMESPACE_LIST = 0x04 +LID_COMMANDS_SUPPORTED_AND_EFFECTS = 0x05 +LID_DEVICE_SELF_TEST = 0x06 +LID_TELEMETRY_HOST_INITIATED = 0x07 +LID_TELEMETRY_CONTROLLER_INITIATED = 0x08 +LID_ENDURANCE_GROUP_INFORMATION = 0x09 +LID_PREDICTABLE_LATENCY_PER_NVM_SET = 0x0A +LID_PREDICTABLE_LATENCY_EVENT_AGGREGATE = 0x0B +LID_ASYMMETRIC_NAMESPACE_ACCESS = 0x0C +LID_PERSISTENT_EVENT_LOG = 0x0D +LID_LBA_STATUS_INFORMATION = 0x0E +LID_ENDURANCE_GROUP_EVENT_AGGREGATE = 0x0F +; I/O Command Set Specific - Log Page Identifiers +LID_RESERVATION_NOTIFICATION = 0x80 +LID_SANITIZE_STATUS = 0x81 + +; Controller Type Values +CNTRLTYPE_IO_CONTROLLER = 0x1 +CNTRLTYPE_DISCOVERY_CONTROLLER = 0x2 +CNTRLTYPE_ADMIN_CONTROLLER = 0x3 + +struct NVME_MMIO + CAP dq ? ; Controller Capabilities + VS dd ? ; Version + INTMS dd ? ; Interrupt Mask Set + INTMC dd ? ; Interrupt Mask Clear + CC dd ? ; Controller Configuration + rd 1 ; Reserved + CSTS dd ? ; Controller Status + NSSR dd ? ; NVM Subsystem Reset + AQA dd ? ; Admin Queue Attributes + ASQ dq ? ; Admin Submission Queue Base Address + ACQ dq ? ; Admin Completion Queue Base Address + CMBLOC dd ? ; Controller Memory Buffer Location + CMBSZ dd ? ; Controller Memory Buffer Size + BPINFO dd ? ; Boot Partition Information + BPRSEL dd ? ; Boot Partition Read Select + BPMBL dq ? ; Boot Partition Memory Buffer Location + CMBMSC dd ? 
+struct NVME_MMIO
+        CAP     dq ? ; Controller Capabilities
+        VS      dd ? ; Version
+        INTMS   dd ? ; Interrupt Mask Set
+        INTMC   dd ? ; Interrupt Mask Clear
+        CC      dd ? ; Controller Configuration
+        rd 1         ; Reserved
+        CSTS    dd ? ; Controller Status
+        NSSR    dd ? ; NVM Subsystem Reset
+        AQA     dd ? ; Admin Queue Attributes
+        ASQ     dq ? ; Admin Submission Queue Base Address
+        ACQ     dq ? ; Admin Completion Queue Base Address
+        CMBLOC  dd ? ; Controller Memory Buffer Location
+        CMBSZ   dd ? ; Controller Memory Buffer Size
+        BPINFO  dd ? ; Boot Partition Information
+        BPRSEL  dd ? ; Boot Partition Read Select
+        BPMBL   dq ? ; Boot Partition Memory Buffer Location
+        CMBMSC  dq ? ; Controller Memory Buffer Memory Space Control (64-bit)
+        CMBSTS  dd ? ; Controller Memory Buffer Status
+        rb 3492      ; Reserved
+        PMRCAP  dd ? ; Persistent Memory Capabilities
+        PMRCTL  dd ? ; Persistent Memory Region Control
+        PMRSTS  dd ? ; Persistent Memory Region Status
+        PMREBS  dd ? ; Persistent Memory Region Elasticity Buffer Size
+        PMRSWTP dd ? ; Persistent Memory Region Sustained Write Throughput
+        PMRMSC  dq ? ; Persistent Memory Region Controller Memory Space Control
+        rb 484       ; Reserved
+        SQ0TDBL dd ? ; Submission Queue 0 Tail Doorbell (Admin)
+ends
+
+; Submission Queue Entry (64 bytes)
+struct SQ_ENTRY
+        cdw0  dd ?
+        nsid  dd ?
+        cdw2  dd ?
+        cdw3  dd ?
+        mptr  dq ?
+        prp1  dq ?
+        prp2  dq ?
+        cdw10 dd ?
+        cdw11 dd ?
+        cdw12 dd ?
+        cdw13 dd ?
+        cdw14 dd ?
+        cdw15 dd ?
+ends
+
+; Completion Queue Entry (16 bytes) - See page 77 of the NVMe 1.4 spec
+struct CQ_ENTRY
+        cdw0   dd ?
+        rd 1 ; reserved
+        sqhd   dw ?
+        sqid   dw ?
+        cid    dw ?
+        status dw ?
+ends
+
+struct NSINFO
+        capacity dq ?
+        size     dq ?
+        nsid     dd ?
+        pci      dd ?
+        lbads    db ?
+        features db ?
+ends
+
+struct pcidev
+        bus           db ?
+        devfn         db ?
+        ipin          db ?
+        iline         db ?
+        num           dd ?
+        io_addr       dd ?
+        queue_entries dd ?
+        version       dd ?
+        nsid          dd ?
+        spinlock      dd ?
+        nsinfo        dd ?
+        nn            dd ?
+        dstrd         db ?
+        rb 3 ; align
+ends
+TOTAL_PCIDEVS = 4
+TOTAL_PCIDEVS_MALLOC_SZ = TOTAL_PCIDEVS * sizeof.pcidev
+
+struct NVMQCMD
+        cid       dd ?
+        mutex_ptr MUTEX
+ends
+
+struct NVM_QUEUE_ENTRY
+        tail    dw ?
+        head    dw ?
+        sq_ptr  dd ?
+        cq_ptr  dd ?
+        cmd_ptr dd ?
+ends
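One detail of the register map deserves a note: the queue doorbells are not
members of NVME_MMIO because their stride is device-dependent. Doorbell y
lives at 0x1000 + ((2 * y + IsCQ) * (4 shl CAP.DSTRD)). A sketch of that
formula, assuming the stride was cached in pcidev.dstrd during initialization
(the proc name is illustrative; the driver computes this inline in its
doorbell writers):

    proc doorbell_offset stdcall, pci:dword, qid:dword, iscq:dword
            push    ecx edx esi
            mov     esi, [pci]
            mov     eax, [qid]
            shl     eax, 1
            add     eax, [iscq]     ; 2 * QID + IsCQ
            movzx   ecx, byte [esi + pcidev.dstrd]
            mov     edx, 4
            shl     edx, cl         ; stride = 4 shl CAP.DSTRD bytes
            imul    eax, edx
            add     eax, 0x1000     ; doorbells start at offset 0x1000
            pop     esi edx ecx
            ret
    endp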
+; Identify Controller Data Structure
+struct IDENTC
+
+        vid       dw ?
+        ssvid     dw ?
+        sn        dt ?, ?
+        mn        dt ?, ?, ?, ?
+        fr        dq ?
+        rab       db ?
+        ieee      db ?, ?, ?
+        cmic      db ?
+        mdts      db ?
+        cntlid    dw ?
+        ver       dd ?
+        rtd3r     dd ?
+        rtd3e     dd ?
+        oaes      dd ?
+        ctratt    dd ?
+        rrls      dw ?
+        rb 9 ; reserved
+        cntrltype db ?
+        fguid     dq ?, ?
+        crdt1     dw ?
+        crdt2     dw ?
+        crdt3     dw ?
+        rb 106 ; reserved
+        rb 16  ; reserved (NVMMI)
+        oacs      dw ?
+        acl       db ?
+        aerl      db ?
+        frmw      db ?
+        lpa       db ?
+        elpe      db ?
+        npss      db ?
+        avscc     db ?
+        apsta     db ?
+        wctemp    dw ?
+        cctemp    dw ?
+        mtfa      dw ?
+        hmpre     dd ?
+        hmmin     dd ?
+        tnvmcap   dq ?, ?
+        unvmcap   dq ?, ?
+        rpmbs     dd ?
+        edstt     dw ?
+        dsto      db ?
+        fwug      db ?
+        kas       dw ?
+        hctma     dw ?
+        mntmt     dw ?
+        mxtmt     dw ?
+        sanicap   dd ?
+        hmminds   dd ?
+        hmmaxd    dw ?
+        nsetidmax dw ?
+        endgidmax dw ?
+        anatt     db ?
+        anacap    db ?
+        anagrpmax dd ?
+        nanagrpid dd ?
+        pels      dd ?
+        rb 156
+        sqes      db ?
+        cqes      db ?
+        maxcmd    dw ?
+        nn        dd ?
+        oncs      dw ?
+        fuses     dw ?
+        fna       db ?
+        vwc       db ?
+        awun      dw ?
+        awupf     dw ?
+        nvscc     db ?
+        nwpc      db ?
+        acwu      dw ?
+        rb 2
+        sgls      dd ?
+        mnan      dd ?
+        rb 224
+        subnqn    dq ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
+        rb 768
+        rb 256
+        psd0      dq ?, ?, ?, ?
+        psd1      dq ?, ?, ?, ?
+        psd2      dq ?, ?, ?, ?
+        psd3      dq ?, ?, ?, ?
+        psd4      dq ?, ?, ?, ?
+        psd5      dq ?, ?, ?, ?
+        psd6      dq ?, ?, ?, ?
+        psd7      dq ?, ?, ?, ?
+        psd8      dq ?, ?, ?, ?
+        psd9      dq ?, ?, ?, ?
+        psd10     dq ?, ?, ?, ?
+        psd11     dq ?, ?, ?, ?
+        psd12     dq ?, ?, ?, ?
+        psd13     dq ?, ?, ?, ?
+        psd14     dq ?, ?, ?, ?
+        psd15     dq ?, ?, ?, ?
+        psd16     dq ?, ?, ?, ?
+        psd17     dq ?, ?, ?, ?
+        psd18     dq ?, ?, ?, ?
+        psd19     dq ?, ?, ?, ?
+        psd20     dq ?, ?, ?, ?
+        psd21     dq ?, ?, ?, ?
+        psd22     dq ?, ?, ?, ?
+        psd23     dq ?, ?, ?, ?
+        psd24     dq ?, ?, ?, ?
+        psd25     dq ?, ?, ?, ?
+        psd26     dq ?, ?, ?, ?
+        psd27     dq ?, ?, ?, ?
+        psd28     dq ?, ?, ?, ?
+        psd29     dq ?, ?, ?, ?
+        psd30     dq ?, ?, ?, ?
+        psd31     dq ?, ?, ?, ?
+        rb 1024
+ends
+
+; Identify Namespace Data Structure
+struct IDENTN
+        nsze     dq ?
+        ncap     dq ?
+        nuse     dq ?
+        nsfeat   db ?
+        nlbaf    db ?
+        flbas    db ?
+        mc       db ?
+        dpc      db ?
+        dps      db ?
+        nmic     db ?
+        rescap   db ?
+        fpi      db ?
+        dlfeat   db ?
+        nawun    dw ?
+        nawupf   dw ?
+        nacwu    dw ?
+        nabsn    dw ?
+        nabo     dw ?
+        nabspf   dw ?
+        noiob    dw ?
+        nvmcap   dq ?
+        dq ?
+        npwg     dw ?
+        npwa     dw ?
+        npdg     dw ?
+        npda     dw ?
+        nows     dw ?
+        rb 18
+        anagrpid dd ?
+        rb 3
+        nsattr   db ?
+        nvmsetid dw ?
+        endgid   dw ?
+        nguid    dq ?
+        dq ?
+        eui64    dq ?
+        lbaf0    dd ?
+        lbaf1    dd ?
+        lbaf2    dd ?
+        lbaf3    dd ?
+        lbaf4    dd ?
+        lbaf5    dd ?
+        lbaf6    dd ?
+        lbaf7    dd ?
+        lbaf8    dd ?
+        lbaf9    dd ?
+        lbaf10   dd ?
+        lbaf11   dd ?
+        lbaf12   dd ?
+        lbaf13   dd ?
+        lbaf14   dd ?
+        lbaf15   dd ?
+        rb 3904
+ends
+
+; Namespace Granularity List (CNS 16h - Page 199 of NVMe specification 1.4)
+struct NSGRANLS
+
+        nga   dd ?
+        nod   db ?
+        rb 27 ; reserved
+        ngd0  dq ?, ?
+        ngd1  dq ?, ?
+        ngd2  dq ?, ?
+        ngd3  dq ?, ?
+        ngd4  dq ?, ?
+        ngd5  dq ?, ?
+        ngd6  dq ?, ?
+        ngd7  dq ?, ?
+        ngd8  dq ?, ?
+        ngd9  dq ?, ?
+        ngd10 dq ?, ?
+        ngd11 dq ?, ?
+        ngd12 dq ?, ?
+        ngd13 dq ?, ?
+        ngd14 dq ?, ?
+        ngd15 dq ?, ?
+
+ends
+
+assert NVM_ASQS = NVM_ACQS
+assert SQ_ENTRIES = NVM_ASQS
+assert CQ_ENTRIES = NVM_ACQS
+assert NVM_MPS = 0
+assert PAGE_SIZE = 0x1000
+assert NVME_MMIO.SQ0TDBL = 0x1000 ; the doorbell registers begin at offset 0x1000
+assert sizeof.SQ_ENTRY = 64
+assert sizeof.CQ_ENTRY = 16
+assert sizeof.IDENTC = 4096
+assert sizeof.IDENTN = 4096
+assert sizeof.NSGRANLS = 288
+assert sizeof.NVMQCMD = 16
+assert SIZEOF_SQ_ENTRY = 6
+assert SIZEOF_CQ_ENTRY = 4
+assert SIZEOF_SQ_ENTRY = CC_DEFAULT_IOSQES shr 16
+assert SIZEOF_CQ_ENTRY = CC_DEFAULT_IOCQES shr 20
+
+; NOTE: DO NOT CHANGE THIS ASSERTION!
+; If you do decide to change it, you'll have to modify the
+; source code manually, since it uses bit shifts to multiply
+; by the struct size.
+assert sizeof.NVM_QUEUE_ENTRY = 16
+assert SIZEOF_NVM_QUEUE_ENTRY = 4
+; vim: syntax=fasm
-- 
2.45.2
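A note on the synchronization model used throughout this series: completion of
a command is currently detected by busy-waiting on the per-device
pcidev.spinlock dword rather than sleeping (see the NOTE added in patch 4
below). A bounded busy-wait along these lines keeps a lost completion from
hanging the kernel; the NVME_POLL_LIMIT bound and the helper name are
hypothetical, not part of the series:

    NVME_POLL_LIMIT = 0x10000000 ; hypothetical iteration bound

    ; Sketch: returns eax = 1 if the flag cleared, eax = 0 on timeout.
    proc spinlock_poll stdcall, pci:dword
            push    ecx esi
            mov     esi, [pci]
            mov     ecx, NVME_POLL_LIMIT
    @@:
            cmp     dword [esi + pcidev.spinlock], 0
            je      .done               ; cleared by the completion path
            dec     ecx
            jnz     @b
            pop     esi ecx
            xor     eax, eax
            ret
    .done:
            pop     esi ecx
            mov     eax, 1
            ret
    endp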
From 0aace3dc73e80ccd4be5a91907a23b66a05aa3e0 Mon Sep 17 00:00:00 2001
From: Abdur-Rahman Mansoor
Date: Wed, 21 Aug 2024 13:39:17 -0400
Subject: [PATCH 2/5] apply fixes mentioned by @Sweetbread and @Doczom

---
 drivers/nvme/command.inc |  1 -
 drivers/nvme/nvme.asm    |  6 +---
 drivers/nvme/nvme.inc    | 68 ++++++++++++++++++++--------------------
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/drivers/nvme/command.inc b/drivers/nvme/command.inc
index e40c264889..48f93b1bc0 100644
--- a/drivers/nvme/command.inc
+++ b/drivers/nvme/command.inc
@@ -131,7 +131,6 @@ proc set_features stdcall, pci:dword, prp1:dword, fid:byte, cdw11:dword
         mov eax, [prp1]
         mov dword [esp + SQ_ENTRY.prp1], eax
         movzx eax, [fid]
-        ;or eax, 1 shl 31 ; CDW10.SV
         mov dword [esp + SQ_ENTRY.cdw10], eax
         mov eax, [cdw11]
         mov dword [esp + SQ_ENTRY.cdw11], eax
diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm
index 5da60d2521..1571602658 100644
--- a/drivers/nvme/nvme.asm
+++ b/drivers/nvme/nvme.asm
@@ -8,7 +8,7 @@
 ;;                                                                   ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-format PE DLL native
+format PE DLL native 0.05
 entry START
 
 API_VERSION = 0 ;debug
@@ -463,7 +463,6 @@ proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors
 
         mov eax, [numsectors_ptr]
         mov eax, dword [eax]
-        DEBUGF DBG_INFO, "buf: %x, start_sector: %u:%u, numsectors: %u\n", [buf], [start_sector + 4], [start_sector], eax
         mov dword [ebx + 4], 0 ; PRP2 entry (0 by default)
         mov dword [ebx + 8], edx ; command type (read or write)
         mov dword [ebx + 12], eax ; save original numsectors value
@@ -479,7 +478,6 @@ proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors
 
         test eax, eax
         jz .fail
-        DEBUGF DBG_INFO, "PRP1: %x, PRP2: %x\n", [ebx], [ebx + 4]
         mov eax, dword [start_sector]
 
 ; According to the NVMe specification, the NLB field in the I/O read and write
@@ -1136,7 +1134,6 @@ proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword
         mov edi, dword [esi + pcidev.queue_entries]
         lea edi, dword [edi + ecx]
         mov eax, [cqh]
-        DEBUGF DBG_INFO, "nvme%u: Writing to CQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax
         mov esi, dword [esi + pcidev.io_addr]
         mov word [esi + edx], ax ; Write to CQyHDBL
         mov word [edi + NVM_QUEUE_ENTRY.head], ax
@@ -1194,7 +1191,6 @@ proc sqytdbl_write stdcall, pci:dword, y:word, cmd:dword
         shl edx, cl
         imul edx, ebx
         add edx, 0x1000
-        DEBUGF DBG_INFO, "nvme%u: Writing to SQ%u doorbell register 0x%x: %u\n", [esi + pcidev.num], [y], dx, ax
         mov word [edi + NVM_QUEUE_ENTRY.tail], ax
         mov esi, dword [esi + pcidev.io_addr]
         mov word [esi + edx], ax
diff --git a/drivers/nvme/nvme.inc b/drivers/nvme/nvme.inc
index 158650e55a..3b312b39ec 100644
--- a/drivers/nvme/nvme.inc
+++ b/drivers/nvme/nvme.inc
@@ -372,7 +372,7 @@ struct IDENTC
         vid       dw ?
         ssvid     dw ?
         sn        dt ?, ?
-        mn        dt ?, ?, ?, ?
+        mn        rt 4
         fr        dq ?
         rab       db ?
         ieee      db ?, ?, ?
@@ -445,41 +445,41 @@ struct IDENTC
         sgls      dd ?
         mnan      dd ?
         rb 224
-        subnqn    dq ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
+        subnqn    rq 32
         rb 768
         rb 256
-        psd0      dq ?, ?, ?, ?
-        psd1      dq ?, ?, ?, ?
-        psd2      dq ?, ?, ?, ?
-        psd3      dq ?, ?, ?, ?
-        psd4      dq ?, ?, ?, ?
-        psd5      dq ?, ?, ?, ?
-        psd6      dq ?, ?, ?, ?
-        psd7      dq ?, ?, ?, ?
-        psd8      dq ?, ?, ?, ?
-        psd9      dq ?, ?, ?, ?
-        psd10     dq ?, ?, ?, ?
-        psd11     dq ?, ?, ?, ?
-        psd12     dq ?, ?, ?, ?
-        psd13     dq ?, ?, ?, ?
-        psd14     dq ?, ?, ?, ?
-        psd15     dq ?, ?, ?, ?
-        psd16     dq ?, ?, ?, ?
-        psd17     dq ?, ?, ?, ?
-        psd18     dq ?, ?, ?, ?
-        psd19     dq ?, ?, ?, ?
-        psd20     dq ?, ?, ?, ?
-        psd21     dq ?, ?, ?, ?
-        psd22     dq ?, ?, ?, ?
-        psd23     dq ?, ?, ?, ?
-        psd24     dq ?, ?, ?, ?
-        psd25     dq ?, ?, ?, ?
-        psd26     dq ?, ?, ?, ?
-        psd27     dq ?, ?, ?, ?
-        psd28     dq ?, ?, ?, ?
-        psd29     dq ?, ?, ?, ?
-        psd30     dq ?, ?, ?, ?
-        psd31     dq ?, ?, ?, ?
+        psd0      rq 4
+        psd1      rq 4
+        psd2      rq 4
+        psd3      rq 4
+        psd4      rq 4
+        psd5      rq 4
+        psd6      rq 4
+        psd7      rq 4
+        psd8      rq 4
+        psd9      rq 4
+        psd10     rq 4
+        psd11     rq 4
+        psd12     rq 4
+        psd13     rq 4
+        psd14     rq 4
+        psd15     rq 4
+        psd16     rq 4
+        psd17     rq 4
+        psd18     rq 4
+        psd19     rq 4
+        psd20     rq 4
+        psd21     rq 4
+        psd22     rq 4
+        psd23     rq 4
+        psd24     rq 4
+        psd25     rq 4
+        psd26     rq 4
+        psd27     rq 4
+        psd28     rq 4
+        psd29     rq 4
+        psd30     rq 4
+        psd31     rq 4
         rb 1024
 ends
-- 
2.45.2
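The nvme.inc hunk above is purely mechanical: fasm's rq n and rt n directives
reserve n uninitialized qwords and tbytes respectively, so `mn rt 4` still
reserves the 40-byte model number and `subnqn rq 32` the 256-byte NVM
Qualified Name; only the long literal lists go away. For instance, the two
declarations below reserve identical storage:

    dummy_a dq ?, ?, ?, ?   ; four uninitialized qwords, spelled out
    dummy_b rq 4            ; the same 32 bytes via a repeat count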
From 5f4fc14007405c37d327e96aa076a190aea23fcd Mon Sep 17 00:00:00 2001
From: Abdur-Rahman Mansoor
Date: Fri, 23 Aug 2024 07:57:32 -0400
Subject: [PATCH 3/5] NVMe: remove macros.inc

---
 drivers/nvme/macros.inc | 30 ------------------------------
 drivers/nvme/nvme.asm   |  4 +---
 2 files changed, 1 insertion(+), 33 deletions(-)
 delete mode 100644 drivers/nvme/macros.inc

diff --git a/drivers/nvme/macros.inc b/drivers/nvme/macros.inc
deleted file mode 100644
index e08e9f347b..0000000000
--- a/drivers/nvme/macros.inc
+++ /dev/null
@@ -1,30 +0,0 @@
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;                                                                   ;;
-;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.      ;;
-;; Distributed under terms of the GNU General Public License         ;;
-;;                                                                   ;;
-;;          GNU GENERAL PUBLIC LICENSE                               ;;
-;;             Version 2, June 1991                                  ;;
-;;                                                                   ;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-macro PDEBUGF _level*, _fmt*, _bus*, _devfn*, [_args] {
-common
-    if __DEBUG__
-        sub esp, 12
-        push ebx
-        movzx ebx, _bus
-        mov dword [esp + 4], ebx
-        movzx ebx, _devfn
-        shr ebx, 3 ; get rid of 3 lowest bits (function code), the rest bits is device code
-        mov dword [esp + 8], ebx
-        movzx ebx, _devfn
-        and ebx, 00000111b ; get only 3 lowest bits (function code)
-        mov dword [esp + 12], ebx
-        pop ebx
-        DEBUGF _level, _fmt, [esp], [esp + 4], [esp + 8], _args
-        add esp, 12
-    end if
-}
-
-; vim: syntax=fasm
diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm
index 1571602658..935f23768b 100644
--- a/drivers/nvme/nvme.asm
+++ b/drivers/nvme/nvme.asm
@@ -29,7 +29,6 @@
 include "../fdo.inc"
 include "../pci.inc"
 include "../peimport.inc"
 include "nvme.inc"
-include "macros.inc"
 include "lib.inc"
 include "command.inc"
@@ -572,7 +571,6 @@ proc detect_nvme
         jnz .err
 
 @@:
-        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): Detected NVMe device...\n", [esi + PCIDEV.bus], [esi + PCIDEV.devfn]
         cmp dword [pcidevs_len], TOTAL_PCIDEVS
         jne @f
         DEBUGF DBG_INFO, "Can't add any more NVMe devices...\n"
@@ -644,7 +642,7 @@ proc device_is_compat stdcall, pci:dword
         ret
 
 .failure:
-        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): something went wrong checking NVMe device compatibility\n", byte [esi + pcidev.bus], byte [esi + pcidev.devfn]
+        DEBUGF DBG_INFO, "nvme%u: something went wrong checking NVMe device compatibility\n", [esi + pcidev.num]
         pop ecx edx esi
         xor eax, eax
         ret
-- 
2.45.2
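The PDEBUGF macro deleted above carried the driver's only bus/device/function
decode, so it is worth spelling out what it did: a PCI devfn byte packs the
device number in bits 7:3 and the function number in bits 2:0. The equivalent
open-coded decode (illustrative fragment; register choice is arbitrary and it
clobbers ebx/ecx):

            movzx   ebx, byte [esi + pcidev.devfn]
            mov     ecx, ebx
            shr     ebx, 3          ; device number (bits 7:3)
            and     ecx, 00000111b  ; function number (bits 2:0)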
From 088dbaed5f2e17194bfef081d422ce9b12450728 Mon Sep 17 00:00:00 2001
From: Abdur-Rahman Mansoor
Date: Fri, 23 Aug 2024 08:17:12 -0400
Subject: [PATCH 4/5] NVMe: add TODO comment suggested by @sdongles

---
 drivers/nvme/nvme.asm | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm
index 935f23768b..00e3de25c2 100644
--- a/drivers/nvme/nvme.asm
+++ b/drivers/nvme/nvme.asm
@@ -1136,6 +1136,12 @@ proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword
         mov word [esi + edx], ax ; Write to CQyHDBL
         mov word [edi + NVM_QUEUE_ENTRY.head], ax
 
+        ; NOTE: Currently commented out since we're just using
+        ; plain spinlocks for notifying when a command has been
+        ; completed, but this will be uncommented later and use
+        ; semaphores instead of mutexes once the polling code
+        ; has been replaced with the asynchronous API.
+
         ; Unlock the mutex now that the command is complete
         ;mov edi, dword [edi + NVM_QUEUE_ENTRY.cmd_ptr]
         ;mov ecx, [cqh]
-- 
2.45.2
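The final patch below moves the controller shutdown into the per-device loop.
The handshake each iteration performs follows the specification's shutdown
procedure: request a normal shutdown through CC.SHN, then wait for CSTS.SHST
to report completion. A condensed sketch using the nvme.inc constants,
assuming edi holds the controller's register base (sketch only, no timeout):

            mov     eax, dword [edi + NVME_MMIO.CC]
            and     eax, not CC_SHN
            or      eax, CC_SHN_NORMAL_SHUTDOWN
            mov     dword [edi + NVME_MMIO.CC], eax
    @@:
            mov     eax, dword [edi + NVME_MMIO.CSTS]
            and     eax, CSTS_SHST
            cmp     eax, CSTS_SHST_SHUTDOWN_COMPLETE
            jne     @b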
From 011e55907ffe1c98538697cc971b9c827d64fa81 Mon Sep 17 00:00:00 2001
From: Abdur-Rahman Mansoor
Date: Fri, 23 Aug 2024 12:34:49 -0400
Subject: [PATCH 5/5] NVMe: shutdown all controllers and fix IRQ handler

---
 drivers/nvme/nvme.asm | 73 ++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/drivers/nvme/nvme.asm b/drivers/nvme/nvme.asm
index 00e3de25c2..563774e58e 100644
--- a/drivers/nvme/nvme.asm
+++ b/drivers/nvme/nvme.asm
@@ -76,7 +76,7 @@ local AnythingLoadedSuccessfully db 0
         test eax, eax
         setne [AnythingLoadedSuccessfully]
         inc ebx
-        cmp ebx, dword [pcidevs_len]
+        cmp ebx, dword [num_pcidevs]
         jne .loop
         cmp [AnythingLoadedSuccessfully], 0
         jz .err
@@ -536,7 +536,7 @@ endp
 
 ; Detects NVMe devices on the PCI bus and stores them into
-; [p_nvme_devices] and sets [pcidevs_len] to the appropriate
+; [p_nvme_devices] and sets [num_pcidevs] to the appropriate
 ; size based off how many NVMe devices there are.
 proc detect_nvme
 
@@ -571,13 +571,14 @@ proc detect_nvme
         jnz .err
 
 @@:
-        cmp dword [pcidevs_len], TOTAL_PCIDEVS
+        cmp dword [num_pcidevs], TOTAL_PCIDEVS
         jne @f
         DEBUGF DBG_INFO, "Can't add any more NVMe devices...\n"
         jmp .exit_success
 
 @@:
-        inc dword [pcidevs_len]
+        inc dword [num_pcidevs]
+        add dword [num_pcidevs_sz], sizeof.pcidev
         cmp dword [p_nvme_devices], 0
         jnz @f ; was the pointer already allocated?
         invoke KernelAlloc, sizeof.pcidev * TOTAL_PCIDEVS
@@ -588,7 +589,7 @@ proc detect_nvme
         DEBUGF DBG_INFO, "nvme: Allocated memory for PCI devices at: 0x%x\n", eax
 
 @@:
-        mov ecx, dword [pcidevs_len]
+        mov ecx, dword [num_pcidevs]
         dec ecx
         mov edi, dword [p_nvme_devices]
         mov edx, ecx
@@ -1281,19 +1282,24 @@ endp
 
 proc irq_handler
 
         push ebx esi edi
-        mov edi, dword [p_nvme_devices]
-        mov esi, edi
-        sub esi, sizeof.pcidev
-        mov ebx, dword [pcidevs_len]
-        xor ecx, ecx
+        mov esi, dword [p_nvme_devices]
+        mov ebx, dword [num_pcidevs_sz]
+        add ebx, esi
 
 .check_who_raised_irq:
+        stdcall device_generated_interrupt, esi
+        test eax, eax
+        jnz @f
         add esi, sizeof.pcidev
-        inc ecx
-        cmp ecx, ebx
-        ; TODO: Apply solution given by @punk_joker of checking which device
-        ; generated an interrupt.
-        ja .not_our_irq
+        cmp esi, ebx
+        jb .check_who_raised_irq
+
+        ; Interrupt not handled by driver, return 0
+        pop edi esi ebx
+        xor eax, eax
+        ret
+
+@@:
         mov edi, dword [esi + pcidev.io_addr]
         mov dword [edi + NVME_MMIO.INTMS], 0x3
         stdcall consume_cq_entries, esi, ADMIN_QUEUE
@@ -1307,12 +1313,28 @@ proc irq_handler
         mov eax, 1
         ret
 
-.not_our_irq:
-        ; Interrupt not handled by driver, return 0
-        pop edi esi ebx
+endp
+
+proc device_generated_interrupt stdcall, pci:dword
+
+        mov edx, [pci]
+        mov edx, dword [edx + pcidev.queue_entries]
+        xor ecx, ecx
+
+@@:
+        mov ax, word [edx + ecx + NVM_QUEUE_ENTRY.head]
+        cmp ax, word [edx + ecx + NVM_QUEUE_ENTRY.tail]
+        jne @f
+        add ecx, sizeof.NVM_QUEUE_ENTRY
+        cmp ecx, LAST_QUEUE_ID * sizeof.NVM_QUEUE_ENTRY
+        jbe @b
         xor eax, eax
         ret
 
+@@:
+        mov eax, 1
+        ret
+
 endp
 
 ; Deletes the allocated I/O queues for all of the NVMe devices,
@@ -1362,13 +1384,7 @@ proc nvme_cleanup
         cmp ebx, LAST_QUEUE_ID
         jbe .get_queue
         pop ebx
-        inc ebx
-        cmp ebx, dword [pcidevs_len]
-        jne .get_pcidev
-        ; NOTE: This code has a bug! It only shuts down the last
-        ; controller, not all of them. Move this inside the loop
-        ; and check if the device is actually valid.
 
         ; Shutdown the controller
         mov edi, dword [esi + pcidev.io_addr]
         mov eax, dword [edi + NVME_MMIO.CC]
@@ -1382,6 +1398,10 @@ proc nvme_cleanup
         test byte [edi + NVME_MMIO.CSTS], CSTS_SHST_SHUTDOWN_COMPLETE
         jnz @b
 
+        inc ebx
+        cmp ebx, dword [num_pcidevs]
+        jne .get_pcidev
+
 .ret:
         pop edi esi ebx
         ret
@@ -1390,8 +1410,9 @@ endp
 
 ;all initialized data place here
 align 4
-        p_nvme_devices  dd 0
-        pcidevs_len     dd 0
+        p_nvme_devices  dd 0 ; Pointer to the array of NVMe devices
+        num_pcidevs     dd 0 ; Number of NVMe devices
+        num_pcidevs_sz  dd 0 ; Size of the device array in bytes
         my_service      db "nvme",0 ;max 16 chars include zero
 
 disk_functions:
         dd disk_functions.end - disk_functions
-- 
2.45.2