;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;          GNU GENERAL PUBLIC LICENSE                             ;;
;;             Version 2, June 1991                                ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

format PE DLL native
entry START

        API_VERSION             equ 0  ;debug
        SRV_GETVERSION          equ 0

        __DEBUG__               = 1
        __DEBUG_LEVEL__         = 1
        DRIVER_VERSION          = 1
        DBG_INFO                = 1
        NULLPTR                 = 0
        FALSE                   = 0
        TRUE                    = 1

        ; flags for alloc_dptr
        PRP1_ENTRY_ALLOCATED    = 1
        PRP1_LIST_ALLOCATED     = 2
        PRP2_ENTRY_ALLOCATED    = 4
        PRP2_LIST_ALLOCATED     = 8

        ; "nvme" + 10 decimal digits + "n" + 10 decimal digits + NUL,
        ; rounded up to a multiple of 4 to keep the stack dword-aligned
        NVME_DISK_NAME_MAX      = 28

section ".flat" code readable writable executable
include "../proc32.inc"
include "../struct.inc"
include "../macros.inc"
include "../fdo.inc"
include "../pci.inc"
include "../peimport.inc"
include "nvme.inc"
include "macros.inc"
include "lib.asm"

struct DISKMEDIAINFO
        flags           dd ?
        sectorsize      dd ?
        capacity        dq ?
ends

; Driver entry point.
; In:  reason = load/unload reason; only DRV_ENTRY starts initialization
; Out: EAX = value returned by RegService on success, 0 on any failure
; Only the single device stored at [p_nvme_devices] is initialized.
proc START c, reason:dword

        cmp     [reason], DRV_ENTRY
        jne     .err

.entry:
        DEBUGF  DBG_INFO, "Detecting NVMe hardware...\n"
        call    detect_nvme
        test    eax, eax
        jz      .err
        mov     ebx, dword [p_nvme_devices]
        test    ebx, ebx
        jz      .err                    ; no NVMe controller was found

        ; An incompatible device is a hard failure. The original code used
        ; "jz @f" here, but the matching "@@:" was commented out, so the jump
        ; resolved to an anonymous label inside a different procedure.
        stdcall device_is_compat, ebx
        test    eax, eax
        jz      .err
        stdcall nvme_init, ebx
        test    eax, eax
        jz      .err

        ; TODO: loop over all [pcidevs_len] devices once more than one
        ; pcidev structure is kept around.
        stdcall add_nvme_disk, [p_nvme_devices]
        test    eax, eax
        jz      .err
        invoke  RegService, my_service, service_proc
        ret

.err:
        call    nvme_cleanup
        xor     eax, eax
        ret

endp

; Service (IOCTL) handler registered through RegService.
; In:  ioctl = pointer to an IOCTL structure
; Out: EAX = 0 if SRV_GETVERSION was answered (API_VERSION is stored in the
;            4-byte output buffer), -1 for any other request or a wrong
;            output size
; NOTE(review): ESI is overwritten without being saved.
proc service_proc stdcall, ioctl:dword

        mov     esi, [ioctl]
        mov     eax, [esi + IOCTL.io_code]
        cmp     eax, SRV_GETVERSION
        jne     .ret

        mov     eax, [esi + IOCTL.output]
        cmp     [esi + IOCTL.out_size], 4
        jne     .ret
        mov     dword [eax], API_VERSION
        xor     eax, eax
        ret

.ret:
        or      eax, -1
        ret

endp

; Private helper: appends EAX as unsigned decimal ASCII digits at [EDI].
; In:    EAX = value, EDI = destination
; Out:   EDI = address just past the last digit written
; Clobb: EAX, ECX, EDX, flags
nvme_append_udec:
        push    ebx
        mov     ebx, 10
        xor     ecx, ecx                ; ecx = number of digits produced
.divide:
        xor     edx, edx
        div     ebx
        push    edx                     ; remainder = next least-significant digit
        inc     ecx
        test    eax, eax
        jnz     .divide
.emit:
        pop     eax                     ; digits come back most-significant first
        add     al, "0"
        mov     byte [edi], al
        inc     edi
        dec     ecx
        jnz     .emit
        pop     ebx
        ret

; Registers the namespace described by the pcidev structure as a disk named
; "nvme<num>n<nsid>" (decimal numbers) and reports the media as present.
; In:  pci = pointer to the pcidev structure
; Out: EAX = 1 on success, 0 if DiskAdd failed
; The name lives on the stack and is released right after DiskAdd returns,
; exactly as the original code did.
proc add_nvme_disk stdcall, pci:dword

        push    esi edi
        mov     esi, [pci]

        ; The original pushed the raw dwords num/nsid, so the bytes after
        ; "nvme" were binary values (a NUL for device 0), not digits.
        sub     esp, NVME_DISK_NAME_MAX
        mov     edi, esp
        mov     dword [edi], "nvme"
        add     edi, 4
        mov     eax, dword [esi + pcidev.num]
        call    nvme_append_udec
        mov     byte [edi], "n"
        inc     edi
        mov     eax, dword [esi + pcidev.nsid]
        call    nvme_append_udec
        mov     byte [edi], 0           ; null terminator

        mov     eax, esp
        invoke  DiskAdd, disk_functions, eax, [esi + pcidev.nsinfo], 0
        add     esp, NVME_DISK_NAME_MAX
        test    eax, eax
        jz      .fail
        invoke  DiskMediaChanged, eax, 1
        DEBUGF  DBG_INFO, "nvme%u: Successfully registered disk\n", [esi + pcidev.num]
        xor     eax, eax
        inc     eax
        pop     edi esi
        ret

.fail:
        DEBUGF  DBG_INFO, "nvme%u: Failed to register disk\n", [esi + pcidev.num]
        xor     eax, eax
        pop     edi esi
        ret

endp

; Disk "querymedia" callback (listed in disk_functions).
; In:  userdata = pointer to the NSINFO structure given to DiskAdd
;      info     = pointer to the DISKMEDIAINFO structure to fill
; Out: EAX = 0; flags = 0, sectorsize = NSINFO.lbads,
;      capacity = 64-bit NSINFO.capacity
proc nvme_query_media stdcall, userdata:dword, info:dword

        push    esi edi
        mov     esi, [userdata]
        mov     edi, [info]
        mov     dword [edi + DISKMEDIAINFO.flags], 0
        mov     eax, dword [esi + NSINFO.lbads]
        mov     dword [edi + DISKMEDIAINFO.sectorsize], eax
        mov     eax, dword [esi + NSINFO.capacity]
        mov     dword [edi + DISKMEDIAINFO.capacity], eax
        mov     eax, dword [esi + NSINFO.capacity + 4]
        mov     dword [edi + DISKMEDIAINFO.capacity + 4], eax
        xor     eax, eax
        pop     edi esi
        ret

endp

; Builds command dword 0 for a new command on queue 'y'.
; In:  pci, y = queue index, opcode = command opcode
; Out: EAX = (command id shl 16) or opcode, where the command id is the value
;      returned by get_new_cid
proc set_cdw0 stdcall, pci:dword, y:dword, opcode:byte

        stdcall get_new_cid, [pci], [y]
        shl     eax, 16
        or      al, [opcode]
        ret

endp

; See pages 161-205 of the NVMe 1.4 specification for reference
; Submits an Identify command on the admin queue and waits for it
; (sqytdbl_write waits for the command).
; In:  pci, nsid = namespace id, dptr = physical address of the 4 KiB
;      destination buffer, cns = Controller or Namespace Structure selector
; Out: EAX = 0 if a debug build rejected the CNS value (nothing is
;      submitted); otherwise whatever sqytdbl_write returns (callers in this
;      file ignore it)
proc nvme_identify stdcall, pci:dword, nsid:dword, dptr:dword, cns:byte

        sub     esp, sizeof.SQ_ENTRY

        ; It's important to check if CNS is a valid value here. In revision 1.0
        ; CNS is a 1 bit field and a two bit field in revision 1.1, using invalid
        ; values results in undefined behavior (see page 162 of NVMe 1.4 spec)
if __DEBUG__
        mov     eax, [pci]
        mov     eax, dword [eax + pcidev.io_addr]
        mov     eax, dword [eax + NVME_MMIO.VS]
        cmp     eax, VS110
        jne     .check_v100
        cmp     [cns], 11b
        jbe     .cns_ok
        DEBUGF  DBG_INFO, "(NVMe) FATAL ERROR: INVALID CNS VALUE ON v1.1.0 CONTROLLERS\n"
        jmp     .bad_cns

.check_v100:
        cmp     eax, VS100
        jne     .cns_ok
        cmp     [cns], 1b
        jbe     .cns_ok
        DEBUGF  DBG_INFO, "(NVMe) FATAL ERROR: INVALID CNS VALUE ON v1.0.0 CONTROLLERS\n"

.bad_cns:
        ; Refuse to issue the command. The original ".err: jmp @b" looped
        ; forever on v1.0 controllers and fell through to the submission on
        ; v1.1 controllers.
        add     esp, sizeof.SQ_ENTRY
        xor     eax, eax
        ret

.cns_ok:
end if

        ; Pass a copy of ESP: "stdcall memsetdz, esp, n" pushes ESP after n
        ; has already been pushed, so the callee would get a pointer 4 bytes
        ; below the command and leave its last dword uninitialized.
        mov     eax, esp
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        mov     eax, [nsid]
        mov     dword [esp + SQ_ENTRY.nsid], eax
        mov     eax, [dptr]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_IDENTIFY
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     al, [cns]
        mov     byte [esp + SQ_ENTRY.cdw10], al
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See pages 348-349 of the NVMe 1.4 specification for information on creating namespaces
; Unfinished: currently it only issues an Identify (CNS_IDNS) for
; NSID 0xffffffff into a freshly allocated page; 'cid' is unused.
; NOTE(review): the page from AllocPage is never released, and its return
; value is run through GetPhysAddr - confirm whether AllocPage already
; returns a physical address.
proc create_namespace stdcall, pci:dword, cid:word

        push    esi
        invoke  AllocPage
        test    eax, eax
        jz      .fail
        invoke  GetPhysAddr
        stdcall nvme_identify, [pci], 0xffffffff, eax, CNS_IDNS

.fail:
        pop     esi
        ret

endp

; returns 1 if the given NSID is an active NSID, returns
; 0 otherwise (also 0 if the temporary 4 KiB buffer cannot be allocated).
; A namespace counts as active when its Identify Namespace data contains at
; least one non-zero dword.
proc is_active_namespace stdcall, pci:dword, nsid:dword

        push    esi edi
        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jnz     .have_buffer
        pop     edi esi
        ret                             ; eax = 0

.have_buffer:
        mov     esi, eax
        ; clear the buffer so stale heap contents cannot be mistaken for
        ; identify data if the controller transfers nothing
        stdcall memsetdz, esi, 0x1000 / 4
        mov     eax, esi
        invoke  GetPhysAddr
        stdcall nvme_identify, [pci], [nsid], eax, CNS_IDNS
        xor     ecx, ecx

.scan:
        mov     eax, dword [esi + ecx * 4]
        test    eax, eax
        jnz     .is_active_nsid
        inc     ecx
        cmp     ecx, 0x1000 / 4
        jne     .scan

.not_active_nsid:
        invoke  KernelFree, esi
        pop     edi esi
        xor     eax, eax
        ret

.is_active_nsid:
        invoke  KernelFree, esi
        pop     edi esi
        xor     eax, eax
        inc     eax
        ret

endp

; See page 248 of the NVMe 1.4 specification for reference
; Returns the number of namespaces that are active, note this
; doesn't mean if EAX = 5, then namespaces 1-5 will be active.
; NOTE: as implemented below, the procedure probes NSIDs 1..[pci + pcidev.nn]
; in ascending order and returns the FIRST active NSID in EAX (0 when none
; is active). It does not modify the pcidev structure.
proc determine_active_nsids stdcall, pci:dword

        push    ebx esi
        mov     esi, [pci]
        xor     ebx, ebx                ; ebx = result, 0 = nothing found yet
        xor     ecx, ecx
        inc     ecx                     ; valid NSIDs start at 1

.loop:
        cmp     ecx, dword [esi + pcidev.nn]
        ja      .ret
        push    ecx
        stdcall is_active_namespace, [pci], ecx
        pop     ecx
        test    eax, eax
        jz      .not_active_namespace
        mov     ebx, ecx
        jmp     .ret

.not_active_namespace:
        inc     ecx
        jmp     .loop

.ret:
        mov     eax, ebx
        ; restore exactly what was pushed (the original popped "edi esi",
        ; corrupting ESI and EDI and never restoring EBX)
        pop     esi ebx
        ret

endp

; See page 101 of the NVMe 1.4 specification for reference
; Submits Create I/O Completion Queue on the admin queue.
; In:  pci, prp1 = physical base of the queue memory, qid = queue id,
;      ien = interrupt enable bits for CDW11
; NOTE(review): CDW10.QSIZE is a 0's-based entry count in the specification,
; but the entry size in bytes is written here; left unchanged because the
; queue bookkeeping elsewhere depends on constants defined in nvme.inc.
proc create_io_completion_queue stdcall, pci:dword, prp1:dword, qid:dword, ien:byte

        sub     esp, sizeof.SQ_ENTRY
        ; use a copy of ESP: "stdcall memsetdz, esp, n" would push ESP after
        ; n and hand the callee a pointer that is 4 bytes too low
        mov     eax, esp
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_CRE_IO_COMPLETION_QUEUE
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     eax, [prp1]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        mov     eax, sizeof.CQ_ENTRY shl 16 ; CDW10.QSIZE
        or      eax, [qid]              ; CDW10.QID
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        movzx   eax, [ien]              ; CDW11.IEN
        or      eax, 0x1                ; CDW11.PC
        ; Don't set CDW11.IV since we're not using MSI-X or MSI vector
        mov     dword [esp + SQ_ENTRY.cdw11], eax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 103-104 of the NVMe 1.4 specification for reference
; Submits Create I/O Submission Queue on the admin queue.
; In:  pci, prp1 = physical base of the queue memory, qid = queue id,
;      cqid = id of the completion queue to pair with
; NOTE(review): same CDW10.QSIZE remark as create_io_completion_queue.
proc create_io_submission_queue stdcall, pci:dword, prp1:dword, qid:dword, cqid:word

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_CRE_IO_SUBMISSION_QUEUE
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     eax, [prp1]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        mov     eax, sizeof.SQ_ENTRY shl 16 ; CDW10.QSIZE
        or      eax, [qid]
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        movzx   eax, [cqid]
        shl     eax, 16                 ; CDW11.CQID
        or      eax, 0x1                ; CDW11.PC (always set this to 1 as some devices may not support non-contiguous pages)
        ; TODO: Set CDW10.QPRIO
        mov     dword [esp + SQ_ENTRY.cdw11], eax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 95-96 of the NVMe 1.4 specification for reference
; Submits an Abort command: CDW10 = (cid shl 16) or sqid.
proc abort stdcall, pci:dword, cid:word, sqid:word

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_ABORT
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        movzx   eax, [cid]
        shl     eax, 16
        ; zero-extend first: "or eax, word [sqid]" mixes operand sizes
        movzx   ecx, [sqid]
        or      eax, ecx
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 205 of the NVMe 1.4 specification for reference
; Submits Set Features: CDW10 = fid, CDW11 = cdw11, DPTR = dptr.
proc set_features stdcall, pci:dword, dptr:dword, fid:byte, cdw11:dword

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_SET_FEATURES
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     eax, [dptr]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        movzx   eax, [fid]
        ;or      eax, 1 shl 31 ; CDW10.SV
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        mov     eax, [cdw11]
        mov     dword [esp + SQ_ENTRY.cdw11], eax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 105 of the NVMe 1.4 specification for reference
; Submits Delete I/O Completion Queue for queue 'qid'.
proc delete_io_completion_queue stdcall, pci:dword, qid:word

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_DEL_IO_COMPLETION_QUEUE
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     ax, [qid]
        mov     word [esp + SQ_ENTRY.cdw10], ax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 114-116 of the NVMe 1.4 specification for reference
; Submits Get Features: CDW10 = ((sel and 7) shl 8) or fid, DPTR = dptr.
proc get_features stdcall, pci:dword, dptr:dword, sel:byte, fid:byte

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_FEATURES
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        movzx   eax, [sel]
        and     eax, 111b
        shl     eax, 8                  ; CDW10.SEL
        ; zero-extend first: "or eax, byte [fid]" mixes operand sizes
        movzx   ecx, [fid]
        or      eax, ecx                ; CDW10.FID
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        mov     eax, [dptr]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        ; TODO: Implement CDW14.UUID?
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 105-106 of the NVMe 1.4 specification for reference
; Submits Delete I/O Submission Queue for queue 'qid'.
proc delete_io_submission_queue stdcall, pci:dword, qid:word

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_DEL_IO_SUBMISSION_QUEUE
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     ax, [qid]
        mov     word [esp + SQ_ENTRY.cdw10], ax
        stdcall sqytdbl_write, [pci], ADMIN_QUEUE, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; See page 117-118 of the NVMe 1.4 specification for reference
; INCOMPLETE: the command is only built on the stack; it is never submitted
; and 'lid' is not used yet.
proc get_log_page stdcall, pci:dword, dptr:dword, lid:byte

        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        stdcall set_cdw0, [pci], ADMIN_QUEUE, ADM_CMD_GET_LOG_PAGE
        mov     dword [esp + SQ_ENTRY.cdw0], eax
        mov     eax, [dptr]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; Builds the PRP list(s) describing 'nprps' consecutive data pages.
; In:  nprps        = number of data pages to describe
;      buf_physical = physical address of the first of those pages; the pages
;                     are assumed page-aligned and physically consecutive
;                     (each entry is the previous one + PAGE_SIZE)
;      prp_list_ptr = receives the virtual address of the allocation so the
;                     caller can KernelFree it
; Out: EAX = physical address of the first PRP list, 0 on failure
;            (nprps = 0 or out of memory)
; Layout: every entry is a QWORD. A list occupies one page; when more data
; pages remain than fit in it, its last entry points to the next list page
; instead of to data. The code relies on KernelAlloc returning page-aligned
; memory, as the original did.
proc build_prp_list stdcall, nprps:dword, buf_physical:dword, prp_list_ptr:dword

        push    esi ebx edi
        sub     esp, 8

        ; stack:
        ; [esp]     - virtual address of the first PRP list (0 until allocated)
        ; [esp + 4] - number of data pages still to be described
        mov     dword [esp], 0
        mov     eax, [nprps]
        mov     dword [esp + 4], eax
        test    eax, eax
        jz      .err                    ; nothing to describe

        ; number of list pages = (nprps - 2) / (entries per page - 1) + 1
        ; (a single page when nprps = 1): every list but the last one gives up
        ; one slot for the chain pointer
        sub     eax, 2
        jae     .count_lists
        xor     eax, eax                ; nprps = 1

.count_lists:
        xor     edx, edx
        mov     ecx, PAGE_SIZE / 8 - 1
        div     ecx
        inc     eax
        imul    eax, PAGE_SIZE
        invoke  KernelAlloc, eax
        test    eax, eax
        jz      .err
        mov     dword [esp], eax
        mov     edi, eax                ; edi = current list page (virtual)
        mov     eax, [prp_list_ptr]
        mov     dword [eax], edi

        ; note we assume buf_physical is page-aligned
        mov     esi, [buf_physical]     ; esi = next data page (physical)

.next_list:
        ; ebx = data entries that go into this list
        mov     ebx, dword [esp + 4]
        cmp     ebx, PAGE_SIZE / 8
        jbe     .fill                   ; the remainder fits, no chaining
        mov     ebx, PAGE_SIZE / 8 - 1  ; keep the last slot for the chain pointer

.fill:
        sub     dword [esp + 4], ebx
        xor     ecx, ecx

.store:
        ; each PRP entry is 8 bytes (the original indexed with ecx * 4, so
        ; neighbouring entries overwrote each other)
        mov     dword [edi + ecx * 8], esi
        mov     dword [edi + ecx * 8 + 4], 0
        DEBUGF  DBG_INFO, "PRP: %x\n", esi
        add     esi, PAGE_SIZE
        inc     ecx
        cmp     ecx, ebx
        jne     .store

        ; check if we need to build another PRP list
        cmp     dword [esp + 4], 0
        je      .done

        ; chain: the last entry of this list holds the physical address of
        ; the next list page
        lea     eax, [edi + PAGE_SIZE]
        invoke  GetPhysAddr
        mov     dword [edi + PAGE_SIZE - 8], eax
        mov     dword [edi + PAGE_SIZE - 4], 0
        add     edi, PAGE_SIZE
        jmp     .next_list

.done:
        mov     eax, dword [esp]
        invoke  GetPhysAddr
        add     esp, 8
        pop     edi ebx esi
        ret

.err:
        add     esp, 8
        pop     edi ebx esi
        xor     eax, eax
        ret

endp

; Fills in the data pointer (PRP1/PRP2) for a transfer.
; In:  ns           = pointer to the NSINFO of the namespace
;      prps_ptr     = pointer to two dwords that receive PRP1 and PRP2
;      numsectors   = number of logical blocks to transfer
;      prp_list_ptr = receives the virtual address of an allocated PRP list
;                     (0 if none was needed); the caller must free it
;      buf          = virtual address of the data buffer
; Out: EAX = 1 on success, 0 if a PRP list was needed but could not be built
; PRP1 = physical address of buf including its in-page offset. Depending on
; how many pages the byte range [offset, offset + numsectors * lbads) touches:
;   1 page      - PRP2 = 0
;   2 pages     - PRP2 = physical address of the second page
;   > 2 pages   - PRP2 = physical address of a PRP list for all pages after
;                 the first
; The pages following the first one are assumed to be physically consecutive.
proc alloc_dptr stdcall, ns:dword, prps_ptr:dword, numsectors:dword, prp_list_ptr:dword, buf:dword

        push    esi edi ebx

        ; load the namespace first (the original read [esi + NSINFO.*]
        ; before ESI had been set up)
        mov     esi, [ns]

        ; the pointer to our PRP list (virtual), needed so
        ; that the caller can free the PRP list afterwards
        mov     edi, [prp_list_ptr]
        mov     dword [edi], 0

        mov     edi, [prps_ptr]
        mov     dword [edi], 0          ; PRP1 default value
        mov     dword [edi + 4], 0      ; PRP2 default value

        ; PRP1 = physical page of buf + offset of buf inside that page.
        ; Masking before merging keeps this correct whether or not
        ; GetPhysAddr preserves the low bits.
        mov     eax, [buf]
        invoke  GetPhysAddr
        and     eax, not (PAGE_SIZE - 1)
        mov     ebx, eax                ; ebx = physical base of the first page
        mov     ecx, [buf]
        and     ecx, PAGE_SIZE - 1      ; ecx = offset inside the first page
        or      eax, ecx
        mov     dword [edi], eax

        ; eax = number of memory pages touched by the transfer
        mov     eax, [numsectors]
        mul     dword [esi + NSINFO.lbads]
        lea     eax, [eax + ecx + PAGE_SIZE - 1]
        xor     edx, edx
        mov     ecx, PAGE_SIZE
        div     ecx

        cmp     eax, 1
        jbe     .success                ; everything lives in the PRP1 page
        add     ebx, PAGE_SIZE          ; ebx = physical address of the second page
        cmp     eax, 2
        ja      .build_prp_list

        ; set PRP2
        mov     dword [edi + 4], ebx
        jmp     .success

.build_prp_list:
        DEBUGF  DBG_INFO, "Allocating PRP list\n"
        ; allocate PRP list for PRP2: one entry per page after the first
        dec     eax
        stdcall build_prp_list, eax, ebx, [prp_list_ptr]
        test    eax, eax
        jz      .err
        DEBUGF  DBG_INFO, "Successfully allocated PRP list at: %x\n", eax
        mov     dword [edi + 4], eax
        jmp     .success

.err:
        xor     eax, eax
        pop     ebx edi esi
        ret

.success:
        xor     eax, eax
        inc     eax
        pop     ebx edi esi
        ret

endp

; Disk "read" callback: selects the opcode in EDX and continues in
; nvme_readwrite with the caller's stack frame untouched.
nvme_read:
        mov     edx, NVM_CMD_READ
        jmp     nvme_readwrite

; Write entry point (not listed in disk_functions yet); falls through into
; nvme_readwrite.
nvme_write:
        mov     edx, NVM_CMD_WRITE

; Common read/write path.
; In:  EDX            = NVM_CMD_READ or NVM_CMD_WRITE (set by the stubs above)
;      ns             = pointer to the NSINFO of the namespace
;      buf            = virtual address of the data buffer
;      start_sector   = first logical block (64-bit)
;      numsectors_ptr = pointer to the number of blocks to transfer
; Out: EAX = 0 on success; -1 (and [numsectors_ptr] = 0) if the request is
;      larger than one command can carry or the data pointer could not be
;      built. The command itself is still assumed to complete successfully.
; NOTE(review): 0 / -1 follow the KolibriOS disk callback status convention
; (0 = OK, -1 = general error) - confirm against the kernel headers.
proc nvme_readwrite stdcall, ns:dword, buf:dword, start_sector:qword, numsectors_ptr:dword

        push    ebx esi edi
        sub     esp, 20

        ; stack:
        ; [esp] - PRP1
        ; [esp + 4] - PRP2
        ; [esp + 8] - command type (read or write)
        ; [esp + 12] - original numsectors value
        ; [esp + 16] - virtual pointer to PRP2 PRP list (if allocated, 0 if not)
        mov     ebx, esp

        mov     eax, [numsectors_ptr]
        mov     eax, dword [eax]
        DEBUGF  DBG_INFO, "buf: %x, start_sector: %x%x, numsectors: %u\n", [buf], [start_sector + 4], [start_sector], eax
        mov     dword [ebx + 8], edx    ; command type (read or write)
        mov     dword [ebx + 12], eax   ; save original numsectors value
        mov     dword [ebx + 16], 0     ; no PRP list yet
        mov     esi, [ns]

        test    eax, eax
        jz      .ok                     ; nothing to transfer
        ; CDW12.NLB is a 16-bit 0's-based count, so one command can move at
        ; most 0x10000 blocks; fail rather than silently truncate
        cmp     eax, 0x10000
        ja      .fail

        ; Note that [esp] will contain the value of PRP1 and [esp + 4] will
        ; contain the value of PRP2. If PRP2 is a PRP list, then [esp + 16] will point
        ; to the allocated PRP list (after this call, only if it completes successfully)
        lea     ecx, [ebx + 16]
        stdcall alloc_dptr, esi, ebx, eax, ecx, [buf]
        test    eax, eax
        jz      .fail

        DEBUGF  DBG_INFO, "PRP1: %x, PRP2: %x\n", [ebx], [ebx + 4]
        mov     eax, dword [ebx + 12]
        dec     eax                     ; NLB is 0's-based: n blocks are encoded as n - 1
        stdcall nvme_io_rw, [esi + NSINFO.pci], \
                            1, \
                            [esi + NSINFO.nsid], \
                            dword [ebx], \
                            dword [ebx + 4], \
                            dword [start_sector], \
                            dword [start_sector + 4], \
                            eax, \
                            dword [ebx + 8]

        ; sqytdbl_write has waited for the command, so the PRP list (if one
        ; was built) is no longer needed
        mov     eax, dword [ebx + 16]
        test    eax, eax
        jz      .ok
        invoke  KernelFree, eax

.ok:
        ; assume command completes successfully for now
        xor     eax, eax
        jmp     .end

.fail:
        mov     ecx, [numsectors_ptr]
        mov     dword [ecx], 0
        or      eax, -1

.end:
        add     esp, 20
        pop     edi esi ebx
        ret

endp

; See page 258-261 (read) and 269-271 (write) of the NVMe 1.4 specification for reference
; Builds and submits a read or write command on I/O queue 'qid'.
; In:  pci, qid, nsid
;      prps   = PRP1 (low dword) and PRP2 (high dword)
;      slba   = starting LBA (64-bit)
;      nlb    = number of logical blocks, 0's-based as defined for CDW12
;      opcode = NVM_CMD_READ or NVM_CMD_WRITE
proc nvme_io_rw stdcall, pci:dword, qid:word, nsid:dword, prps:qword, slba:qword, nlb:word, opcode:dword

        ; TODO: Use IDENTC.NOIOB to construct read/write commands that don't
        ; cross the I/O boundary to achieve optimal performance
        ;
        ; TODO: Read AWUN/NAWUN
        sub     esp, sizeof.SQ_ENTRY
        mov     eax, esp                ; see create_io_completion_queue
        stdcall memsetdz, eax, sizeof.SQ_ENTRY / 4
        movzx   ecx, [qid]
        stdcall set_cdw0, [pci], ecx, [opcode]
        mov     dword [esp + SQ_ENTRY.cdw0], eax ; CDW0
        mov     eax, dword [prps]
        mov     dword [esp + SQ_ENTRY.dptr], eax
        mov     eax, dword [prps + 4]
        mov     dword [esp + SQ_ENTRY.dptr + 8], eax
        mov     eax, [nsid]
        mov     dword [esp + SQ_ENTRY.nsid], eax
        mov     eax, dword [slba]       ; slba_lo
        mov     dword [esp + SQ_ENTRY.cdw10], eax
        mov     eax, dword [slba + 4]   ; slba_hi
        mov     dword [esp + SQ_ENTRY.cdw11], eax
        movzx   eax, [nlb]
        mov     word [esp + SQ_ENTRY.cdw12], ax
        movzx   ecx, [qid]
        stdcall sqytdbl_write, [pci], ecx, esp
        add     esp, sizeof.SQ_ENTRY
        ret

endp

; Walks the kernel PCI list looking for NVMe controllers
; (class 01h, subclass 08h) and records bus/devfn/num in [p_nvme_devices].
; Out: EAX = 1 when the scan finished (even if nothing was found),
;      0 if the pcidev structure could not be allocated
; NOTE(review): only one pcidev structure is ever allocated, so when several
; controllers are present each one overwrites the previous entry while
; pcidevs_len keeps counting. EBX is used without being saved.
proc detect_nvme

        invoke  GetPCIList
        mov     edx, eax                ; edx = list head, marks the end of the walk

.check_dev:
        mov     ebx, dword [eax + PCIDEV.class]
        and     ebx, 0x00ffff00         ; retrieve class/subclass code only
        cmp     ebx, 0x00010800         ; Mass Storage Controller - Non-Volatile Memory Controller
        je      .found_dev

.next_dev:
        mov     eax, dword [eax + PCIDEV.fd]
        cmp     eax, edx
        jne     .check_dev
        jmp     .exit_success

.found_dev:
        push    edx eax
        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): Detected NVMe device...\n", byte [eax + PCIDEV.bus], byte [eax + PCIDEV.devfn]
        cmp     dword [pcidevs_len], TOTAL_PCIDEVS
        jne     .have_room
        pop     eax edx
        jmp     .exit_success

.have_room:
        inc     dword [pcidevs_len]
        mov     ebx, dword [p_nvme_devices]
        test    ebx, ebx
        jnz     .fill_dev
        invoke  KernelAlloc, sizeof.pcidev
        test    eax, eax
        jz      .err_no_mem
        mov     dword [p_nvme_devices], eax
        ; start from a clean structure: bus/devfn are stored as bytes below
        ; but read back as dwords in device_is_compat and irq_handler
        stdcall memsetdz, eax, sizeof.pcidev / 4
        DEBUGF  DBG_INFO, "(NVMe) Allocated pcidev struct at 0x%x\n", [p_nvme_devices]

.fill_dev:
        mov     ecx, dword [pcidevs_len]
        dec     ecx                     ; ecx = zero-based device number
        pop     eax
        mov     ebx, dword [p_nvme_devices]
        movzx   edx, byte [eax + PCIDEV.bus]
        mov     byte [ebx + pcidev.bus], dl
        movzx   edx, byte [eax + PCIDEV.devfn]
        mov     byte [ebx + pcidev.devfn], dl
        mov     dword [ebx + pcidev.num], ecx
        pop     edx
        jmp     .next_dev

.err_no_mem:
        dec     dword [pcidevs_len]     ; the device was not recorded after all
        pop     eax edx
        xor     eax, eax
        ret

.exit_success:
        xor     eax, eax
        inc     eax
        ret

endp

; Reads the interrupt line and BAR0 of the device, maps its registers and
; caches CAP.DSTRD and the controller version in the pcidev structure.
; In:  pci = pointer to the pcidev structure (bus/devfn filled in)
; Out: EAX = 1 on success, 0 if BAR0 is empty or a mapping failed
; NOTE(review): the first (small) mapping is not released after remapping.
proc device_is_compat stdcall, pci:dword

        push    esi edx ecx
        mov     esi, [pci]
        invoke  PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_line
        mov     byte [esi + pcidev.iline], al
        invoke  PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.base_addr_0
        and     eax, 0xfffffff0
        jz      .failure

        push    eax                     ; keep the BAR0 base for the second mapping
        invoke  MapIoMem, eax, sizeof.NVME_MMIO, PG_SW+PG_NOCACHE
        test    eax, eax
        jz      .failure_unwind
        ;DEBUGF DBG_INFO, "(NVMe) MMIO allocated at: 0x%x\n", eax
        mov     dword [esi + pcidev.io_addr], eax
        mov     eax, dword [eax + NVME_MMIO.CAP + 4]
        and     eax, CAP_DSTRD
        mov     byte [esi + pcidev.dstrd], al

        ; 1003h + ((2y + 1) * (4 << CAP.DSTRD))
        mov     cl, al                  ; shift by DSTRD (the original shifted by a stale CL)
        mov     eax, 4
        shl     eax, cl
        mov     ecx, NVM_ASQS
        shl     ecx, 1
        inc     ecx
        imul    ecx, eax
        add     ecx, 0x1003
        pop     edx
        invoke  MapIoMem, edx, ecx, PG_SW+PG_NOCACHE
        test    eax, eax
        jz      .failure
        mov     dword [esi + pcidev.io_addr], eax
        mov     eax, dword [eax + NVME_MMIO.VS]
        DEBUGF  DBG_INFO, "nvme%u: Controller version: 0x%x\n", [esi + pcidev.num], eax
        mov     dword [esi + pcidev.version], eax
        pop     ecx edx esi
        xor     eax, eax
        inc     eax
        ret

.failure_unwind:
        add     esp, 4                  ; drop the saved BAR0 base
.failure:
        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): something went wrong checking NVMe device compatibility\n", byte [esi + pcidev.bus], byte [esi + pcidev.devfn]
        pop     ecx edx esi
        xor     eax, eax
        ret

endp

; nvme_init: Initializes the NVMe controller
; Resets and configures the controller, allocates the queues, attaches the
; IRQ handler, creates I/O queue pair 1, finds an active namespace and fills
; in the NSINFO structure that is later handed to DiskAdd.
; In:  pci = pointer to the pcidev structure prepared by device_is_compat
; Out: EAX = 1 on success, 0 on failure
; NOTE(review): apart from the identify buffers, resources obtained before a
; failure (queue array, ring buffers, IRQ handler) are not released.
proc nvme_init stdcall, pci:dword

        push    ebx esi edi
        mov     esi, dword [pci]
        mov     edi, dword [esi + pcidev.io_addr]

if 0
        mov     eax, dword [edi + NVME_MMIO.CAP]
        DEBUGF  DBG_INFO, "(NVMe) CAP (0-31): 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        DEBUGF  DBG_INFO, "(NVMe) CAP (32-63): 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CC]
        DEBUGF  DBG_INFO, "(NVMe) CC: 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CSTS]
        DEBUGF  DBG_INFO, "(NVMe) CSTS: 0x%x\n", eax
end if

        ; check maximum queue entries supported
        mov     ax, word [edi + NVME_MMIO.CAP]
        cmp     ax, SQ_ENTRIES
        jb      .exit_fail

        ; For some reason, bit 7 (No I/O command set supported) is also set to 1 despite bit 0 (NVM command set)
        ; being set to 1.. so I am not sure if bit 7 should be checked at all.. investigate later.
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        test    eax, CAP_CSS_NVM_CMDSET
        jz      .exit_fail

        ; Reset controller before we configure it
        stdcall nvme_controller_reset, esi

if __DEBUG__
        stdcall nvme_wait, edi
end if

        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        and     eax, CAP_MPSMIN
        shr     eax, 16
        cmp     eax, NVM_MPS
        ja      .exit_fail
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        and     eax, CAP_MPSMAX
        shr     eax, 20
        cmp     eax, NVM_MPS
        jb      .exit_fail

        ; Configure IOSQES, IOCQES, AMS, MPS, CSS
        and     dword [edi + NVME_MMIO.CC], not (CC_AMS or CC_MPS or CC_CSS or CC_IOSQES or CC_IOCQES)
        mov     eax, dword [edi + NVME_MMIO.CC]
        ; CSS = 0 (NVM Command Set)
        ; AMS = 0 (Round Robin)
        ; MPS = 0 (4KiB Pages)
        ; IOSQES = 6 (64B)
        ; IOCQES = 4 (16B)
        or      eax, (4 shl 20) or (6 shl 16)
        mov     dword [edi + NVME_MMIO.CC], eax

        ; Configure Admin Queue Attributes
        mov     eax, dword [edi + NVME_MMIO.AQA]
        and     eax, not (AQA_ASQS or AQA_ACQS)
        or      eax, NVM_ASQS or (NVM_ACQS shl 16)
        mov     dword [edi + NVME_MMIO.AQA], eax

        ; Allocate list of queues
        invoke  KernelAlloc, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1)
        test    eax, eax
        jz      .exit_fail
        mov     dword [esi + pcidev.queue_entries], eax
        mov     edi, eax
        stdcall memsetdz, eax, sizeof.NVM_QUEUE_ENTRY * (LAST_QUEUE_ID + 1) / 4

        ; Allocate submission/completion queue pointers
        ; TODO: Make these queues physically contiguous
        xor     ecx, ecx                ; ecx = byte offset of the current queue entry

.alloc_queue:
        push    ecx
        invoke  CreateRingBuffer, 0x1000, PG_SW
        pop     ecx
        test    eax, eax
        jz      .exit_fail
        mov     dword [edi + ecx + NVM_QUEUE_ENTRY.sq_ptr], eax
        push    ecx
        stdcall memsetdz, eax, sizeof.CQ_ENTRY * CQ_ENTRIES / 4
        invoke  CreateRingBuffer, 0x1000, PG_SW
        pop     ecx
        test    eax, eax
        jz      .exit_fail
        mov     dword [edi + ecx + NVM_QUEUE_ENTRY.cq_ptr], eax
        push    ecx
        stdcall memsetdz, eax, sizeof.CQ_ENTRY * CQ_ENTRIES / 4
        pop     ecx
        mov     dword [edi + ecx + NVM_QUEUE_ENTRY.phase_tag], CQ_PHASE_TAG
        add     ecx, sizeof.NVM_QUEUE_ENTRY
        cmp     ecx, (LAST_QUEUE_ID + 1) * sizeof.NVM_QUEUE_ENTRY
        jne     .alloc_queue

        ; Configure Admin Submission/Completion Queue Base Address
        mov     esi, [pci]
        mov     esi, dword [esi + pcidev.io_addr]
        mov     eax, dword [edi + NVM_QUEUE_ENTRY.sq_ptr]
        invoke  GetPhysAddr
        mov     dword [esi + NVME_MMIO.ASQ], eax
        mov     dword [esi + NVME_MMIO.ASQ + 4], 0
        mov     eax, dword [edi + NVM_QUEUE_ENTRY.cq_ptr]
        invoke  GetPhysAddr
        mov     dword [esi + NVME_MMIO.ACQ], eax
        mov     dword [esi + NVME_MMIO.ACQ + 4], 0

        ; Attach interrupt handler
        mov     esi, [pci]
        movzx   eax, byte [esi + pcidev.iline]
        DEBUGF  DBG_INFO, "nvme%u: Attaching interrupt handler to IRQ %u\n", [esi + pcidev.num], eax
        invoke  AttachIntHandler, eax, irq_handler, 0
        test    eax, eax
        jz      .exit_fail
        DEBUGF  DBG_INFO, "nvme%u: Successfully attached interrupt handler\n", [esi + pcidev.num]

        ; Restart the controller
        stdcall nvme_controller_start, esi

        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     edi, eax                ; edi = Identify Controller buffer
        invoke  GetPhysAddr
        ; pci:dword, nsid:dword, dptr:dword, cns:byte
        stdcall nvme_identify, [pci], 0, eax, CNS_IDCS
        mov     eax, dword [edi + IDENTC.nn]
        mov     dword [esi + pcidev.nn], eax
        DEBUGF  DBG_INFO, "nvme%u: Namespace Count: %u\n", [esi + pcidev.num], eax

        lea     ebx, [edi + IDENTC.sn]
        lea     eax, [esi + pcidev.serial]
        stdcall memcpy, eax, ebx, 20
        DEBUGF  DBG_INFO, "nvme%u: Serial Number: %s\n", [esi + pcidev.num], eax
        add     ebx, 20
        lea     eax, [esi + pcidev.model]
        stdcall memcpy, eax, ebx, 40
        DEBUGF  DBG_INFO, "nvme%u: Model: %s\n", [esi + pcidev.num], eax

        mov     edx, dword [esi + pcidev.version]
        cmp     edx, VS140
        jb      .check_entry_sizes

        ; This is a reserved field in pre-1.4 controllers
        mov     al, byte [edi + IDENTC.cntrltype]
        cmp     al, CNTRLTYPE_IO_CONTROLLER
        jne     .fail_free_ident
        DEBUGF  DBG_INFO, "nvme%u: I/O controller detected...\n", [esi + pcidev.num]

.check_entry_sizes:
        ; TODO: check IDENTC.AVSCC
        mov     al, byte [edi + IDENTC.sqes]
        and     al, 11110000b
        cmp     al, 0x60                ; maximum submission queue size should at least be 64 bytes
        jb      .fail_free_ident
        mov     al, byte [edi + IDENTC.cqes]
        and     al, 11110000b
        ; compare, do not mask: "and" always clears CF, so the original
        ; "and al, 0x40" / "jb" pair could never reject anything
        cmp     al, 0x40                ; maximum completion queue entry size should at least be 16 bytes
        jb      .fail_free_ident
        invoke  KernelFree, edi

        mov     eax, 1 or (1 shl 16)    ; CDW11 (set the number of queues we want)
        stdcall set_features, [pci], NULLPTR, FID_NUMBER_OF_QUEUES, eax

        ; NSQA (bits 15:0) and NCQA (bits 31:16) of the completion's dword 0
        ; are 0's-based counts, so any value means at least one I/O queue pair
        ; is available, which is all this driver creates. The value is only
        ; logged. The completion is read from admin CQ slot 1, i.e. this
        ; assumes Set Features was the second admin command.
if __DEBUG__
        mov     esi, [pci]
        mov     eax, dword [esi + pcidev.queue_entries]
        mov     eax, dword [eax + NVM_QUEUE_ENTRY.cq_ptr]
        mov     eax, dword [eax + sizeof.CQ_ENTRY + CQ_ENTRY.cdw0]
        DEBUGF  DBG_INFO, "nvme%u: Set Features CDW0: 0x%x\n", [esi + pcidev.num], eax
end if

        ; Create I/O Queues
        ; (TODO: create N queue pairs for N CPU cores, see page 8 of NVMe 1.4 spec for an explanation)
        mov     esi, [pci]
        mov     edi, esi                ; edi = pcidev (for the log messages)
        mov     esi, dword [esi + pcidev.queue_entries]
        lea     esi, [esi + sizeof.NVM_QUEUE_ENTRY] ; esi = queue entry of I/O queue 1
        mov     eax, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
        invoke  GetPhysAddr
        stdcall create_io_completion_queue, [pci], eax, 1, IEN_ON
        DEBUGF  DBG_INFO, "nvme%u: Successfully created I/O completion queue 1\n", [edi + pcidev.num]
        mov     eax, dword [esi + NVM_QUEUE_ENTRY.sq_ptr]
        invoke  GetPhysAddr
        stdcall create_io_submission_queue, [pci], eax, 1, 1
        DEBUGF  DBG_INFO, "nvme%u: Successfully created I/O submission queue 1\n", [edi + pcidev.num]

if 1
        stdcall determine_active_nsids, [pci]
        test    eax, eax
        jz      .exit_fail              ; No active NSIDS
        mov     esi, [pci]
        mov     dword [esi + pcidev.nsid], eax
        DEBUGF  DBG_INFO, "nvme%u: Found active NSID: %u\n", [esi + pcidev.num], eax
else
        mov     esi, [pci]
        xor     eax, eax
        inc     eax
        mov     dword [esi + pcidev.nsid], eax
end if

        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     edi, eax                ; edi = Identify Namespace buffer
        invoke  GetPhysAddr
        stdcall nvme_identify, [pci], [esi + pcidev.nsid], eax, CNS_IDNS
        invoke  KernelAlloc, sizeof.NSINFO
        test    eax, eax
        jz      .fail_free_ident
        mov     ebx, eax                ; ebx = NSINFO being filled in
        mov     dword [esi + pcidev.nsinfo], eax
        mov     al, byte [edi + IDENTN.nsfeat]
        mov     byte [ebx + NSINFO.features], al
        DEBUGF  DBG_INFO, "nvme%un%u: Namespace Features: 0x%x\n", [esi + pcidev.num], [esi + pcidev.nsid], al
        mov     dword [ebx + NSINFO.pci], esi
        ; nvme_readwrite takes the NSID from NSINFO, so it has to be stored
        ; here (it was previously left uninitialized)
        mov     eax, dword [esi + pcidev.nsid]
        mov     dword [ebx + NSINFO.nsid], eax

        mov     eax, dword [edi + IDENTN.nsze]
        mov     dword [ebx + NSINFO.size], eax
        mov     eax, dword [edi + IDENTN.nsze + 4]
        mov     dword [ebx + NSINFO.size + 4], eax
        mov     eax, dword [edi + IDENTN.ncap]
        mov     dword [ebx + NSINFO.capacity], eax
        mov     eax, dword [edi + IDENTN.ncap + 4]
        mov     dword [ebx + NSINFO.capacity + 4], eax
        DEBUGF  DBG_INFO, "nvme%un%u: Namespace Size: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.nsze], [edi + IDENTN.nsze + 4]
        DEBUGF  DBG_INFO, "nvme%un%u: Namespace Capacity: %u + %u logical blocks\n", [esi + pcidev.num], [esi + pcidev.nsid], [edi + IDENTN.ncap], [edi + IDENTN.ncap + 4]

        ; NOTE(review): LBA format 0 is always used; the format currently
        ; selected by FLBAS is not consulted.
        mov     eax, dword [edi + IDENTN.lbaf0]
        shr     eax, 16                 ; Get LBADS
        and     eax, 0xff
        stdcall pow2, eax
        DEBUGF  DBG_INFO, "nvme%un%u: Namespace LBA Data Size: %u\n", [esi + pcidev.num], [esi + pcidev.nsid], eax
        mov     dword [ebx + NSINFO.lbads], eax

        ; pg_sectors = PAGE_SIZE / logical block size
        mov     ecx, PAGE_SIZE
        xchg    eax, ecx
        xor     edx, edx
        div     ecx
        mov     dword [ebx + NSINFO.pg_sectors], eax

        invoke  KernelFree, edi

if 0
        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     edi, eax
        invoke  KernelAlloc, 0x8
        test    eax, eax
        jz      .exit_fail
        mov     edx, NVM_CMD_READ
        mov     dword [eax], 0x11
        stdcall nvme_readwrite, [esi + pcidev.nsinfo], edi, 0, 0, eax
        test    eax, eax
        jz      .exit_fail
        DEBUGF  DBG_INFO, "%s\n", edi
end if

        DEBUGF  DBG_INFO, "nvme%u: Successfully initialized driver\n", [esi + pcidev.num]
        xor     eax, eax
        inc     eax
        pop     edi esi ebx
        ret

.fail_free_ident:
        ; EDI still holds an identify buffer on these paths
        invoke  KernelFree, edi

.exit_fail:
if __DEBUG__
        mov     esi, [pci]
        DEBUGF  DBG_INFO, "nvme%u: failed to initialize controller\n", [esi + pcidev.num]
end if
        xor     eax, eax
        pop     edi esi ebx
        ret

endp

; Returns the command id to use for the next command on queue 'y':
; the current submission queue tail of that queue, zero-extended in EAX.
proc get_new_cid stdcall, pci:dword, y:dword

        push    esi
        mov     esi, [pci]
        mov     esi, [esi + pcidev.queue_entries]
        mov     ecx, [y]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        movzx   eax, word [esi + ecx + NVM_QUEUE_ENTRY.tail]
        pop     esi
        ret

endp

; Clears CC.EN and spins until CSTS.RDY reads 0.
; NOTE(review): the wait has no timeout; a controller that never clears RDY
; hangs the caller.
proc nvme_controller_reset stdcall, pci:dword

        push    esi edi
        mov     esi, [pci]
        DEBUGF  DBG_INFO, "nvme%u: Resetting Controller...\n", [esi + pcidev.num]
        mov     edi, dword [esi + pcidev.io_addr]
        and     dword [edi + NVME_MMIO.CC], 0xfffffffe ; CC.EN = 0

        ; Wait for controller to be brought to idle state, CSTS.RDY should be cleared to 0 when this happens
.wait:
        test    dword [edi + NVME_MMIO.CSTS], CSTS_RDY
        jnz     .wait
        DEBUGF  DBG_INFO, "nvme%u: Successfully reset controller...\n", [esi + pcidev.num]
        pop     edi esi
        ret

endp

; Sets CC.EN and spins until CSTS.RDY reads 1.
; NOTE(review): the wait has no timeout.
proc nvme_controller_start stdcall, pci:dword

        push    esi edi
        mov     esi, [pci]
        DEBUGF  DBG_INFO, "nvme%u: Starting Controller...\n", [esi + pcidev.num]
        mov     edi, dword [esi + pcidev.io_addr]
        or      dword [edi + NVME_MMIO.CC], 1 ; CC.EN = 1

        ; Wait for controller to be brought into active state, CSTS.RDY should be set to 1 when this happens
.wait:
        test    dword [edi + NVME_MMIO.CSTS], CSTS_RDY
        jz      .wait
        DEBUGF  DBG_INFO, "nvme%u: Successfully started controller...\n", [esi + pcidev.num]
        pop     edi esi
        ret

endp

; Should be called only after the value of CC.EN has changed
; Sleeps for a time derived from the CAP.TO field (TO * 150).
; In:  mmio = virtual address of the controller registers
; NOTE(review): Sleep is invoked with the delay in ESI - presumably the
; kernel export takes its argument in that register; confirm.
proc nvme_wait stdcall, mmio:dword

        push    esi
        mov     esi, [mmio]
        mov     esi, dword [esi + NVME_MMIO.CAP]
        and     esi, CAP_TO
        shr     esi, 24
        imul    esi, 150                ; TODO: bad time delay, set to appropriate value later
        invoke  Sleep
        pop     esi
        ret

endp

; Writes to completion queue 'y' head doorbell
; In:  pci, y = queue index, cqh = new head (low 16 bits are used)
; Also records the new head in the queue entry of queue 'y'.
proc cqyhdbl_write stdcall, pci:dword, y:dword, cqh:dword

        push    esi edi
        mov     esi, [pci]

        ; 1000h + ((2y + 1) * (4 << CAP.DSTRD))
        mov     eax, [y]
        shl     eax, 1
        inc     eax
        mov     edx, 4
        mov     cl, byte [esi + pcidev.dstrd]
        shl     edx, cl
        imul    edx, eax
        add     edx, 0x1000             ; edx = doorbell register offset

        mov     ecx, [y]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        mov     edi, dword [esi + pcidev.queue_entries]
        lea     edi, [edi + ecx]        ; edi = queue entry of queue y
        mov     esi, dword [esi + pcidev.io_addr]
        movzx   eax, word [cqh]
        ;DEBUGF DBG_INFO, "(NVMe) Writing to completion queue doorbell register 0x%x: %u\n", dx, ax
        ; doorbells are 32-bit registers: write the whole dword, the upper
        ; (reserved) half as zero
        mov     dword [esi + edx], eax  ; Write to CQyHDBL
        mov     word [edi + NVM_QUEUE_ENTRY.head], ax
        pop     edi esi
        ret

endp

; Writes to submission queue 'y' tail doorbell
; Copies the command at 'cmd' into slot <CID> of submission queue 'y' (the CID
; is taken from bits 31:16 of its CDW0), advances the tail, rings the
; doorbell and waits for the completion through nvme_cmd_wait.
; NOTE(review): the new tail is computed as (tail >= NVM_ASQS ? 0 : tail) + 1,
; so it never becomes 0 again after the first command and is also applied to
; I/O queues; the wrap-around handling should be reviewed against the real
; queue sizes.
proc sqytdbl_write stdcall, pci:dword, y:word, cmd:dword

        push    ebx esi edi

        ; edi = address of the submission queue slot for this command
        mov     edi, [pci]
        mov     edi, dword [edi + pcidev.queue_entries]
        movzx   ecx, [y]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        mov     edi, dword [edi + ecx + NVM_QUEUE_ENTRY.sq_ptr]
        mov     esi, [cmd]
        mov     ecx, dword [esi + SQ_ENTRY.cdw0]
        shr     ecx, 16                 ; Get CID
        imul    ecx, sizeof.SQ_ENTRY
        lea     edi, [edi + ecx]
        stdcall memcpy, edi, esi, sizeof.SQ_ENTRY

        ; eax = current tail of queue y
        mov     edi, [pci]
        mov     esi, dword [edi + pcidev.io_addr]
        mov     edi, dword [edi + pcidev.queue_entries]
        movzx   ecx, [y]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        movzx   eax, word [edi + ecx + NVM_QUEUE_ENTRY.tail]
        cmp     ax, NVM_ASQS
        jb      .advance
        xor     ax, ax

.advance:
        mov     esi, [pci]
        inc     ax

        ; 1000h + (2y * (4 << CAP.DSTRD))
        movzx   ebx, [y]
        shl     ebx, 1
        mov     edx, 4
        mov     cl, byte [esi + pcidev.dstrd]
        shl     edx, cl
        imul    edx, ebx
        add     edx, 0x1000             ; edx = doorbell register offset
        mov     esi, dword [esi + pcidev.io_addr]
        ; doorbells are 32-bit registers: write the whole dword (the upper
        ; half of EAX is zero here)
        mov     dword [esi + edx], eax
        movzx   ecx, [y]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        mov     word [edi + ecx + NVM_QUEUE_ENTRY.tail], ax

        ; wait on the completion slot of the command that was just queued
        dec     ax
        movzx   ecx, [y]
        stdcall nvme_cmd_wait, [pci], ecx, eax
        pop     edi esi ebx
        ret

endp

; Calculates 2^x
; In:  x = exponent (byte)
; Out: EAX = 1 shl x (the CPU masks the shift count to 5 bits)
proc pow2 stdcall, x:byte

        push    ecx
        mov     cl, [x]
        xor     eax, eax
        inc     eax
        shl     eax, cl                 ; a shift by 0 leaves EAX = 1
        pop     ecx
        ret

endp

; Busy-waits until the phase bit of completion queue entry 'cid' of queue 'y'
; matches the phase recorded for that queue.
; Out: EAX = the queue's NVM_QUEUE_ENTRY.phase_tag value (not a status)
; NOTE(review): there is no timeout, and nothing visible in this file toggles
; the expected phase when the queue wraps.
proc nvme_cmd_wait stdcall, pci:dword, y:dword, cid:word

        push    esi
        mov     esi, [pci]
        movzx   ecx, word [cid]
        mov     edx, [y]
        imul    edx, sizeof.NVM_QUEUE_ENTRY
        mov     esi, dword [esi + pcidev.queue_entries]
        lea     esi, [esi + edx]
        imul    ecx, sizeof.CQ_ENTRY    ; ecx = byte offset of the completion entry
        mov     eax, dword [esi + NVM_QUEUE_ENTRY.phase_tag]
        mov     esi, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
        test    eax, CQ_PHASE_TAG
        jnz     .phase_tag_1

.phase_tag_0:
        ; expected phase is 0: wait for the bit to be clear
        test    byte [esi + ecx + CQ_ENTRY.status], CQ_PHASE_TAG
        jnz     .phase_tag_0
        pop     esi
        ret

.phase_tag_1:
        ;DEBUGF DBG_INFO, "status: %x\n", [esi + ecx + CQ_ENTRY.status]
        ; expected phase is 1: wait for the bit to be set
        test    byte [esi + ecx + CQ_ENTRY.status], CQ_PHASE_TAG
        jz      .phase_tag_1
        pop     esi
        ret

endp

; Reports whether a queue with the given tail and head is considered full.
; Out: EAX = 1 if full, 0 otherwise
; NOTE(review): the second test subtracts head from tail only after
; establishing tail < head, so the difference can equal 1 only for
; tail = 0, head = 0FFFFh; "head - tail = 1" was probably intended. The
; logic is kept as it was; only the register save/restore is fixed.
proc is_queue_full stdcall, tail:word, head:word

        ; save the full register and restore it on every path (the original
        ; pushed BX only and skipped the pop when reporting "full")
        push    ebx
        mov     ax, [tail]
        mov     bx, [head]
        cmp     ax, bx
        je      .not_full
        test    bx, bx
        jnz     .check_distance
        cmp     ax, NVM_ASQS
        jne     .check_distance
        jmp     .full

.check_distance:
        cmp     ax, bx
        jae     .not_full
        sub     ax, bx
        cmp     ax, 1
        jne     .not_full

.full:
        pop     ebx
        xor     eax, eax
        inc     eax
        ret

.not_full:
        pop     ebx
        xor     eax, eax
        ret

endp

; Logs the status of the completion entries between the recorded head and
; tail of queue 'queue', advancing the completion head doorbell once per
; entry. Does nothing when is_queue_full reports the queue as full.
; Out: EAX = 0
; NOTE(review): the head is incremented without wrapping at the queue size.
proc consume_cq_entries stdcall, pci:dword, queue:dword

        push    esi edi
        mov     esi, [pci]
        mov     ecx, [queue]
        imul    ecx, sizeof.NVM_QUEUE_ENTRY
        mov     esi, dword [esi + pcidev.queue_entries]
        lea     esi, [esi + ecx]        ; esi = queue entry
        mov     edi, dword [esi + NVM_QUEUE_ENTRY.cq_ptr]
        movzx   eax, word [esi + NVM_QUEUE_ENTRY.tail]
        movzx   ecx, word [esi + NVM_QUEUE_ENTRY.head]
        stdcall is_queue_full, eax, ecx
        test    eax, eax
        jnz     .end
        movzx   ecx, word [esi + NVM_QUEUE_ENTRY.head]

.loop:
        cmp     cx, word [esi + NVM_QUEUE_ENTRY.tail]
        je      .end
        mov     edx, ecx
        imul    edx, sizeof.CQ_ENTRY
        mov     ax, word [edi + edx + CQ_ENTRY.status]
        DEBUGF  DBG_INFO, "Status: 0x%x\n", ax
        inc     cx
        push    ecx
        stdcall cqyhdbl_write, [pci], [queue], ecx
        pop     ecx
        jmp     .loop

.end:
        pop     edi esi
        xor     eax, eax
        ret

endp

; Interrupt handler attached in nvme_init.
; Out: EAX = 1 if the interrupt was raised by our device and handled,
;      0 otherwise (including when no device structure exists any more)
proc irq_handler

        push    esi edi
        mov     esi, dword [p_nvme_devices]
        test    esi, esi
        jz      .not_our_irq            ; nothing detected, or already cleaned up

        ; check if the NVMe device generated an interrupt
        invoke  PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.status
        test    al, 1000b               ; check interrupt status
        jz      .not_our_irq
        mov     edi, dword [esi + pcidev.io_addr]
        mov     dword [edi + NVME_MMIO.INTMS], 0x3 ; mask vectors 0 and 1 while processing
        xor     ecx, ecx

.next_queue:
        push    ecx
        stdcall consume_cq_entries, [p_nvme_devices], ecx
        pop     ecx
        inc     ecx
        cmp     ecx, LAST_QUEUE_ID
        jbe     .next_queue             ; unsigned compare: ecx is a queue index

        ; Interrupt handled by driver, return 1
        mov     dword [edi + NVME_MMIO.INTMC], 0x3 ; unmask again
        pop     edi esi
        xor     eax, eax
        inc     eax
        ret

.not_our_irq:
        ; Interrupt not handled by driver, return 0
        pop     edi esi
        xor     eax, eax
        ret

endp

; Releases the pcidev structure and resets the driver globals.
; NOTE(review): the queue array, ring buffers, NSINFO and the IRQ attachment
; are not released here.
proc nvme_cleanup

        DEBUGF  DBG_INFO, "(NVMe): Cleaning up...\n"
        mov     dword [pcidevs_len], 0
        mov     eax, dword [p_nvme_devices]
        test    eax, eax
        jz      .done
        ; clear the global before freeing so nothing keeps a dangling pointer
        ;invoke KernelFree, dword [p_nvme_devices + ecx * sizeof.pcidev + pcidev.ident_ptr]
        mov     dword [p_nvme_devices], 0
        invoke  KernelFree, eax

.done:
        ret

endp

;all initialized data place here
align 4
p_nvme_devices  dd 0
pcidevs_len     dd 0
my_service      db "NVMe",0  ;max 16 chars include zero
disk_functions:
        dd      disk_functions.end - disk_functions
        dd      0 ; no close function
        dd      0 ; no closemedia function
        dd      nvme_query_media
        dd      nvme_read
        dd      0 ; no write function (for now)
        dd      0 ; no flush function
        dd      0 ; use default cache size
.end:

if __DEBUG__
include_debug_strings
end if

align 4
data fixups
end data