;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2024. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;          GNU GENERAL PUBLIC LICENSE                             ;;
;;             Version 2, June 1991                                ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

format PE DLL native
entry START

        API_VERSION             equ 0  ;debug
        SRV_GETVERSION          equ 0

        __DEBUG__               = 1
        __DEBUG_LEVEL__         = 1
        DRIVER_VERSION          = 1
        DBG_INFO                = 1

section ".flat" code readable writable executable
include "../proc32.inc"
include "../struct.inc"
include "../macros.inc"
include "../fdo.inc"
include "../pci.inc"
include "../peimport.inc"
include "nvme.inc"
include "macros.inc"

; START: driver entry point.
; Detects NVMe hardware, initializes the first detected device when it passes
; the compatibility check, then registers the driver service.
; In:  reason - must be DRV_ENTRY, anything else takes the failure path
; Out: eax = result of RegService on success, 0 on failure
proc START c, reason:dword

        cmp     [reason], DRV_ENTRY
        jne     .err

.entry:
        DEBUGF  DBG_INFO, "Detecting NVMe hardware...\n"
        call    detect_nvme
        test    eax, eax
        jz      .err
        mov     eax, dword [p_nvme_devices]
        test    eax, eax
        jz      .err                    ; no device record was allocated
        xor     ecx, ecx

.loop:
        mov     ebx, dword [p_nvme_devices]
        stdcall device_is_compat, ebx
        test    eax, eax
        ; A local label is required here: an anonymous "@f" would resolve to
        ; the next "@@:" in the file, which lives inside service_proc.
        jz      .next_dev
        stdcall nvme_init, ebx

.next_dev:
        ; Iterating over several devices is not enabled yet:
        ;inc     ecx
        ;cmp     ecx, dword [pcidevs_len]
        ;jne     .loop
        invoke  RegService, my_service, service_proc
        ret

.err:
        call    nvme_cleanup
        xor     eax, eax
        ret

endp

; service_proc: IOCTL dispatcher registered with RegService.
; Only SRV_GETVERSION is handled: API_VERSION is written to the 4-byte output
; buffer described by the IOCTL structure.
; Out: eax = 0 on success, -1 for any other request or a wrong output size
proc service_proc stdcall, ioctl:dword

        mov     ebx, [ioctl]
        mov     eax, [ebx + IOCTL.io_code]
        cmp     eax, SRV_GETVERSION
        jne     .unsupported

        mov     eax, [ebx + IOCTL.output]
        cmp     [ebx + IOCTL.out_size], 4
        jne     .unsupported
        mov     dword [eax], API_VERSION
        xor     eax, eax
        ret

.unsupported:
        or      eax, -1
        ret

endp

; memset: fills sz bytes starting at p_data with val.
; A size of 0 writes nothing.
; Out: eax = p_data; ebx and ecx are preserved
proc memset stdcall, p_data:dword, val:byte, sz:dword

        push    ebx ecx
        mov     eax, [p_data]
        mov     ecx, [sz]
        mov     bl, [val]
        test    ecx, ecx
        jz      .done

.fill:
        dec     ecx                     ; index runs from sz-1 down to 0
        mov     byte [eax + ecx], bl    ; mov keeps the flags produced by dec
        jnz     .fill

.done:
        pop     ecx ebx
        ret

endp

; set_cdw0: builds command dword 0.
; Out: eax = (cid shl 16) or opcode
proc set_cdw0 stdcall, opcode:byte, cid:word

        movzx   eax, [cid]
        shl     eax, 16
        mov     al, [opcode]            ; low 16 bits are zero after the shift
        ret

endp

; See pages 161-205 of the NVMe 1.4 specification for reference
; nvme_identify: builds an Identify command in the admin submission queue
; buffer and submits it through write_admin_cmd.
; In:  pci  - pointer to the pcidev structure
;      nsid - namespace identifier
;      dptr - address stored in the command's data pointer field
;      cid  - command identifier
;      cns  - Controller or Namespace Structure selector
; Out: eax = 0 when a debug build rejects the CNS value; otherwise whatever
;      write_admin_cmd leaves in eax
proc nvme_identify stdcall, pci:dword, nsid:dword, dptr:dword, cid:word, cns:byte

        push    esi
        mov     esi, [pci]

        ; It's important to check if CNS is a valid value here. In revision 1.0
        ; CNS is a 1 bit field and a two bit field in revision 1.1, using invalid
        ; values results in undefined behavior (see page 162 of NVMe 1.4 spec)
if __DEBUG__
        mov     edx, dword [esi + pcidev.io_addr]
        mov     eax, dword [edx + NVME_MMIO.VS]
        cmp     eax, VS110
        jne     .check_v100
        cmp     [cns], 11b
        jbe     .cns_ok                 ; unsigned: CNS is not a signed quantity
        DEBUGF  DBG_INFO, "(NVMe) FATAL ERROR: INVALID CNS VALUE ON v1.1.0 CONTROLLERS\n"
        jmp     .cns_invalid

.check_v100:
        cmp     eax, VS100
        jne     .cns_ok
        cmp     [cns], 1b
        jbe     .cns_ok
        DEBUGF  DBG_INFO, "(NVMe) FATAL ERROR: INVALID CNS VALUE ON v1.0.0 CONTROLLERS\n"

.cns_invalid:
        ; Refuse to submit the command instead of spinning forever
        xor     eax, eax
        pop     esi
        ret

.cns_ok:
end if

        ; NOTE(review): every command builder in this file writes to the start
        ; of the submission queue buffer rather than to the slot at the current
        ; tail - confirm before submitting more than one command.
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        mov     eax, [nsid]
        mov     dword [esi + SQ_ENTRY.nsid], eax
        mov     eax, [dptr]
        mov     dword [esi + SQ_ENTRY.dptr], eax
        movzx   eax, [cid]
        shl     eax, 16
        or      eax, ADM_CMD_IDENTIFY
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        mov     al, [cns]
        mov     byte [esi + SQ_ENTRY.cdw10], al
        stdcall write_admin_cmd, [pci]
        pop     esi
        ret

endp

; See pages 348-349 of the NVMe 1.4 specification for information on creating namespaces
; create_namespace: allocates a page and issues an Identify command with
; NSID 0xffffffff and CNS_IDNS that uses the page as its data buffer.
; Out: eax = 0 when the page allocation fails
proc create_namespace stdcall, pci:dword, cid:word

        push    esi
        invoke  AllocPage
        test    eax, eax
        jz      .fail
        ; NOTE(review): confirm whether AllocPage already returns a physical
        ; address; if it does, this translation must be removed.
        invoke  GetPhysAddr
        stdcall nvme_identify, [pci], 0xffffffff, eax, [cid], CNS_IDNS

.fail:
        pop     esi
        ret

endp

; See page 101 of the NVMe 1.4 specification for reference
; create_io_completion_queue: builds a Create I/O Completion Queue command.
; In:  pci  - pointer to the pcidev structure
;      prp1 - address stored in the command's data pointer field
;      qid  - queue identifier (CDW10.QID)
;      ien  - only bit 1 is used, it is copied into CDW11
proc create_io_completion_queue stdcall, pci:dword, prp1:dword, qid:word, ien:byte

        push    esi ebx
        mov     esi, [pci]
        movzx   ebx, [ien]
        and     ebx, 10b
        movzx   eax, byte [esi + pcidev.pc]
        and     eax, 0x1
        or      ebx, eax ; CDW11.PC
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_CRE_IO_COMPLETION_QUEUE, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax

        ; Since we are not using MSI-X or MSI vector (yet), CDW11.IV must be set to 0
        mov     dword [esi + SQ_ENTRY.cdw11], ebx

        movzx   ebx, [qid] ; CDW10.QID
        or      ebx, (sizeof.CQ_ENTRY shl 16) ; CDW10.QSIZE
        mov     dword [esi + SQ_ENTRY.cdw10], ebx
        mov     ebx, [prp1]
        mov     dword [esi + SQ_ENTRY.dptr], ebx
        stdcall sqytdbl_write, [pci], [qid], 0 ; setting last param to 0 for now, change later
        pop     ebx esi
        ret

endp

; See page 103-104 of the NVMe 1.4 specification for reference
; create_io_submission_queue: builds a Create I/O Submission Queue command.
; In:  pci  - pointer to the pcidev structure
;      prp1 - address stored in the command's data pointer field
;      qid  - queue identifier (CDW10.QID)
;      cqid - completion queue identifier (CDW11.CQID)
proc create_io_submission_queue stdcall, pci:dword, prp1:dword, qid:word, cqid:word

        push    esi ebx
        mov     esi, [pci]
        movzx   ebx, byte [esi + pcidev.pc]
        and     ebx, 0x1 ; CDW11.PC
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_CRE_IO_SUBMISSION_QUEUE, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        movzx   eax, [cqid]
        shl     eax, 16
        or      ebx, eax ; CDW11.CQID
        ; TODO: Set CDW10.QPRIO
        mov     dword [esi + SQ_ENTRY.cdw11], ebx

        mov     ebx, sizeof.SQ_ENTRY    ; movzx cannot take an immediate operand
        shl     ebx, 16 ; CDW10.QSIZE
        movzx   eax, [qid]              ; widen the word argument before merging
        or      ebx, eax ; CDW10.QID
        mov     dword [esi + SQ_ENTRY.cdw10], ebx
        mov     ebx, [prp1]
        mov     dword [esi + SQ_ENTRY.dptr], ebx
        stdcall sqytdbl_write, [pci], [qid], 0 ; setting last param to 0 for now, change later
        pop     ebx esi
        ret

endp

; See page 95-96 of the NVMe 1.4 specification for reference
; abort: builds an Abort command.
; In:  pci  - pointer to the pcidev structure
;      cid  - identifier of the command to abort (CDW10 bits 31:16)
;      sqid - submission queue of that command (CDW10 bits 15:0)
proc abort stdcall, pci:dword, cid:word, sqid:word

        push    esi
        mov     esi, [pci]
        ; Build the command in the submission queue buffer, as every other
        ; command builder does, and not on top of the pcidev structure.
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_ABORT, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        movzx   eax, [cid]
        shl     eax, 16
        mov     ax, [sqid]              ; low 16 bits are zero after the shift
        mov     dword [esi + SQ_ENTRY.cdw10], eax
        stdcall sqytdbl_write, [pci], 0, 0
        pop     esi
        ret

endp

; See page 205 of the NVMe 1.4 specification for reference
; set_features: builds a Set Features command.
; In:  pci  - pointer to the pcidev structure
;      dptr - address stored in the command's data pointer field
;      fid  - feature identifier (low byte of CDW10)
proc set_features stdcall, pci:dword, dptr:dword, fid:byte

        push    esi
        mov     esi, [pci]
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        ; NOTE(review): every other opcode constant is spelled ADM_CMD_*;
        ; check the spelling of this one against nvme.inc.
        stdcall set_cdw0, AMD_CMD_SET_FEATURES, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        mov     eax, [dptr]
        mov     dword [esi + SQ_ENTRY.dptr], eax
        mov     al, [fid]
        mov     byte [esi + SQ_ENTRY.cdw10], al
        ; TODO: Set CDW10.SV and CDW14.UUID to valid value
        stdcall sqytdbl_write, [pci], 0, 0
        pop     esi
        ret

endp

; See page 105 of the NVMe 1.4 specification for reference
; delete_io_completion_queue: builds a Delete I/O Completion Queue command
; for queue qid.
proc delete_io_completion_queue stdcall, pci:dword, qid:word

        push    esi
        mov     esi, [pci]
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_DEL_IO_COMPLETION_QUEUE, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        mov     ax, [qid]
        mov     word [esi + SQ_ENTRY.cdw10], ax
        stdcall sqytdbl_write, [pci], [qid], 0 ; setting last param to 0 for now, change later
        pop     esi
        ret

endp

; See page 114-116 of the NVMe 1.4 specification for reference
; get_features: builds a Get Features command.
; In:  pci  - pointer to the pcidev structure
;      dptr - address stored in the command's data pointer field
;      sel  - select field, only the low 3 bits are used (CDW10 bits 10:8)
;      fid  - feature identifier (CDW10 bits 7:0)
proc get_features stdcall, pci:dword, dptr:dword, sel:byte, fid:byte

        push    esi ebx
        mov     esi, [pci]
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_GET_FEATURES, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        movzx   eax, [fid] ; CDW10.FID
        movzx   ebx, [sel]
        and     ebx, 111b
        shl     ebx, 8
        or      eax, ebx ; CDW10.SEL
        mov     dword [esi + SQ_ENTRY.cdw10], eax
        mov     eax, [dptr]
        mov     dword [esi + SQ_ENTRY.dptr], eax
        ; TODO: Implement CDW14.UUID?
        stdcall sqytdbl_write, [pci], 0, 0
        pop     ebx esi
        ret

endp

; See page 105-106 of the NVMe 1.4 specification for reference
; delete_io_submission_queue: builds a Delete I/O Submission Queue command
; for queue qid.
proc delete_io_submission_queue stdcall, pci:dword, qid:word

        push    esi
        mov     esi, [pci]
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_DEL_IO_SUBMISSION_QUEUE, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        mov     ax, [qid]
        mov     word [esi + SQ_ENTRY.cdw10], ax
        stdcall sqytdbl_write, [pci], [qid], 0 ; setting last param to 0 for now, change later
        pop     esi
        ret

endp

; See page 117-118 of the NVMe 1.4 specification for reference
; INCOMPLETE
; get_log_page: fills in CDW0 and the data pointer of a Get Log Page command.
; The lid argument is not used yet and the command is never submitted.
proc get_log_page stdcall, pci:dword, dptr:dword, lid:byte

        push    esi
        mov     esi, [pci]
        mov     esi, [esi + pcidev.sq_ptr]
        stdcall memset, esi, 0, sizeof.SQ_ENTRY
        stdcall set_cdw0, ADM_CMD_GET_LOG_PAGE, 0 ; [TODO: Set CID to valid value]
        mov     dword [esi + SQ_ENTRY.cdw0], eax
        mov     eax, [dptr]
        mov     dword [esi + SQ_ENTRY.dptr], eax
        pop     esi
        ret

endp

; detect_nvme: walks the list returned by GetPCIList and records the bus and
; devfn of devices whose class/subclass is 01h/08h.
; Only one pcidev structure is allocated, so a later match overwrites the
; fields recorded for an earlier one.
; Out: eax = 1 on success (even when nothing was found), 0 when out of memory
proc detect_nvme

        invoke  GetPCIList
        mov     edx, eax                ; edx = list head, marks the end of the walk

.check_dev:
        mov     ebx, dword [eax + PCIDEV.class]
        and     ebx, 0x00ffff00 ; retrieve class/subclass code only
        cmp     ebx, 0x00010800 ; Mass Storage Controller - Non-Volatile Memory Controller
        je      .found_dev

.next_dev:
        mov     eax, dword [eax + PCIDEV.fd]
        cmp     eax, edx
        jne     .check_dev
        jmp     .exit_success

.found_dev:
        push    edx eax
        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): Detected NVMe device...\n", byte [eax + PCIDEV.bus], byte [eax + PCIDEV.devfn]
        cmp     dword [pcidevs_len], TOTAL_PCIDEVS
        jne     .have_room
        pop     eax edx                 ; device table is full, stop scanning
        jmp     .exit_success

.have_room:
        inc     dword [pcidevs_len]
        mov     ebx, dword [p_nvme_devices]
        test    ebx, ebx
        jnz     .record
        invoke  KernelAlloc, sizeof.pcidev
        test    eax, eax
        jz      .err_no_mem
        mov     dword [p_nvme_devices], eax
        DEBUGF  DBG_INFO, "(NVMe) Allocated pcidev struct at 0x%x\n", [p_nvme_devices]

.record:
        mov     ecx, dword [pcidevs_len]
        dec     ecx                     ; index of this device, not used yet
        pop     eax                     ; eax = current PCIDEV node
        mov     ebx, dword [p_nvme_devices]
        movzx   edx, byte [eax + PCIDEV.bus]
        mov     byte [ebx + pcidev.bus], dl
        movzx   edx, byte [eax + PCIDEV.devfn]
        mov     byte [ebx + pcidev.devfn], dl
        pop     edx                     ; edx = list head again
        jmp     .next_dev

.err_no_mem:
        pop     eax edx
        xor     eax, eax
        ret

.exit_success:
        xor     eax, eax
        inc     eax
        ret

endp

; device_is_compat: reads the interrupt pin/line and BAR0 of the device, maps
; its registers and records CAP.DSTRD.
; pcidev.dstrd holds the raw CAP.DSTRD field; the doorbell helpers use it as
; the shift count in "4 << CAP.DSTRD".
; In:  pci - pointer to a pcidev structure with bus and devfn filled in
; Out: eax = 1 on success, 0 on failure; ecx, edx, esi, edi are preserved
proc device_is_compat stdcall, pci:dword

        push    esi edx ecx edi
        mov     esi, [pci]
        invoke  PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_pin
        test    eax, eax
        jz      .failure
        mov     byte [esi + pcidev.ipin], al
        ;invoke  PciRead16, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header.command
        ;test    eax, eax
        ;jz      .failure
        ;DEBUGF  DBG_INFO, "(NVMe) CMD: %x\n", eax
        invoke  PciRead8, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.interrupt_line
        mov     byte [esi + pcidev.iline], al
        invoke  PciRead32, dword [esi + pcidev.bus], dword [esi + pcidev.devfn], PCI_header00.base_addr_0
        and     eax, 0xfffffff0
        test    eax, eax
        jz      .failure
        ; Keep the BAR address in a saved register instead of on the stack so
        ; that every exit path restores the registers correctly.
        mov     edi, eax
        invoke  MapIoMem, eax, sizeof.NVME_MMIO, PG_SW+PG_NOCACHE
        test    eax, eax
        jz      .failure
        ;DEBUGF  DBG_INFO, "(NVMe) MMIO allocated at: 0x%x\n", eax
        mov     dword [esi + pcidev.io_addr], eax
        mov     eax, dword [eax + NVME_MMIO.CAP + 4]
        and     eax, CAP_DSTRD

        ; Stride is (2 ^ (2 + DSTRD)) bytes, i.e. 4 << DSTRD
        mov     byte [esi + pcidev.dstrd], al
        mov     ecx, eax

        ; 1003h + ((2y + 1) * (4 << CAP.DSTRD))
        mov     eax, 4
        shl     eax, cl
        mov     ecx, NVM_ASQS
        shl     ecx, 1
        inc     ecx
        imul    ecx, eax
        add     ecx, 0x1003
        ; NOTE(review): the first mapping is not released before remapping.
        invoke  MapIoMem, edi, ecx, PG_SW+PG_NOCACHE
        test    eax, eax
        jz      .failure
        mov     dword [esi + pcidev.io_addr], eax
        mov     eax, dword [eax + NVME_MMIO.VS]
        DEBUGF  DBG_INFO, "(NVMe) Controller version: 0x%x\n", eax
        pop     edi ecx edx esi
        xor     eax, eax
        inc     eax
        ret

.failure:
        PDEBUGF DBG_INFO, "PCI(%u.%u.%u): something went wrong checking NVMe device compatibility\n", byte [esi + pcidev.bus], byte [esi + pcidev.devfn]
        pop     edi ecx edx esi
        xor     eax, eax
        ret

endp

; nvme_init: Initializes the NVMe controller
; Resets the controller, programs CC and the admin queue registers, allocates
; the admin queues and the queue bookkeeping list, attaches the interrupt
; handler, starts the controller and issues an Identify command.
; In:  pci - pointer to a pcidev structure prepared by device_is_compat
; Out: eax = 1 on success, 0 on failure
; NOTE(review): buffers allocated before a failure are not released.
proc nvme_init stdcall, pci:dword

        push    ebx esi edi
        mov     esi, dword [pci]
        mov     edi, dword [esi + pcidev.io_addr]

if 0
        mov     eax, dword [edi + NVME_MMIO.CAP]
        DEBUGF  DBG_INFO, "(NVMe) CAP (0-31): 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        DEBUGF  DBG_INFO, "(NVMe) CAP (32-63): 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CC]
        DEBUGF  DBG_INFO, "(NVMe) CC: 0x%x\n", eax
        mov     eax, dword [edi + NVME_MMIO.CSTS]
        DEBUGF  DBG_INFO, "(NVMe) CSTS: 0x%x\n", eax
end if

        ; Placeholder: both outcomes of the CAP.CQR test continue at the same place
        mov     eax, dword [edi + NVME_MMIO.CAP]
        test    eax, CAP_CQR
        jz      .cqr_not_req

.cqr_not_req:
        ; For some reason, bit 7 (No I/O command set supported) is also set to 1 despite bit 0 (NVM command set)
        ; being set to 1.. so I am not sure if bit 7 should be checked at all.. investigate later.
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        test    eax, CAP_CSS_NVM_CMDSET
        jz      .exit_fail

        ; Reset controller before we configure it
        stdcall nvme_controller_reset, edi
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        and     eax, CAP_MPSMIN
        shr     eax, 16
        cmp     eax, NVM_MPS
        jg      .exit_fail
        mov     eax, dword [edi + NVME_MMIO.CAP + 4]
        and     eax, CAP_MPSMAX
        shr     eax, 20
        cmp     eax, NVM_MPS
        jl      .exit_fail

        ; Configure AMS, MPS, CSS
        mov     eax, dword [edi + NVME_MMIO.CC]
        and     eax, not (CC_AMS or CC_MPS or CC_CSS)
        or      eax, 111b shl 4 ; Admin Command Set Only (temporary)
        ; Store the value: AND-ing it into the register could only clear bits
        ; and would never set the CSS field.
        mov     dword [edi + NVME_MMIO.CC], eax

        ; Configure Admin Queue Attributes
        mov     eax, dword [edi + NVME_MMIO.AQA]
        and     eax, not (AQA_ASQS or AQA_ACQS)
        or      eax, NVM_ASQS or (NVM_ACQS shl 16)
        mov     dword [edi + NVME_MMIO.AQA], eax

        ; Configure Admin Submission/Completion Queue Base Address
        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     dword [esi + pcidev.sq_ptr], eax
        invoke  GetPhysAddr
        mov     dword [edi + NVME_MMIO.ASQ], eax
        mov     dword [edi + NVME_MMIO.ASQ + 4], 0
        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     dword [esi + pcidev.cq_ptr], eax
        invoke  GetPhysAddr
        mov     dword [edi + NVME_MMIO.ACQ], eax
        mov     dword [edi + NVME_MMIO.ACQ + 4], 0
        stdcall memset, dword [esi + pcidev.sq_ptr], 0, sizeof.SQ_ENTRY * NVM_ASQS
        stdcall memset, dword [esi + pcidev.cq_ptr], 0, sizeof.CQ_ENTRY * NVM_ACQS

        ; Allocate list of queues
        invoke  KernelAlloc, sizeof.NVM_QUEUE * NVM_ASQS
        test    eax, eax
        jz      .exit_fail
        mov     dword [esi + pcidev.queue_ptr], eax
        stdcall memset, eax, 0, sizeof.NVM_QUEUE * NVM_ASQS

        ; we want to disable all interrupts for now, since the controller randomly
        ; generates interrupts while starting up
        mov     dword [edi + NVME_MMIO.INTMS], 0xffffffff

        ; Attach interrupt handler
        movzx   eax, byte [esi + pcidev.iline]
        DEBUGF  DBG_INFO, "(NVMe) Attaching interrupt handler to IRQ %u\n", eax
        invoke  AttachIntHandler, eax, irq_handler, 0
        test    eax, eax
        jz      .exit_fail
        DEBUGF  DBG_INFO, "(NVMe) Successfully attached interrupt handler\n"

        ; Restart the controller
        stdcall nvme_controller_start, edi

        mov     dword [edi + NVME_MMIO.INTMC], 0xffffffff ; re-enable interrupts

        invoke  KernelAlloc, 0x1000
        test    eax, eax
        jz      .exit_fail
        mov     dword [dptr], eax       ; keep the buffer's linear address
        invoke  GetPhysAddr
        ; pci:dword, nsid:dword, dptr:dword, cid:word, cns:byte
        stdcall nvme_identify, [pci], 0, eax, 2, CNS_IDCS
        xor     eax, eax
        inc     eax
        pop     edi esi ebx
        ret

.exit_fail:
        DEBUGF  DBG_INFO, "(NVMe) failed to initialize controller\n"
        xor     eax, eax
        pop     edi esi ebx
        ret

endp

; nvme_controller_reset: clears CC.EN and spins until CSTS.RDY reads 0.
; In:  mmio - mapped controller register base
; NOTE(review): the wait loop has no timeout.
proc nvme_controller_reset stdcall, mmio:dword

        DEBUGF  DBG_INFO, "(NVMe) Resetting Controller...\n"
        push    edi
        mov     edi, dword [mmio]
        and     dword [edi + NVME_MMIO.CC], 0xfffffffe ; CC.EN = 0
        stdcall nvme_wait, [mmio]

; Wait for controller to be brought to idle state, CSTS.RDY should be cleared to 0 when this happens
.wait:
        test    dword [edi + NVME_MMIO.CSTS], CSTS_RDY
        jnz     .wait
        DEBUGF  DBG_INFO, "(NVMe) Successfully reset controller...\n"
        pop     edi
        ret

endp

; nvme_controller_start: sets CC.EN and spins until CSTS.RDY reads 1.
; In:  mmio - mapped controller register base
; NOTE(review): the wait loop has no timeout.
proc nvme_controller_start stdcall, mmio:dword

        DEBUGF  DBG_INFO, "(NVMe) Starting Controller...\n"
        push    edi
        mov     edi, dword [mmio]
        or      dword [edi + NVME_MMIO.CC], 1 ;; CC.EN = 1
        stdcall nvme_wait, [mmio]

; Wait for controller to be brought into active state, CSTS.RDY should be set to 1 when this happens
.wait:
        test    dword [edi + NVME_MMIO.CSTS], CSTS_RDY
        jz      .wait
        DEBUGF  DBG_INFO, "(NVMe) Successfully started controller...\n"
        pop     edi
        ret

endp

; Should be called only after the value of CC.EN has changed
; nvme_wait: sleeps for (CAP.TO * 100), passing the value to Sleep in esi.
; In:  mmio - mapped controller register base
proc nvme_wait stdcall, mmio:dword

        push    esi
        mov     esi, [mmio]
        mov     esi, dword [esi + NVME_MMIO.CAP]
        and     esi, CAP_TO
        shr     esi, 24
        imul    esi, 100 ; TODO: bad time delay, set to appropriate value later
        invoke  Sleep
        pop     esi
        ret

endp

; Writes to submission queue 'y' tail doorbell
; In:  pci - pointer to the pcidev structure
;      y   - submission queue number
;      sqt - new tail value
; Clobbers eax and ecx.
proc sqytdbl_write stdcall, pci:dword, y:byte, sqt:word

        push    ebx esi
        mov     esi, [pci]

        ; 1000h + (2y * (4 << CAP.DSTRD))
        movzx   ecx, byte [esi + pcidev.dstrd]
        mov     eax, 4
        shl     eax, cl
        movzx   ebx, [y]
        shl     ebx, 1                  ; ebx = 2y
        imul    ebx, eax
        add     ebx, 0x1000
        DEBUGF  DBG_INFO, "(NVMe) Writing to submission queue 0x%x doorbell register\n", ebx
        mov     esi, [esi + pcidev.io_addr]
        movzx   eax, [sqt]
        mov     dword [esi + ebx], eax ; Write to register (doorbells are 32 bits wide)
        pop     esi ebx
        ret

endp

; Writes to completion queue 'y' head doorbell
; Advances the stored head of queue y (wrapping once it reaches NVM_ACQS),
; writes it to the doorbell and stores it back in the queue list.
; In:  pci - pointer to the pcidev structure
;      y   - completion queue number
; Clobbers eax, ecx and edx.
proc cqyhdbl_write stdcall, pci:dword, y:byte

        push    esi edi
        mov     esi, [pci]

        ; 1000h + ((2y + 1) * (4 << CAP.DSTRD))
        ; Computed in full 32-bit registers because edx is used as the offset
        movzx   eax, [y]
        lea     eax, [eax * 2 + 1]
        movzx   ecx, byte [esi + pcidev.dstrd]
        mov     edx, 4
        shl     edx, cl
        imul    edx, eax
        add     edx, 0x1000
        movzx   ecx, [y]
        mov     edi, dword [esi + pcidev.queue_ptr]
        movzx   eax, word [edi + ecx * sizeof.NVM_QUEUE + NVM_QUEUE.head] ; get head for completion queue Y
        cmp     eax, NVM_ACQS
        jb      .no_wrap
        xor     eax, eax

.no_wrap:
        inc     eax
        mov     esi, dword [esi + pcidev.io_addr]
        DEBUGF  DBG_INFO, "(NVMe) Writing to completion queue doorbell register 0x%x: %u\n", edx, eax
        mov     dword [esi + edx], eax ; Write to CQyHDBL (doorbells are 32 bits wide)
        mov     word [edi + ecx * sizeof.NVM_QUEUE + NVM_QUEUE.head], ax
        pop     edi esi
        ret

endp

; write_admin_cmd: advances the admin submission queue tail (wrapping once it
; reaches NVM_ASQS), stores it in the first entry of the queue list and writes
; it to the doorbell at register offset 1000h.
; In:  pci - pointer to the pcidev structure
; Out: eax = new tail value; clobbers edx
proc write_admin_cmd stdcall, pci:dword

        push    esi
        mov     esi, [pci]
        mov     edx, dword [esi + pcidev.queue_ptr]
        movzx   eax, word [edx + NVM_QUEUE.tail]
        cmp     eax, NVM_ASQS
        jb      .no_wrap
        xor     eax, eax

.no_wrap:
        inc     eax
        ; The tail belongs in the queue list, not in the register window
        mov     word [edx + NVM_QUEUE.tail], ax
        mov     esi, dword [esi + pcidev.io_addr]
        mov     dword [esi + 0x1000], eax ; doorbells are 32 bits wide
        pop     esi
        ret

endp

; Calculates 2^x
; Out: eax = 1 shl x; ecx is preserved
proc pow2 stdcall, x:byte

        push    ecx
        mov     cl, [x]
        xor     eax, eax
        inc     eax
        shl     eax, cl                 ; a shift count of 0 leaves eax = 1
        pop     ecx
        ret

endp

; irq_handler: interrupt handler attached by nvme_init.
; Reads the status of the first completion queue entry of the first device.
; A non-zero status code resets and restarts the controller; otherwise the
; completion queue 0 head doorbell is advanced.
; Out: eax = 1 (interrupt handled)
proc irq_handler

        push    esi edi
        mov     edi, dword [p_nvme_devices]
        mov     esi, dword [edi + pcidev.io_addr]
        mov     edi, dword [edi + pcidev.cq_ptr]
        mov     dword [esi + NVME_MMIO.INTMS], 0x1
        mov     ax, word [edi + CQ_ENTRY.status]
        and     ax, not CQ_PHASE_TAG ; ignore phase tag bit
        DEBUGF  DBG_INFO, "(NVMe) Status: %x\n", ax
        test    al, al ; check status code (0 on success)
        jz      .success

        ; error occurred
        ; we have to initiate a controller reset if an admin command encounters
        ; a fatal error or if a completion is not received for a deletion
        ; of a submission or completion queue (section 10.1 - page 400 of NVMe 1.4 spec)
        stdcall nvme_controller_reset, esi
        stdcall nvme_controller_start, esi
        jmp     .exit

.success:
        mov     dword [esi + NVME_MMIO.INTMC], 0x1
        stdcall cqyhdbl_write, [p_nvme_devices], 0

.exit:
        ; Interrupt handled by driver, return 1
        xor     eax, eax
        inc     eax
        pop     edi esi
        ret

endp

; nvme_cleanup: releases the device list allocated by detect_nvme.
; Per-device buffers are not released yet (see the commented-out call).
proc nvme_cleanup

        DEBUGF  DBG_INFO, "(NVMe): Cleaning up...\n"
        mov     eax, dword [p_nvme_devices]
        test    eax, eax
        jz      .done                   ; nothing was allocated
        mov     ecx, dword [pcidevs_len]
        test    ecx, ecx
        jz      .free_list              ; avoid wrapping the countdown below

.loop:
        ;invoke KernelFree, dword [p_nvme_devices + ecx * sizeof.pcidev + pcidev.ident_ptr]
        dec     ecx
        jnz     .loop

.free_list:
        invoke  KernelFree, dword [p_nvme_devices]
        and     dword [p_nvme_devices], 0 ; do not keep a dangling pointer

.done:
        ret

endp

;all initialized data place here
align 4
p_nvme_devices  dd 0
pcidevs_len     dd 0
dptr            dd ?
my_service      db "NVMe",0  ;max 16 chars include zero

if __DEBUG__
include_debug_strings
end if

align 4
data fixups
end data