diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 572fb96d1d..9ab027e8d2 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -68,6 +68,10 @@ struct spdk_nvme_ctrlr_opts {
 	 * Number of I/O queues to request (used to set Number of Queues feature)
 	 */
 	uint32_t num_io_queues;
+	/**
+	 * Place submission queues in the controller memory buffer when it is usable
+	 */
+	bool use_cmb_sqs;
 };
 
 /**
diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c
index 80d936b7c8..a7d4422f34 100644
--- a/lib/nvme/nvme_ctrlr.c
+++ b/lib/nvme/nvme_ctrlr.c
@@ -42,6 +42,7 @@ void
 spdk_nvme_ctrlr_opts_set_defaults(struct spdk_nvme_ctrlr_opts *opts)
 {
 	opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
+	opts->use_cmb_sqs = false;
 }
 
 static int
@@ -918,6 +919,129 @@ nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
 	return 0;
 }
 
+/*
+ * Map the BAR that contains the controller memory buffer (CMB) and record
+ * where the buffer lives.  If the CMB is absent, invalid or cannot be mapped,
+ * cmb_bar_virt_addr is set to NULL and opts.use_cmb_sqs is cleared so that
+ * queue construction allocates submission queues with nvme_malloc() instead.
+ */
+static void
+nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr)
+{
+	int rc;
+	void *addr = NULL;
+	uint32_t bir;
+	union spdk_nvme_cmbsz_register cmbsz;
+	union spdk_nvme_cmbloc_register cmbloc;
+	uint64_t size, unit_size, offset, bar_size, bar_phys_addr;
+
+	cmbsz.raw = nvme_mmio_read_4(ctrlr, cmbsz.raw);
+	cmbloc.raw = nvme_mmio_read_4(ctrlr, cmbloc.raw);
+	if (!cmbsz.bits.sz)
+		goto exit;
+
+	bir = cmbloc.bits.bir;
+	/* Values 0 2 3 4 5 are valid for BAR */
+	if (bir > 5 || bir == 1)
+		goto exit;
+
+	/* Only the seven size units listed below are defined; reject anything else */
+	if (cmbsz.bits.szu > 6)
+		goto exit;
+
+	/* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
+	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
+	/* controller memory buffer size in Bytes */
+	size = unit_size * cmbsz.bits.sz;
+	/* controller memory buffer offset from BAR in Bytes */
+	offset = unit_size * cmbloc.bits.ofst;
+
+	nvme_pcicfg_get_bar_addr_len(ctrlr->devhandle, bir, &bar_phys_addr, &bar_size);
+
+	if (offset > bar_size)
+		goto exit;
+
+	if (size > bar_size - offset)
+		goto exit;
+
+	rc = nvme_pcicfg_map_bar_write_combine(ctrlr->devhandle, bir, &addr);
+	/* Test rc first: a failed mapping is not required to store anything in addr */
+	if ((rc != 0) || addr == NULL)
+		goto exit;
+
+	ctrlr->cmb_bar_virt_addr = addr;
+	ctrlr->cmb_bar_phys_addr = bar_phys_addr;
+	ctrlr->cmb_size = size;
+	ctrlr->cmb_current_offset = offset;
+	/* The buffer occupies [offset, offset + size) of the BAR */
+	ctrlr->cmb_max_offset = offset + size;
+
+	if (cmbsz.bits.sqs) {
+		ctrlr->flags |= SPDK_NVME_CTRLR_CMB_SQ_SUPPORTED;
+	} else {
+		ctrlr->opts.use_cmb_sqs = false;
+	}
+
+	return;
+exit:
+	ctrlr->cmb_bar_virt_addr = NULL;
+	ctrlr->opts.use_cmb_sqs = false;
+	return;
+}
+
+/*
+ * Unmap the BAR mapped by nvme_ctrlr_map_cmb(), if any.
+ * Returns 0 when nothing was mapped, otherwise the result of nvme_pcicfg_unmap_bar().
+ */
+static int
+nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
+{
+	int rc = 0;
+	union spdk_nvme_cmbloc_register cmbloc;
+	void *addr = ctrlr->cmb_bar_virt_addr;
+
+	if (addr) {
+		cmbloc.raw = nvme_mmio_read_4(ctrlr, cmbloc.raw);
+		rc = nvme_pcicfg_unmap_bar(ctrlr->devhandle, cmbloc.bits.bir, addr);
+	}
+	return rc;
+}
+
+/*
+ * Reserve length bytes of the controller memory buffer.
+ *
+ * aligned must be a non-zero power of two.  On success 0 is returned and
+ * *offset receives the reservation's offset from the start of the BAR;
+ * -1 is returned when the alignment is invalid or the request does not fit.
+ * The allocator only moves cmb_current_offset forward; nvme_qpair_destroy()
+ * does not hand space back.
+ */
+int
+nvme_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned,
+		     uint64_t *offset)
+{
+	uint64_t round_offset;
+
+	/* The mask arithmetic below is only valid for a non-zero power of two */
+	if (aligned == 0 || (aligned & (aligned - 1)) != 0)
+		return -1;
+
+	round_offset = ctrlr->cmb_current_offset;
+	if (round_offset > UINT64_MAX - (aligned - 1))
+		return -1;
+	round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1);
+
+	/* Compare against the space that is left so the check itself cannot wrap */
+	if (round_offset > ctrlr->cmb_max_offset ||
+	    length > ctrlr->cmb_max_offset - round_offset)
+		return -1;
+
+	*offset = round_offset;
+	ctrlr->cmb_current_offset = round_offset + length;
+
+	return 0;
+}
+
 static int
 nvme_ctrlr_allocate_bars(struct spdk_nvme_ctrlr *ctrlr)
 {
@@ -931,6 +1055,8 @@ nvme_ctrlr_allocate_bars(struct spdk_nvme_ctrlr *ctrlr)
 		return -1;
 	}
 
+	nvme_ctrlr_map_cmb(ctrlr);
+
 	return 0;
 }
 
@@ -940,6 +1066,12 @@ nvme_ctrlr_free_bars(struct spdk_nvme_ctrlr *ctrlr)
 	int rc = 0;
 	void *addr = (void *)ctrlr->regs;
 
+	rc = nvme_ctrlr_unmap_cmb(ctrlr);
+	if (rc != 0) {
+		nvme_printf(ctrlr, "nvme_ctrlr_unmap_cmb failed with error code %d\n", rc);
+		return -1;
+	}
+
 	if (addr) {
 		rc = nvme_pcicfg_unmap_bar(ctrlr->devhandle, 0, addr);
 	}
@@ -956,6 +1088,7 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
 
 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
 	ctrlr->devhandle = devhandle;
+	ctrlr->flags = 0;
 
 	status = nvme_ctrlr_allocate_bars(ctrlr);
 	if (status != 0) {
@@ -981,7 +1114,6 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
 
 	ctrlr->is_resetting = false;
 	ctrlr->is_failed = false;
-	ctrlr->flags = 0;
 
 	TAILQ_INIT(&ctrlr->free_io_qpairs);
 	TAILQ_INIT(&ctrlr->active_io_qpairs);
diff --git a/lib/nvme/nvme_impl.h b/lib/nvme/nvme_impl.h
index 79e219e543..bb78a21e99 100644
--- a/lib/nvme/nvme_impl.h
+++ b/lib/nvme/nvme_impl.h
@@ -184,6 +184,16 @@ nvme_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **ma
 				    flags, mapped_addr);
 }
 
+static inline int
+nvme_pcicfg_map_bar_write_combine(void *devhandle, uint32_t bar, void **mapped_addr)
+{
+	struct pci_device *dev = devhandle;
+	uint32_t flags = PCI_DEV_MAP_FLAG_WRITABLE | PCI_DEV_MAP_FLAG_WRITE_COMBINE;
+
+	return pci_device_map_range(dev, dev->regions[bar].base_addr, dev->regions[bar].size,
+				    flags, mapped_addr);
+}
+
 static inline int
 nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
 {
@@ -192,6 +202,15 @@ nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
 	return pci_device_unmap_range(dev, addr, dev->regions[bar].size);
 }
 
+static inline void
+nvme_pcicfg_get_bar_addr_len(void *devhandle, uint32_t bar, uint64_t *addr, uint64_t *size)
+{
+	struct pci_device *dev = devhandle;
+
+	*addr = (uint64_t)dev->regions[bar].base_addr;
+	*size = (uint64_t)dev->regions[bar].size;
+}
+
 #else /* !USE_PCIACCESS */
 
 static inline int
@@ -203,12 +222,28 @@ nvme_pcicfg_map_bar(void *devhandle, uint32_t bar, uint32_t read_only, void **ma
 	return 0;
 }
 
+static inline int
+nvme_pcicfg_map_bar_write_combine(void *devhandle, uint32_t bar, void **mapped_addr)
+{
+	nvme_printf(NULL, "DPDK cannot support write combine now\n");
+	return -1;
+}
+
 static inline int
 nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
 {
 	return 0;
 }
 
+static inline void
+nvme_pcicfg_get_bar_addr_len(void *devhandle, uint32_t bar, uint64_t *addr, uint64_t *size)
+{
+	struct rte_pci_device *dev = devhandle;
+
+	*addr = (uint64_t)dev->mem_resource[bar].phys_addr;
+	*size = (uint64_t)dev->mem_resource[bar].len;
+}
+
 /*
  * TODO: once DPDK supports matching class code instead of device ID, switch to SPDK_PCI_CLASS_NVME
  */
diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h
index 379af9ec3b..62eb5da75e 100644
--- a/lib/nvme/nvme_internal.h
+++ b/lib/nvme/nvme_internal.h
@@ -138,7 +138,8 @@ enum nvme_payload_type {
  * Controller support flags.
  */
 enum spdk_nvme_ctrlr_flags {
-	SPDK_NVME_CTRLR_SGL_SUPPORTED = 0x1, /**< The SGL is supported */
+	SPDK_NVME_CTRLR_SGL_SUPPORTED		= 0x1, /**< The SGL is supported */
+	SPDK_NVME_CTRLR_CMB_SQ_SUPPORTED	= 0x2, /**< The submission queue in controller memory buffer is supported */
 };
 
 /**
@@ -304,6 +305,7 @@ struct spdk_nvme_qpair {
 	uint8_t				phase;
 
 	bool				is_enabled;
+	bool				sq_in_cmb;
 
 	/*
 	 * Fields below this point should not be touched on the normal I/O happy path.
@@ -440,6 +442,17 @@ struct spdk_nvme_ctrlr {
 	TAILQ_HEAD(, spdk_nvme_qpair)	active_io_qpairs;
 
 	struct spdk_nvme_ctrlr_opts	opts;
+
+	/** BAR mapping address which contains controller memory buffer */
+	void *cmb_bar_virt_addr;
+	/** BAR physical address which contains controller memory buffer */
+	uint64_t cmb_bar_phys_addr;
+	/** Controller memory buffer size in Bytes */
+	uint64_t cmb_size;
+	/** Offset from the start of the BAR of the next unallocated byte of the controller memory buffer */
+	uint64_t cmb_current_offset;
+	/** Offset from the start of the BAR of the first byte past the controller memory buffer */
+	uint64_t cmb_max_offset;
 };
 
 struct nvme_driver {
@@ -538,7 +551,8 @@ int	nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr);
 
 int	nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
 					struct nvme_request *req);
-
+int	nvme_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned,
+			     uint64_t *offset);
 int	nvme_qpair_construct(struct spdk_nvme_qpair *qpair, uint16_t id,
 			     uint16_t num_entries,
 			     uint16_t num_trackers,
diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c
index c09d55aaa3..663dfa44d2 100644
--- a/lib/nvme/nvme_qpair.c
+++ b/lib/nvme/nvme_qpair.c
@@ -482,6 +482,7 @@ nvme_qpair_construct(struct spdk_nvme_qpair *qpair, uint16_t id,
 	uint16_t		i;
 	volatile uint32_t	*doorbell_base;
 	uint64_t		phys_addr = 0;
+	uint64_t		offset;
 
 	nvme_assert(num_entries != 0, ("invalid num_entries\n"));
 	nvme_assert(num_trackers != 0, ("invalid num_trackers\n"));
@@ -489,18 +490,30 @@ nvme_qpair_construct(struct spdk_nvme_qpair *qpair, uint16_t id,
 	qpair->id = id;
 	qpair->num_entries = num_entries;
 	qpair->qprio = 0;
+	qpair->sq_in_cmb = false;
 
 	qpair->ctrlr = ctrlr;
 
 	/* cmd and cpl rings must be aligned on 4KB boundaries. */
-	qpair->cmd = nvme_malloc("qpair_cmd",
-				 qpair->num_entries * sizeof(struct spdk_nvme_cmd),
-				 0x1000,
-				 &qpair->cmd_bus_addr);
-	if (qpair->cmd == NULL) {
-		nvme_printf(ctrlr, "alloc qpair_cmd failed\n");
-		goto fail;
+	if (ctrlr->opts.use_cmb_sqs) {
+		if (nvme_ctrlr_alloc_cmb(ctrlr, qpair->num_entries * sizeof(struct spdk_nvme_cmd),
+					 0x1000, &offset) == 0) {
+			qpair->cmd = ctrlr->cmb_bar_virt_addr + offset;
+			qpair->cmd_bus_addr = ctrlr->cmb_bar_phys_addr + offset;
+			qpair->sq_in_cmb = true;
+		}
 	}
+	if (qpair->sq_in_cmb == false) {
+		qpair->cmd = nvme_malloc("qpair_cmd",
+					 qpair->num_entries * sizeof(struct spdk_nvme_cmd),
+					 0x1000,
+					 &qpair->cmd_bus_addr);
+		if (qpair->cmd == NULL) {
+			nvme_printf(ctrlr, "alloc qpair_cmd failed\n");
+			goto fail;
+		}
+	}
+
 	qpair->cpl = nvme_malloc("qpair_cpl",
 				 qpair->num_entries * sizeof(struct spdk_nvme_cpl),
 				 0x1000,
@@ -575,7 +588,7 @@ nvme_qpair_destroy(struct spdk_nvme_qpair *qpair)
 	if (nvme_qpair_is_admin_queue(qpair)) {
 		_nvme_admin_qpair_destroy(qpair);
 	}
-	if (qpair->cmd)
+	if (qpair->cmd && !qpair->sq_in_cmb)
 		nvme_free(qpair->cmd);
 	if (qpair->cpl)
 		nvme_free(qpair->cpl);
diff --git a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
index 55f08a0858..862fa09e7e 100644
--- a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
+++ b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
@@ -640,6 +640,52 @@ test_nvme_ctrlr_set_supported_features(void)
 	CU_ASSERT(res == true);
 }
 
+static void
+test_nvme_ctrlr_alloc_cmb(void)
+{
+	int rc;
+	uint64_t offset;
+	struct spdk_nvme_ctrlr ctrlr = {};
+
+	ctrlr.cmb_size = 0x1000000;
+	ctrlr.cmb_current_offset = 0x100;
+	ctrlr.cmb_max_offset = ctrlr.cmb_current_offset + ctrlr.cmb_size;
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x200, 0x1000, &offset);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(offset == 0x1000);
+	CU_ASSERT(ctrlr.cmb_current_offset == 0x1200);
+
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x800, 0x1000, &offset);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(offset == 0x2000);
+	CU_ASSERT(ctrlr.cmb_current_offset == 0x2800);
+
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x800000, 0x100000, &offset);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(offset == 0x100000);
+	CU_ASSERT(ctrlr.cmb_current_offset == 0x900000);
+
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x8000000, 0x1000, &offset);
+	CU_ASSERT(rc == -1);
+
+	/* A length that would wrap the 64-bit offset must be rejected */
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, UINT64_MAX, 0x1000, &offset);
+	CU_ASSERT(rc == -1);
+
+	/* The alignment has to be a non-zero power of two */
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x200, 0, &offset);
+	CU_ASSERT(rc == -1);
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x200, 0x3000, &offset);
+	CU_ASSERT(rc == -1);
+	CU_ASSERT(ctrlr.cmb_current_offset == 0x900000);
+
+	/* With a non-zero starting offset the buffer extends past cmb_size; that tail is usable */
+	rc = nvme_ctrlr_alloc_cmb(&ctrlr, 0x100, 0x1000000, &offset);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(offset == 0x1000000);
+	CU_ASSERT(ctrlr.cmb_current_offset == ctrlr.cmb_max_offset);
+}
+
 int main(int argc, char **argv)
 {
 	CU_pSuite	suite = NULL;
@@ -671,6 +717,8 @@ int main(int argc, char **argv)
 			       test_nvme_ctrlr_construct_intel_support_log_page_list) == NULL
 		|| CU_add_test(suite, "test nvme ctrlr function nvme_ctrlr_set_supported_features",
 			       test_nvme_ctrlr_set_supported_features) == NULL
+		|| CU_add_test(suite, "test nvme ctrlr function nvme_ctrlr_alloc_cmb",
+			       test_nvme_ctrlr_alloc_cmb) == NULL
 	) {
 		CU_cleanup_registry();
 		return CU_get_error();
diff --git a/test/lib/nvme/unit/nvme_impl.h b/test/lib/nvme/unit/nvme_impl.h
index d3fb66a495..05c0f973d3 100644
--- a/test/lib/nvme/unit/nvme_impl.h
+++ b/test/lib/nvme/unit/nvme_impl.h
@@ -103,12 +103,26 @@ int nvme_pcicfg_map_bar(void *pci_handle, int bar, int read_only, void **addr)
 	return 0;
 }
 
+static inline int
+nvme_pcicfg_map_bar_write_combine(void *devhandle, uint32_t bar, void **addr)
+{
+	*addr = &g_ut_nvme_regs;
+	return 0;
+}
+
 static inline int
 nvme_pcicfg_unmap_bar(void *devhandle, uint32_t bar, void *addr)
 {
 	return 0;
 }
 
+static inline void
+nvme_pcicfg_get_bar_addr_len(void *devhandle, uint32_t bar, uint64_t *addr, uint64_t *size)
+{
+	*addr = 0;
+	*size = 0;
+}
+
 typedef pthread_mutex_t nvme_mutex_t;
 
 #define nvme_mutex_init(x) pthread_mutex_init((x), NULL)
diff --git a/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c b/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c
index 1b5078fc85..4947ad0683 100644
--- a/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c
+++ b/test/lib/nvme/unit/nvme_qpair_c/nvme_qpair_ut.c
@@ -184,6 +184,13 @@ nvme_request_remove_child(struct nvme_request *parent,
 	TAILQ_REMOVE(&parent->children, child, child_tailq);
 }
 
+int
+nvme_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned,
+		     uint64_t *offset)
+{
+	return -1;
+}
+
 static void
 test1(void)
 {