nvme: move common pcie transport code to new source files

There are some common data structures and APIs in pcie transport
which can be used both for pcie and vfio-user transport, so move
the common code into a new header and source file.

No actual logic change, just code movement, except for removing the
static function declarations.

Change-Id: Ie9021e703a5780fdd6840f0e3cfea76a0017a811
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5923
Community-CI: Broadcom CI
Reviewed-by: sunshihao <sunshihao@huawei.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Changpeng Liu 2021-01-15 19:44:50 +08:00 committed by Jim Harris
parent f4fd07c68c
commit 6b4b2d2913
5 changed files with 832 additions and 726 deletions

View File

@ -37,8 +37,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 4
SO_MINOR := 2
C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c nvme_ctrlr_ocssd_cmd.c \
nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c
C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie_common.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c \
nvme_ctrlr_ocssd_cmd.c nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c
C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c
C_SRCS-$(CONFIG_NVME_CUSE) += nvme_cuse.c

View File

@ -41,174 +41,17 @@
#include "spdk/likely.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"
#include "nvme_uevent.h"
/*
* Number of completion queue entries to process before ringing the
* completion queue doorbell.
*
* nvme_pcie_qpair_construct() clamps a queue pair's max_completions_cap
* (num_entries / 4) into the range [NVME_MIN_COMPLETIONS, NVME_MAX_COMPLETIONS].
*/
#define NVME_MIN_COMPLETIONS (1)
#define NVME_MAX_COMPLETIONS (128)
/*
* NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
* segment.
*/
#define NVME_MAX_SGL_DESCRIPTORS (250)
/*
* NVME_MAX_PRP_LIST_ENTRIES is the length of the prp[] array embedded in
* struct nvme_tracker.
* NOTE(review): the value looks chosen so that sizeof(struct nvme_tracker) is
* exactly 4096 (enforced by a static assert) - confirm before changing it.
*/
#define NVME_MAX_PRP_LIST_ENTRIES (503)
/*
* Context carried through PCIe enumeration.
* NOTE(review): consumers of this structure are outside this view; the field
* notes below are inferred from names only - confirm against the callers.
*/
struct nvme_pcie_enum_ctx {
/* Probe context associated with this enumeration */
struct spdk_nvme_probe_ctx *probe_ctx;
/* PCI address; presumably only meaningful when has_pci_addr is true */
struct spdk_pci_addr pci_addr;
bool has_pci_addr;
};
/*
* PCIe transport extensions for spdk_nvme_ctrlr.
*
* The generic controller is embedded as the `ctrlr` member; the nvme_pcie_ctrlr()
* helper recovers this structure from a struct spdk_nvme_ctrlr pointer with
* SPDK_CONTAINEROF.
*/
struct nvme_pcie_ctrlr {
struct spdk_nvme_ctrlr ctrlr;
/** NVMe MMIO register space */
volatile struct spdk_nvme_registers *regs;
/** NVMe MMIO register size */
uint64_t regs_size;
struct {
/* BAR mapping address which contains controller memory buffer */
void *bar_va;
/* BAR physical address which contains controller memory buffer */
uint64_t bar_pa;
/* Controller memory buffer size in Bytes */
uint64_t size;
/* Current offset of controller memory buffer, relative to start of BAR virt addr */
uint64_t current_offset;
/*
* When mem_register_addr is non-NULL the BAR is mapped for data and
* nvme_pcie_ctrlr_alloc_cmb() refuses to carve queue memory out of it.
*/
void *mem_register_addr;
size_t mem_register_size;
} cmb;
/** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
uint32_t doorbell_stride_u32;
/* Opaque handle to associated PCI device. */
struct spdk_pci_device *devhandle;
/* Flag to indicate the MMIO register has been remapped */
bool is_remapped;
};
/*
* Per-command tracker. Trackers are allocated as one array per queue pair
* (see nvme_pcie_qpair_construct()) and linked on the qpair's free_tr or
* outstanding_tr lists through tq_list.
*/
struct nvme_tracker {
TAILQ_ENTRY(nvme_tracker) tq_list;
/* Request currently attached to this tracker; NULL after construction */
struct nvme_request *req;
/* Command ID; set to the tracker's index in the qpair's tr[] array */
uint16_t cid;
uint16_t rsvd0;
uint32_t rsvd1;
spdk_nvme_cmd_cb cb_fn;
void *cb_arg;
/* Bus address of u.prp (tracker physical address + offsetof(u.prp)) */
uint64_t prp_sgl_bus_addr;
/* Don't move, metadata SGL is always contiguous with Data Block SGL */
struct spdk_nvme_sgl_descriptor meta_sgl;
/* PRP list or SGL segment for the command; the two share storage */
union {
uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES];
struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
} u;
};
/*
* struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
* and so that there is no padding required to meet alignment requirements.
*/
SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned");
/*
* PCIe transport poll group. It currently carries no PCIe-specific state: the
* generic transport poll group is its only member, so a pointer to `group`
* has the same address as the containing structure.
*/
struct nvme_pcie_poll_group {
struct spdk_nvme_transport_poll_group group;
};
/*
* PCIe transport extensions for spdk_nvme_qpair.
*
* The generic queue pair is embedded as the `qpair` member; the nvme_pcie_qpair()
* helper recovers this structure from a struct spdk_nvme_qpair pointer with
* SPDK_CONTAINEROF.
*/
struct nvme_pcie_qpair {
/* Submission queue tail doorbell */
volatile uint32_t *sq_tdbl;
/* Completion queue head doorbell */
volatile uint32_t *cq_hdbl;
/* Submission queue */
struct spdk_nvme_cmd *cmd;
/* Completion queue */
struct spdk_nvme_cpl *cpl;
/* Trackers available for new commands; filled by nvme_pcie_qpair_construct() */
TAILQ_HEAD(, nvme_tracker) free_tr;
/* NOTE(review): presumably trackers with commands in flight - users are outside this view */
TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr;
/* Array of trackers indexed by command ID. */
struct nvme_tracker *tr;
/* Number of entries in each of the cmd and cpl rings */
uint16_t num_entries;
/* Copied from ctrlr->opts.transport_retry_count at construction time */
uint8_t retry_count;
/* num_entries / 4, clamped to [NVME_MIN_COMPLETIONS, NVME_MAX_COMPLETIONS] */
uint16_t max_completions_cap;
/* Ring indices below are all zeroed by nvme_pcie_qpair_reset() */
uint16_t last_sq_tail;
uint16_t sq_tail;
uint16_t cq_head;
uint16_t sq_head;
struct {
/* Phase bit value expected on new completions; set to 1 on reset */
uint8_t phase : 1;
uint8_t delay_cmd_submit : 1;
/* Set when the controller provides shadow doorbells for this queue */
uint8_t has_shadow_doorbell : 1;
} flags;
/*
* Base qpair structure.
* This is located after the hot data in this structure so that the important parts of
* nvme_pcie_qpair are in the same cache line.
*/
struct spdk_nvme_qpair qpair;
struct {
/* Submission queue shadow tail doorbell */
volatile uint32_t *sq_tdbl;
/* Completion queue shadow head doorbell */
volatile uint32_t *cq_hdbl;
/* Submission queue event index */
volatile uint32_t *sq_eventidx;
/* Completion queue event index */
volatile uint32_t *cq_eventidx;
} shadow_doorbell;
/*
* Fields below this point should not be touched on the normal I/O path.
*/
/* True when the submission queue was allocated from the controller memory buffer */
bool sq_in_cmb;
/* Bus addresses of the rings; used as PRP1 of the Create I/O SQ / CQ commands */
uint64_t cmd_bus_addr;
uint64_t cpl_bus_addr;
/* Caller-supplied ring addresses taken from the qpair options; may be NULL */
struct spdk_nvme_cmd *sq_vaddr;
struct spdk_nvme_cpl *cq_vaddr;
};
/* Forward declarations for functions used before their definitions in this file. */
static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx,
struct spdk_pci_addr *pci_addr);
static int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
const struct spdk_nvme_io_qpair_opts *opts);
static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair);
/*
* Per-thread (thread-local) controller pointer, initially NULL.
* NOTE(review): its readers and writers are outside this view - presumably
* tied to MMIO access bookkeeping; confirm against the rest of the file.
*/
__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;
@ -256,13 +99,6 @@ nvme_pcie_ctrlr_setup_signal(void)
sigaction(SIGBUS, &sa, NULL);
}
/*
 * Map a generic controller to the PCIe controller structure that embeds it.
 *
 * The controller's transport type is asserted to be PCIe before the
 * container structure is derived from the embedded `ctrlr` member.
 */
static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *container;

	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE);

	container = SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
	return container;
}
static int
_nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
{
@ -343,13 +179,6 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx)
return 0;
}
/*
 * Map a generic queue pair to the PCIe queue pair structure that embeds it.
 *
 * The queue pair's transport type is asserted to be PCIe before the
 * container structure is derived from the embedded `qpair` member.
 */
static inline struct nvme_pcie_qpair *
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *container;

	assert(qpair->trtype == SPDK_NVME_TRANSPORT_PCIE);

	container = SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair);
	return container;
}
static volatile void *
nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset)
{
@ -699,34 +528,6 @@ nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr)
return rc;
}
/*
 * Allocate and set up the admin queue pair of a PCIe controller.
 *
 * The PCIe qpair is allocated zeroed and shareable, published as
 * ctrlr->adminq, initialized as queue ID 0 with urgent priority, and then
 * handed to nvme_pcie_qpair_construct() with no caller-supplied options.
 *
 * Returns 0 on success, -ENOMEM if the qpair cannot be allocated, or the
 * error returned by nvme_qpair_init() / nvme_pcie_qpair_construct().
 */
static int
nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries)
{
	struct nvme_pcie_qpair *admin_pqpair;
	int status;

	admin_pqpair = spdk_zmalloc(sizeof(*admin_pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
				    SPDK_MALLOC_SHARE);
	if (!admin_pqpair) {
		return -ENOMEM;
	}

	admin_pqpair->flags.delay_cmd_submit = 0;
	admin_pqpair->num_entries = num_entries;

	/* Publish the admin queue before initializing it, as the generic layer expects. */
	ctrlr->adminq = &admin_pqpair->qpair;

	status = nvme_qpair_init(ctrlr->adminq, 0 /* qpair ID */, ctrlr, SPDK_NVME_QPRIO_URGENT,
				 num_entries);
	if (status == 0) {
		status = nvme_pcie_qpair_construct(ctrlr->adminq, NULL);
	}

	return status;
}
/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
@ -953,201 +754,6 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
return 0;
}
/*
 * Initialize one tracker: no request attached, command ID `cid`, and the bus
 * address of its embedded PRP list derived from the tracker's own physical
 * address `phys_addr`.
 */
static void
nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
{
	const uint64_t prp_offset = offsetof(struct nvme_tracker, u.prp);

	tr->req = NULL;
	tr->cid = cid;
	tr->prp_sgl_bus_addr = phys_addr + prp_offset;
}
/*
 * Return a PCIe queue pair's ring state to its initial values.
 *
 * All head/tail indices are zeroed, the expected phase bit is set to 1, and
 * the phase bit of every completion queue entry is cleared. Always returns 0.
 */
static int
nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	uint32_t slot = 0;

	/* Start every ring index from zero. */
	pq->cq_head = 0;
	pq->sq_head = 0;
	pq->sq_tail = 0;
	pq->last_sq_tail = 0;

	/*
	 * Hardware sets the phase bit to 1 on its first pass through the
	 * completion queue, so a 1 marks a new completion until the queue
	 * wraps; the expected value is toggled on each rollover.
	 */
	pq->flags.phase = 1;

	while (slot < pq->num_entries) {
		pq->cpl[slot].status.p = 0;
		slot++;
	}

	return 0;
}
/*
 * Carve `size` bytes out of the controller memory buffer (CMB).
 *
 * The allocation starts at the current CMB offset, rounded up to `alignment`.
 * NOTE(review): the rounding uses a bit mask, so `alignment` is assumed to be
 * a power of two - confirm at the call sites.
 *
 * On success the virtual address is returned, the matching bus address is
 * stored in *phys_addr, and the CMB offset is advanced past the allocation.
 * NULL is returned when the BAR is already mapped for data or when the
 * request would run past the end of the CMB.
 */
static void *
nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment,
			  uint64_t *phys_addr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	uintptr_t bar_start;
	uintptr_t addr;

	/* BAR is mapped for data */
	if (pctrlr->cmb.mem_register_addr != NULL) {
		return NULL;
	}

	bar_start = (uintptr_t)pctrlr->cmb.bar_va;

	addr = bar_start + pctrlr->cmb.current_offset;
	addr = (addr + (alignment - 1)) & ~(alignment - 1);

	/* CMB may only consume part of the BAR, calculate accordingly */
	if (addr + size > (bar_start + pctrlr->cmb.size)) {
		SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
		return NULL;
	}

	pctrlr->cmb.current_offset = (addr + size) - bar_start;
	*phys_addr = pctrlr->cmb.bar_pa + addr - bar_start;

	return (void *)addr;
}
/*
* Set up the PCIe-specific parts of a queue pair: submission and completion
* rings, their bus addresses, doorbell pointers and the tracker array.
*
* qpair - generic queue pair embedded in a struct nvme_pcie_qpair whose
*         num_entries has already been set by the caller.
* opts  - optional; when non-NULL, caller-supplied ring virtual/physical
*         addresses are taken from opts->sq and opts->cq.
*
* Returns 0 on success, -ENOMEM when a ring or the tracker array cannot be
* allocated, or -EFAULT when a ring's virtual address cannot be translated.
*
* NOTE(review): the error paths return without releasing memory allocated
* earlier in this function; presumably the caller cleans up (e.g. through
* nvme_pcie_qpair_destroy()) - confirm.
*/
static int
nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
const struct spdk_nvme_io_qpair_opts *opts)
{
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr;
uint16_t i;
volatile uint32_t *doorbell_base;
uint16_t num_trackers;
size_t page_align = sysconf(_SC_PAGESIZE);
size_t queue_align, queue_len;
uint32_t flags = SPDK_MALLOC_DMA;
uint64_t sq_paddr = 0;
uint64_t cq_paddr = 0;
/* Pick up caller-provided ring memory, if any. */
if (opts) {
pqpair->sq_vaddr = opts->sq.vaddr;
pqpair->cq_vaddr = opts->cq.vaddr;
sq_paddr = opts->sq.paddr;
cq_paddr = opts->cq.paddr;
}
pqpair->retry_count = ctrlr->opts.transport_retry_count;
/*
* Limit the maximum number of completions to return per call to prevent wraparound,
* and calculate how many trackers can be submitted at once without overflowing the
* completion queue.
*/
pqpair->max_completions_cap = pqpair->num_entries / 4;
pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS);
pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS);
num_trackers = pqpair->num_entries - pqpair->max_completions_cap;
SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n",
pqpair->max_completions_cap, num_trackers);
assert(num_trackers != 0);
pqpair->sq_in_cmb = false;
/* Admin queue rings are additionally allocated as shareable memory. */
if (nvme_qpair_is_admin_queue(&pqpair->qpair)) {
flags |= SPDK_MALLOC_SHARE;
}
/* cmd and cpl rings must be aligned on page size boundaries. */
if (ctrlr->opts.use_cmb_sqs) {
pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
page_align, &pqpair->cmd_bus_addr);
if (pqpair->cmd != NULL) {
pqpair->sq_in_cmb = true;
}
}
/* Fall back to caller-supplied or freshly allocated host memory for the SQ. */
if (pqpair->sq_in_cmb == false) {
if (pqpair->sq_vaddr) {
pqpair->cmd = pqpair->sq_vaddr;
} else {
/* To ensure physical address contiguity we make each ring occupy
* a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
*/
queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd);
queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
if (pqpair->cmd == NULL) {
SPDK_ERRLOG("alloc qpair_cmd failed\n");
return -ENOMEM;
}
}
if (sq_paddr) {
assert(pqpair->sq_vaddr != NULL);
pqpair->cmd_bus_addr = sq_paddr;
} else {
pqpair->cmd_bus_addr = spdk_vtophys(pqpair->cmd, NULL);
if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) {
SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
return -EFAULT;
}
}
}
/* The completion ring is caller-supplied or allocated from host memory. */
if (pqpair->cq_vaddr) {
pqpair->cpl = pqpair->cq_vaddr;
} else {
queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl);
queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
if (pqpair->cpl == NULL) {
SPDK_ERRLOG("alloc qpair_cpl failed\n");
return -ENOMEM;
}
}
if (cq_paddr) {
assert(pqpair->cq_vaddr != NULL);
pqpair->cpl_bus_addr = cq_paddr;
} else {
pqpair->cpl_bus_addr = spdk_vtophys(pqpair->cpl, NULL);
if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) {
SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
return -EFAULT;
}
}
/*
* Doorbells are laid out as SQ tail / CQ head pairs per queue ID, each
* separated by the controller's doorbell stride.
*/
doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl;
pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32;
pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32;
/*
* Reserve space for all of the trackers in a single allocation.
* struct nvme_tracker must be padded so that its size is already a power of 2.
* This ensures the PRP list embedded in the nvme_tracker object will not span a
* 4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
*/
pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL,
SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
if (pqpair->tr == NULL) {
SPDK_ERRLOG("nvme_tr failed\n");
return -ENOMEM;
}
TAILQ_INIT(&pqpair->free_tr);
TAILQ_INIT(&pqpair->outstanding_tr);
/*
* Every tracker starts on the free list with its command ID equal to its index.
* NOTE(review): the spdk_vtophys() result is not checked against
* SPDK_VTOPHYS_ERROR here, unlike the ring translations above.
*/
for (i = 0; i < num_trackers; i++) {
tr = &pqpair->tr[i];
nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr, NULL));
TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
}
nvme_pcie_qpair_reset(qpair);
return 0;
}
/* Used when dst points to MMIO (i.e. CMB) in a virtual machine - in these cases we must
* not use wide instructions because QEMU will not emulate such instructions to MMIO space.
* So this function ensures we only copy 8 bytes at a time.
@ -1181,71 +787,6 @@ nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *sr
#endif
}
/**
 * Hand a completed admin request over to the process that submitted it.
 *
 * The completion is copied into the request and the request is appended to
 * the owning process's active_reqs list so that process can finish it later.
 * If the owning process is not known to the controller, the request is
 * logged and freed instead.
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr_process *owner;

	/* Only admin requests submitted by a different process are routed here. */
	assert(nvme_qpair_is_admin_queue(qpair));
	assert(req->pid != getpid());

	owner = nvme_ctrlr_get_process(qpair->ctrlr, req->pid);
	if (owner == NULL) {
		SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
			    req->pid);
		nvme_free_request(req);
		return;
	}

	/* Save the original completion information */
	memcpy(&req->cpl, cpl, sizeof(*cpl));
	STAILQ_INSERT_TAIL(&owner->active_reqs, req, stailq);
}
/**
 * Complete every admin request queued for the calling process.
 *
 * Each request on the current process's active_reqs list is unlinked,
 * completed with the completion entry saved in the request, and freed.
 * If the current process is not registered with the controller, an error is
 * logged and nothing is completed.
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair)
{
	pid_t self_pid = getpid();
	struct spdk_nvme_ctrlr_process *self_proc;
	struct nvme_request *pending, *next_pending;

	assert(nvme_qpair_is_admin_queue(qpair));

	self_proc = nvme_ctrlr_get_current_process(qpair->ctrlr);
	if (self_proc == NULL) {
		SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", self_pid);
		assert(self_proc);
		return;
	}

	/* Drain the requests that other processes completed on our behalf. */
	STAILQ_FOREACH_SAFE(pending, &self_proc->active_reqs, stailq, next_pending) {
		STAILQ_REMOVE(&self_proc->active_reqs, pending, nvme_request, stailq);

		assert(pending->pid == self_pid);

		nvme_complete_request(pending->cb_fn, pending->cb_arg, qpair, pending, &pending->cpl);
		nvme_free_request(pending);
	}
}
static inline int
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
@ -1538,180 +1079,6 @@ nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
nvme_pcie_qpair_abort_trackers(qpair, dnr);
}
/*
 * Submit a Create I/O Completion Queue admin command for `io_que`.
 *
 * The command carries the queue ID, a zero-based queue size
 * (num_entries - 1), the physically-contiguous flag, and the completion
 * ring's bus address in PRP1. `cb_fn`/`cb_arg` are attached to the request.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request().
 */
static int
nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
				 void *cb_arg)
{
	struct nvme_pcie_qpair *io_pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_CREATE_IO_CQ;
	admin_req->cmd.cdw10_bits.create_io_q.qid = io_que->id;
	admin_req->cmd.cdw10_bits.create_io_q.qsize = io_pqpair->num_entries - 1;
	admin_req->cmd.cdw11_bits.create_io_cq.pc = 1;
	admin_req->cmd.dptr.prp.prp1 = io_pqpair->cpl_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Submit a Create I/O Submission Queue admin command for `io_que`.
 *
 * The command carries the queue ID, a zero-based queue size
 * (num_entries - 1), the physically-contiguous flag, the queue priority,
 * the ID of the paired completion queue (same as the queue ID), and the
 * submission ring's bus address in PRP1.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request().
 */
static int
nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_pcie_qpair *io_pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_CREATE_IO_SQ;
	admin_req->cmd.cdw10_bits.create_io_q.qid = io_que->id;
	admin_req->cmd.cdw10_bits.create_io_q.qsize = io_pqpair->num_entries - 1;
	admin_req->cmd.cdw11_bits.create_io_sq.pc = 1;
	admin_req->cmd.cdw11_bits.create_io_sq.qprio = io_que->qprio;
	admin_req->cmd.cdw11_bits.create_io_sq.cqid = io_que->id;
	admin_req->cmd.dptr.prp.prp1 = io_pqpair->cmd_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Submit a Delete I/O Completion Queue admin command for `qpair`.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request().
 */
static int
nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_DELETE_IO_CQ;
	admin_req->cmd.cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Submit a Delete I/O Submission Queue admin command for `qpair`.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request().
 */
static int
nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_DELETE_IO_SQ;
	admin_req->cmd.cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
* Create an I/O queue pair on the controller: issue Create I/O CQ, then
* Create I/O SQ, wait for each to complete, set up shadow doorbell pointers
* when the controller provides them, and reset the ring state.
*
* If creating the SQ fails, an attempt is made to delete the CQ again.
*
* ctrlr - controller owning the queue pair.
* qpair - queue pair to create; qpair->id selects the queue.
* qid   - not referenced in this function (qpair->id is used instead).
*
* Returns 0 on success, -ENOMEM if a status tracker cannot be allocated, the
* submit error if a create command cannot be submitted, or -1 on other failures.
*
* Ownership of `status`: when a wait times out the structure is not freed
* here, because the request is still queued and the memory will be freed in a
* completion callback (see the comment in the SQ failure path).
*/
static int
_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
uint16_t qid)
{
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_completion_poll_status *status;
int rc;
status = calloc(1, sizeof(*status));
if (!status) {
SPDK_ERRLOG("Failed to allocate status tracker\n");
return -ENOMEM;
}
/* Step 1: create the completion queue. */
rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
free(status);
return rc;
}
if (nvme_wait_for_completion(ctrlr->adminq, status)) {
SPDK_ERRLOG("nvme_create_io_cq failed!\n");
if (!status->timed_out) {
free(status);
}
return -1;
}
/* Step 2: reuse the status tracker and create the submission queue. */
memset(status, 0, sizeof(*status));
rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
free(status);
return rc;
}
if (nvme_wait_for_completion(ctrlr->adminq, status)) {
SPDK_ERRLOG("nvme_create_io_sq failed!\n");
if (status->timed_out) {
/* Request is still queued, the memory will be freed in a completion callback.
allocate a new request */
status = calloc(1, sizeof(*status));
if (!status) {
SPDK_ERRLOG("Failed to allocate status tracker\n");
return -ENOMEM;
}
}
memset(status, 0, sizeof(*status));
/* Attempt to delete the completion queue */
rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
/* The original or newly allocated status structure can be freed since
* the corresponding request has been completed or failed to submit */
free(status);
return -1;
}
nvme_wait_for_completion(ctrlr->adminq, status);
if (!status->timed_out) {
/* status can be freed regardless of nvme_wait_for_completion return value */
free(status);
}
return -1;
}
/*
* Shadow doorbell and event index pointers use the same per-queue layout
* as the MMIO doorbells: SQ at (2 * id), CQ at (2 * id + 1), scaled by the
* doorbell stride.
*/
if (ctrlr->shadow_doorbell) {
pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
pctrlr->doorbell_stride_u32;
pqpair->flags.has_shadow_doorbell = 1;
} else {
pqpair->flags.has_shadow_doorbell = 0;
}
nvme_pcie_qpair_reset(qpair);
free(status);
return 0;
}
static struct spdk_nvme_qpair *
nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
const struct spdk_nvme_io_qpair_opts *opts)
@ -1749,21 +1116,6 @@ nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
return qpair;
}
/*
 * Connect a queue pair to the controller.
 *
 * The admin queue needs no work and reports success immediately; an I/O
 * queue is created on the controller via _nvme_pcie_ctrlr_create_io_qpair().
 */
static int
nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	if (!nvme_qpair_is_admin_queue(qpair)) {
		return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
	}

	return 0;
}
/*
 * Disconnect a queue pair from the controller.
 * Intentionally a no-op in this implementation.
 */
static void
nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	(void)ctrlr;
	(void)qpair;
}
/* Forward declaration; the definition appears later in this file. */
static int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair,
uint32_t max_completions);
@ -2482,81 +1834,6 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
return num_completions;
}
/*
 * Allocate a zero-initialized PCIe poll group.
 *
 * Returns a pointer to the embedded generic transport poll group, or NULL
 * (after logging an error) if the allocation fails.
 */
static struct spdk_nvme_transport_poll_group *
nvme_pcie_poll_group_create(void)
{
	struct nvme_pcie_poll_group *pcie_group;

	pcie_group = calloc(1, sizeof(*pcie_group));
	if (!pcie_group) {
		SPDK_ERRLOG("Unable to allocate poll group.\n");
		return NULL;
	}

	return &pcie_group->group;
}
/*
 * Poll group hook invoked when a queue pair connects.
 * Nothing is done here; the hook always reports success.
 */
static int
nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
	(void)qpair;

	return 0;
}
/*
 * Poll group hook invoked when a queue pair disconnects.
 * Nothing is done here; the hook always reports success.
 */
static int
nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
	(void)qpair;

	return 0;
}
/*
 * Poll group hook invoked when a queue pair is added to a group.
 * Nothing is done here; the hook always reports success.
 */
static int
nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			 struct spdk_nvme_qpair *qpair)
{
	(void)tgroup;
	(void)qpair;

	return 0;
}
/*
 * Poll group hook invoked when a queue pair is removed from a group.
 * Nothing is done here; the hook always reports success.
 */
static int
nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			    struct spdk_nvme_qpair *qpair)
{
	(void)tgroup;
	(void)qpair;

	return 0;
}
/*
 * Poll every queue pair in a transport poll group.
 *
 * First, `disconnected_qpair_cb` is invoked for each queue pair on the
 * group's disconnected list. Then each connected queue pair is polled for up
 * to `completions_per_qpair` completions; a negative result from a queue pair
 * triggers `disconnected_qpair_cb` for it and contributes nothing to the total.
 *
 * Returns the sum of completions processed across the connected queue pairs.
 */
static int64_t
nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct spdk_nvme_qpair *qp, *next_qp;
	int64_t completed_total = 0;
	int32_t completed;

	STAILQ_FOREACH_SAFE(qp, &tgroup->disconnected_qpairs, poll_group_stailq, next_qp) {
		disconnected_qpair_cb(qp, tgroup->group->ctx);
	}

	STAILQ_FOREACH_SAFE(qp, &tgroup->connected_qpairs, poll_group_stailq, next_qp) {
		completed = spdk_nvme_qpair_process_completions(qp, completions_per_qpair);
		if (completed < 0) {
			/* A failing queue pair is reported and counts as zero completions. */
			disconnected_qpair_cb(qp, tgroup->group->ctx);
			continue;
		}
		completed_total += completed;
	}

	return completed_total;
}
/*
 * Free a PCIe poll group.
 *
 * Returns -EBUSY without freeing anything while the group still has
 * connected or disconnected queue pairs; otherwise frees it and returns 0.
 */
static int
nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	if (STAILQ_EMPTY(&tgroup->connected_qpairs) && STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		/*
		 * The generic group is the first member of struct nvme_pcie_poll_group,
		 * so this pointer is also the address returned by calloc().
		 */
		free(tgroup);
		return 0;
	}

	return -EBUSY;
}
static struct spdk_pci_id nvme_pci_driver_id[] = {
{
.class_id = SPDK_PCI_CLASS_NVME,

593
lib/nvme/nvme_pcie_common.c Normal file
View File

@ -0,0 +1,593 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* NVMe over PCIe common library
*/
#include "spdk/stdinc.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_pcie_internal.h"
/*
 * Return a PCIe queue pair's ring state to its initial values.
 *
 * All head/tail indices are zeroed, the expected phase bit is set to 1, and
 * the phase bit of every completion queue entry is cleared. Always returns 0.
 */
int
nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pq = nvme_pcie_qpair(qpair);
	uint32_t slot = 0;

	/* Start every ring index from zero. */
	pq->cq_head = 0;
	pq->sq_head = 0;
	pq->sq_tail = 0;
	pq->last_sq_tail = 0;

	/*
	 * Hardware sets the phase bit to 1 on its first pass through the
	 * completion queue, so a 1 marks a new completion until the queue
	 * wraps; the expected value is toggled on each rollover.
	 */
	pq->flags.phase = 1;

	while (slot < pq->num_entries) {
		pq->cpl[slot].status.p = 0;
		slot++;
	}

	return 0;
}
/*
 * Initialize one tracker: no request attached, command ID `cid`, and the bus
 * address of its embedded PRP list derived from the tracker's own physical
 * address `phys_addr`.
 */
static void
nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
{
	const uint64_t prp_offset = offsetof(struct nvme_tracker, u.prp);

	tr->req = NULL;
	tr->cid = cid;
	tr->prp_sgl_bus_addr = phys_addr + prp_offset;
}
/*
 * Carve `size` bytes out of the controller memory buffer (CMB).
 *
 * The allocation starts at the current CMB offset, rounded up to `alignment`.
 * NOTE(review): the rounding uses a bit mask, so `alignment` is assumed to be
 * a power of two - confirm at the call sites.
 *
 * On success the virtual address is returned, the matching bus address is
 * stored in *phys_addr, and the CMB offset is advanced past the allocation.
 * NULL is returned when the BAR is already mapped for data or when the
 * request would run past the end of the CMB.
 */
static void *
nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t size, uint64_t alignment,
			  uint64_t *phys_addr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	uintptr_t bar_start;
	uintptr_t addr;

	/* BAR is mapped for data */
	if (pctrlr->cmb.mem_register_addr != NULL) {
		return NULL;
	}

	bar_start = (uintptr_t)pctrlr->cmb.bar_va;

	addr = bar_start + pctrlr->cmb.current_offset;
	addr = (addr + (alignment - 1)) & ~(alignment - 1);

	/* CMB may only consume part of the BAR, calculate accordingly */
	if (addr + size > (bar_start + pctrlr->cmb.size)) {
		SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
		return NULL;
	}

	pctrlr->cmb.current_offset = (addr + size) - bar_start;
	*phys_addr = pctrlr->cmb.bar_pa + addr - bar_start;

	return (void *)addr;
}
/*
* Set up the PCIe-specific parts of a queue pair: submission and completion
* rings, their bus addresses, doorbell pointers and the tracker array.
*
* qpair - generic queue pair embedded in a struct nvme_pcie_qpair whose
*         num_entries has already been set by the caller.
* opts  - optional; when non-NULL, caller-supplied ring virtual/physical
*         addresses are taken from opts->sq and opts->cq.
*
* Returns 0 on success, -ENOMEM when a ring or the tracker array cannot be
* allocated, or -EFAULT when a ring's virtual address cannot be translated.
*
* NOTE(review): the error paths return without releasing memory allocated
* earlier in this function; presumably the caller is responsible for
* destroying the queue pair on failure - confirm.
*/
int
nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
const struct spdk_nvme_io_qpair_opts *opts)
{
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_tracker *tr;
uint16_t i;
volatile uint32_t *doorbell_base;
uint16_t num_trackers;
size_t page_align = sysconf(_SC_PAGESIZE);
size_t queue_align, queue_len;
uint32_t flags = SPDK_MALLOC_DMA;
uint64_t sq_paddr = 0;
uint64_t cq_paddr = 0;
/* Pick up caller-provided ring memory, if any. */
if (opts) {
pqpair->sq_vaddr = opts->sq.vaddr;
pqpair->cq_vaddr = opts->cq.vaddr;
sq_paddr = opts->sq.paddr;
cq_paddr = opts->cq.paddr;
}
pqpair->retry_count = ctrlr->opts.transport_retry_count;
/*
* Limit the maximum number of completions to return per call to prevent wraparound,
* and calculate how many trackers can be submitted at once without overflowing the
* completion queue.
*/
pqpair->max_completions_cap = pqpair->num_entries / 4;
pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS);
pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS);
num_trackers = pqpair->num_entries - pqpair->max_completions_cap;
SPDK_INFOLOG(nvme, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n",
pqpair->max_completions_cap, num_trackers);
assert(num_trackers != 0);
pqpair->sq_in_cmb = false;
/* Admin queue rings are additionally allocated as shareable memory. */
if (nvme_qpair_is_admin_queue(&pqpair->qpair)) {
flags |= SPDK_MALLOC_SHARE;
}
/* cmd and cpl rings must be aligned on page size boundaries. */
if (ctrlr->opts.use_cmb_sqs) {
pqpair->cmd = nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
page_align, &pqpair->cmd_bus_addr);
if (pqpair->cmd != NULL) {
pqpair->sq_in_cmb = true;
}
}
/* Fall back to caller-supplied or freshly allocated host memory for the SQ. */
if (pqpair->sq_in_cmb == false) {
if (pqpair->sq_vaddr) {
pqpair->cmd = pqpair->sq_vaddr;
} else {
/* To ensure physical address contiguity we make each ring occupy
* a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
*/
queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cmd);
queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
pqpair->cmd = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
if (pqpair->cmd == NULL) {
SPDK_ERRLOG("alloc qpair_cmd failed\n");
return -ENOMEM;
}
}
if (sq_paddr) {
assert(pqpair->sq_vaddr != NULL);
pqpair->cmd_bus_addr = sq_paddr;
} else {
pqpair->cmd_bus_addr = spdk_vtophys(pqpair->cmd, NULL);
if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) {
SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
return -EFAULT;
}
}
}
/* The completion ring is caller-supplied or allocated from host memory. */
if (pqpair->cq_vaddr) {
pqpair->cpl = pqpair->cq_vaddr;
} else {
queue_len = pqpair->num_entries * sizeof(struct spdk_nvme_cpl);
queue_align = spdk_max(spdk_align32pow2(queue_len), page_align);
pqpair->cpl = spdk_zmalloc(queue_len, queue_align, NULL, SPDK_ENV_SOCKET_ID_ANY, flags);
if (pqpair->cpl == NULL) {
SPDK_ERRLOG("alloc qpair_cpl failed\n");
return -ENOMEM;
}
}
if (cq_paddr) {
assert(pqpair->cq_vaddr != NULL);
pqpair->cpl_bus_addr = cq_paddr;
} else {
pqpair->cpl_bus_addr = spdk_vtophys(pqpair->cpl, NULL);
if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) {
SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
return -EFAULT;
}
}
/*
* Doorbells are laid out as SQ tail / CQ head pairs per queue ID, each
* separated by the controller's doorbell stride.
*/
doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl;
pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32;
pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32;
/*
* Reserve space for all of the trackers in a single allocation.
* struct nvme_tracker must be padded so that its size is already a power of 2.
* This ensures the PRP list embedded in the nvme_tracker object will not span a
* 4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
*/
pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL,
SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
if (pqpair->tr == NULL) {
SPDK_ERRLOG("nvme_tr failed\n");
return -ENOMEM;
}
TAILQ_INIT(&pqpair->free_tr);
TAILQ_INIT(&pqpair->outstanding_tr);
/*
* Every tracker starts on the free list with its command ID equal to its index.
* NOTE(review): the spdk_vtophys() result is not checked against
* SPDK_VTOPHYS_ERROR here, unlike the ring translations above.
*/
for (i = 0; i < num_trackers; i++) {
tr = &pqpair->tr[i];
nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr, NULL));
TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
}
nvme_pcie_qpair_reset(qpair);
return 0;
}
/*
 * Allocate and set up the admin queue pair of a PCIe controller.
 *
 * The PCIe qpair is allocated zeroed and shareable, published as
 * ctrlr->adminq, initialized as queue ID 0 with urgent priority, and then
 * handed to nvme_pcie_qpair_construct() with no caller-supplied options.
 *
 * Returns 0 on success, -ENOMEM if the qpair cannot be allocated, or the
 * error returned by nvme_qpair_init() / nvme_pcie_qpair_construct().
 */
int
nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries)
{
	struct nvme_pcie_qpair *admin_pqpair;
	int status;

	admin_pqpair = spdk_zmalloc(sizeof(*admin_pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
				    SPDK_MALLOC_SHARE);
	if (!admin_pqpair) {
		return -ENOMEM;
	}

	admin_pqpair->flags.delay_cmd_submit = 0;
	admin_pqpair->num_entries = num_entries;

	/* Publish the admin queue before initializing it, as the generic layer expects. */
	ctrlr->adminq = &admin_pqpair->qpair;

	status = nvme_qpair_init(ctrlr->adminq, 0 /* qpair ID */, ctrlr, SPDK_NVME_QPRIO_URGENT,
				 num_entries);
	if (status == 0) {
		status = nvme_pcie_qpair_construct(ctrlr->adminq, NULL);
	}

	return status;
}
/**
 * Hand a completed admin request over to the process that submitted it.
 *
 * The completion is copied into the request and the request is appended to
 * the owning process's active_reqs list so that process can finish it later.
 * If the owning process is not known to the controller, the request is
 * logged and freed instead.
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
void
nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr_process *owner;

	/* Only admin requests submitted by a different process are routed here. */
	assert(nvme_qpair_is_admin_queue(qpair));
	assert(req->pid != getpid());

	owner = nvme_ctrlr_get_process(qpair->ctrlr, req->pid);
	if (owner == NULL) {
		SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
			    req->pid);
		nvme_free_request(req);
		return;
	}

	/* Save the original completion information */
	memcpy(&req->cpl, cpl, sizeof(*cpl));
	STAILQ_INSERT_TAIL(&owner->active_reqs, req, stailq);
}
/**
 * Complete every admin request queued on the current process's active_reqs
 * list, using the completion that was saved in each request (req->cpl).
 *
 * If no process entry exists for the caller, an error is logged and the
 * function returns without completing anything (debug builds assert).
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
void
nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair)
{
struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
struct nvme_request *req, *tmp_req;
pid_t pid = getpid();
struct spdk_nvme_ctrlr_process *proc;
/*
 * Check whether there is any pending admin request from
 * other active processes.
 */
assert(nvme_qpair_is_admin_queue(qpair));
proc = nvme_ctrlr_get_current_process(ctrlr);
if (!proc) {
SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid);
assert(proc);
return;
}
/* The _SAFE iterator is required: each request is unlinked and freed inside the loop. */
STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
/* Requests on this list are expected to belong to the calling process. */
assert(req->pid == pid);
nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl);
nvme_free_request(req);
}
}
/*
 * Build and submit the admin command that creates the completion queue of an
 * I/O qpair.
 *
 * The command carries the qpair ID, a queue size of num_entries - 1, the pc
 * flag, and the completion queue bus address in PRP1.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request(). cb_fn/cb_arg are attached to the request.
 */
int
nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
				 void *cb_arg)
{
	struct nvme_pcie_qpair *pcie_que;
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	pcie_que = nvme_pcie_qpair(io_que);

	admin_req->cmd.opc = SPDK_NVME_OPC_CREATE_IO_CQ;
	admin_req->cmd.cdw10_bits.create_io_q.qid = io_que->id;
	admin_req->cmd.cdw10_bits.create_io_q.qsize = pcie_que->num_entries - 1;
	admin_req->cmd.cdw11_bits.create_io_cq.pc = 1;
	admin_req->cmd.dptr.prp.prp1 = pcie_que->cpl_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Build and submit the admin command that creates the submission queue of an
 * I/O qpair.
 *
 * The command carries the qpair ID, a queue size of num_entries - 1, the pc
 * flag, the qpair priority, and the submission queue bus address in PRP1.
 * The completion queue ID is set to the same ID as the submission queue.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request(). cb_fn/cb_arg are attached to the request.
 */
int
nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_pcie_qpair *pcie_que;
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	pcie_que = nvme_pcie_qpair(io_que);

	admin_req->cmd.opc = SPDK_NVME_OPC_CREATE_IO_SQ;
	admin_req->cmd.cdw10_bits.create_io_q.qid = io_que->id;
	admin_req->cmd.cdw10_bits.create_io_q.qsize = pcie_que->num_entries - 1;
	admin_req->cmd.cdw11_bits.create_io_sq.pc = 1;
	admin_req->cmd.cdw11_bits.create_io_sq.qprio = io_que->qprio;
	admin_req->cmd.cdw11_bits.create_io_sq.cqid = io_que->id;
	admin_req->cmd.dptr.prp.prp1 = pcie_que->cmd_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Build and submit the admin command that deletes the completion queue whose
 * ID matches qpair->id.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request(). cb_fn/cb_arg are attached to the request.
 */
int
nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_DELETE_IO_CQ;
	admin_req->cmd.cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Build and submit the admin command that deletes the submission queue whose
 * ID matches qpair->id.
 *
 * Returns -ENOMEM if no request can be allocated, otherwise the result of
 * nvme_ctrlr_submit_admin_request(). cb_fn/cb_arg are attached to the request.
 */
int
nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *admin_req;

	admin_req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (!admin_req) {
		return -ENOMEM;
	}

	admin_req->cmd.opc = SPDK_NVME_OPC_DELETE_IO_SQ;
	admin_req->cmd.cdw10_bits.delete_io_q.qid = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, admin_req);
}
/*
 * Create the completion queue and then the submission queue of an I/O qpair,
 * waiting on the admin queue for each command to complete.
 *
 * If creating the submission queue fails after the completion queue exists,
 * a delete of the completion queue is attempted before returning.
 * On success the shadow doorbell pointers are set up (when the controller has
 * a shadow doorbell buffer) and the qpair is reset.
 *
 * The qid parameter is not referenced in the body; qpair->id is used instead.
 *
 * Ownership of `status`: once a wait reports status->timed_out, the structure
 * must not be freed or reused here; it is left for the completion callback.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, the submit error code
 * if a create command cannot be submitted, or -1 for any other failure.
 */
static int
_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
uint16_t qid)
{
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_completion_poll_status *status;
int rc;
status = calloc(1, sizeof(*status));
if (!status) {
SPDK_ERRLOG("Failed to allocate status tracker\n");
return -ENOMEM;
}
/* Step 1: create the completion queue. */
rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
/* Nothing was submitted, so status is still exclusively ours. */
free(status);
return rc;
}
if (nvme_wait_for_completion(ctrlr->adminq, status)) {
SPDK_ERRLOG("nvme_create_io_cq failed!\n");
/* On timeout the status structure is left alone (not freed here). */
if (!status->timed_out) {
free(status);
}
return -1;
}
/* Step 2: reuse the same status structure for the submission queue command. */
memset(status, 0, sizeof(*status));
rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
/* NOTE(review): the completion queue created above is not deleted on this path. */
free(status);
return rc;
}
if (nvme_wait_for_completion(ctrlr->adminq, status)) {
SPDK_ERRLOG("nvme_create_io_sq failed!\n");
if (status->timed_out) {
/* The request is still queued and its status memory will be freed in a
 * completion callback, so allocate a new status structure for the cleanup command. */
status = calloc(1, sizeof(*status));
if (!status) {
SPDK_ERRLOG("Failed to allocate status tracker\n");
return -ENOMEM;
}
}
memset(status, 0, sizeof(*status));
/* Attempt to delete the completion queue */
rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
if (rc != 0) {
/* The original or newly allocated status structure can be freed since
 * the corresponding request has either completed or failed to submit. */
free(status);
return -1;
}
nvme_wait_for_completion(ctrlr->adminq, status);
if (!status->timed_out) {
/* status can be freed regardless of nvme_wait_for_completion return value */
free(status);
}
return -1;
}
/*
 * Both queues exist. Shadow doorbell and event index pointers use the same
 * (2 * id + 0/1) * stride offsets: +0 for the submission queue, +1 for the
 * completion queue.
 */
if (ctrlr->shadow_doorbell) {
pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
pctrlr->doorbell_stride_u32;
pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
pctrlr->doorbell_stride_u32;
pqpair->flags.has_shadow_doorbell = 1;
} else {
pqpair->flags.has_shadow_doorbell = 0;
}
nvme_pcie_qpair_reset(qpair);
free(status);
return 0;
}
/*
 * Connect a qpair on the PCIe transport.
 *
 * For the admin queue this does nothing and returns 0. For any other qpair it
 * returns the result of creating the I/O queue pair on the controller.
 */
int
nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	if (!nvme_qpair_is_admin_queue(qpair)) {
		return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
	}

	return 0;
}
/*
 * Disconnect hook for a PCIe qpair. The body is empty: this function performs
 * no action and neither argument is used.
 */
void
nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
}
/*
 * Allocate a zero-initialized PCIe poll group.
 *
 * Returns a pointer to the embedded generic poll group, or NULL (after logging
 * an error) if the allocation fails.
 */
struct spdk_nvme_transport_poll_group *
nvme_pcie_poll_group_create(void)
{
	struct nvme_pcie_poll_group *pgroup;

	pgroup = calloc(1, sizeof(*pgroup));
	if (pgroup != NULL) {
		return &pgroup->group;
	}

	SPDK_ERRLOG("Unable to allocate poll group.\n");
	return NULL;
}
/*
 * Poll-group connect hook for a PCIe qpair. Performs no work: qpair is unused
 * and the function always returns 0.
 */
int
nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
{
return 0;
}
/*
 * Poll-group disconnect hook for a PCIe qpair. Performs no work: qpair is
 * unused and the function always returns 0.
 */
int
nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
{
return 0;
}
/*
 * Poll-group add hook for the PCIe transport. Performs no work: both arguments
 * are unused and the function always returns 0.
 */
int
nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
struct spdk_nvme_qpair *qpair)
{
return 0;
}
/*
 * Poll-group remove hook for the PCIe transport. Performs no work: both
 * arguments are unused and the function always returns 0.
 */
int
nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
struct spdk_nvme_qpair *qpair)
{
return 0;
}
/*
 * Process completions for every qpair in a poll group.
 *
 * First, disconnected_qpair_cb is invoked for each qpair on the group's
 * disconnected list. Then each connected qpair is polled with
 * completions_per_qpair as its limit. A negative poll result also triggers
 * disconnected_qpair_cb for that qpair and adds nothing to the total.
 *
 * Returns the sum of the non-negative per-qpair completion counts.
 */
int64_t
nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
{
	struct spdk_nvme_qpair *cur, *next;
	int64_t completed = 0;

	STAILQ_FOREACH_SAFE(cur, &tgroup->disconnected_qpairs, poll_group_stailq, next) {
		disconnected_qpair_cb(cur, tgroup->group->ctx);
	}

	STAILQ_FOREACH_SAFE(cur, &tgroup->connected_qpairs, poll_group_stailq, next) {
		int32_t polled = spdk_nvme_qpair_process_completions(cur, completions_per_qpair);

		if (polled < 0) {
			/* Polling failed: report the qpair and count nothing for it. */
			disconnected_qpair_cb(cur, tgroup->group->ctx);
			continue;
		}

		completed += polled;
	}

	return completed;
}
/*
 * Free a PCIe poll group.
 *
 * The group is released only when both its connected and disconnected qpair
 * lists are empty; otherwise nothing is freed and -EBUSY is returned.
 * Returns 0 after the group has been freed.
 */
int
nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
{
	if (STAILQ_EMPTY(&tgroup->connected_qpairs) && STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
		free(tgroup);
		return 0;
	}

	return -EBUSY;
}

View File

@ -0,0 +1,235 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __NVME_PCIE_INTERNAL_H__
#define __NVME_PCIE_INTERNAL_H__
/*
 * Number of completion queue entries to process before ringing the
 * completion queue doorbell.
 */
#define NVME_MIN_COMPLETIONS (1)
#define NVME_MAX_COMPLETIONS (128)
/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
 * segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS (250)
/*
 * NVME_MAX_PRP_LIST_ENTRIES is the number of 64-bit entries in the PRP list
 * embedded in struct nvme_tracker (u.prp[]). It shares a union with the SGL
 * descriptor array, and struct nvme_tracker is statically asserted to be
 * exactly 4096 bytes, so changing either limit affects that assertion.
 */
#define NVME_MAX_PRP_LIST_ENTRIES (503)
/*
 * PCIe transport extensions for spdk_nvme_ctrlr.
 *
 * The generic controller is embedded as the `ctrlr` member; nvme_pcie_ctrlr()
 * recovers this structure from a pointer to that member.
 */
struct nvme_pcie_ctrlr {
/* Embedded generic controller object. */
struct spdk_nvme_ctrlr ctrlr;
/** NVMe MMIO register space */
volatile struct spdk_nvme_registers *regs;
/** NVMe MMIO register size */
uint64_t regs_size;
/* Controller memory buffer (CMB) state. */
struct {
/* BAR mapping address which contains controller memory buffer */
void *bar_va;
/* BAR physical address which contains controller memory buffer */
uint64_t bar_pa;
/* Controller memory buffer size in Bytes */
uint64_t size;
/* Current offset of controller memory buffer, relative to start of BAR virt addr */
uint64_t current_offset;
/* NOTE(review): presumably the address/size of the CMB range registered as memory - confirm. */
void *mem_register_addr;
size_t mem_register_size;
} cmb;
/** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
uint32_t doorbell_stride_u32;
/* Opaque handle to associated PCI device. */
struct spdk_pci_device *devhandle;
/* Flag to indicate the MMIO register has been remapped */
bool is_remapped;
};
/*
 * Per-command tracker. Trackers are kept on the qpair's free_tr or
 * outstanding_tr lists and are stored in the qpair's tr[] array, which is
 * indexed by command ID.
 */
struct nvme_tracker {
/* Linkage for the qpair's free_tr / outstanding_tr lists. */
TAILQ_ENTRY(nvme_tracker) tq_list;
/* Request associated with this tracker. */
struct nvme_request *req;
/* Command ID. */
uint16_t cid;
uint16_t rsvd0;
uint32_t rsvd1;
spdk_nvme_cmd_cb cb_fn;
void *cb_arg;
/* NOTE(review): presumably the bus address of the embedded PRP/SGL area below - confirm. */
uint64_t prp_sgl_bus_addr;
/* Don't move, metadata SGL is always contiguous with Data Block SGL */
struct spdk_nvme_sgl_descriptor meta_sgl;
/* A command uses either the PRP list or the SGL descriptors, so they share storage. */
union {
uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES];
struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
} u;
};
/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
 * and so that there is no padding required to meet alignment requirements.
 */
SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K");
/* Both SGL areas must start on an 8-byte boundary within the tracker. */
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned");
/*
 * PCIe transport poll group. It currently contains only the generic transport
 * poll group, which is its first member.
 */
struct nvme_pcie_poll_group {
struct spdk_nvme_transport_poll_group group;
};
/*
 * PCIe transport extensions for spdk_nvme_qpair.
 *
 * The generic qpair is embedded as the `qpair` member (not the first member);
 * nvme_pcie_qpair() recovers this structure from a pointer to that member.
 */
struct nvme_pcie_qpair {
/* Submission queue tail doorbell */
volatile uint32_t *sq_tdbl;
/* Completion queue head doorbell */
volatile uint32_t *cq_hdbl;
/* Submission queue */
struct spdk_nvme_cmd *cmd;
/* Completion queue */
struct spdk_nvme_cpl *cpl;
/* Trackers available for new commands. */
TAILQ_HEAD(, nvme_tracker) free_tr;
/* Trackers currently associated with outstanding commands. */
TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr;
/* Array of trackers indexed by command ID. */
struct nvme_tracker *tr;
/* Number of queue entries; the create-queue commands program qsize as num_entries - 1. */
uint16_t num_entries;
uint8_t retry_count;
uint16_t max_completions_cap;
uint16_t last_sq_tail;
uint16_t sq_tail;
uint16_t cq_head;
uint16_t sq_head;
struct {
uint8_t phase : 1;
uint8_t delay_cmd_submit : 1;
/* Set when the shadow_doorbell pointers below have been populated. */
uint8_t has_shadow_doorbell : 1;
} flags;
/*
 * Base qpair structure.
 * This is located after the hot data in this structure so that the important parts of
 * nvme_pcie_qpair are in the same cache line.
 */
struct spdk_nvme_qpair qpair;
struct {
/* Submission queue shadow tail doorbell */
volatile uint32_t *sq_tdbl;
/* Completion queue shadow head doorbell */
volatile uint32_t *cq_hdbl;
/* Submission queue event index */
volatile uint32_t *sq_eventidx;
/* Completion queue event index */
volatile uint32_t *cq_eventidx;
} shadow_doorbell;
/*
 * Fields below this point should not be touched on the normal I/O path.
 */
bool sq_in_cmb;
/* Submission queue bus address; sent as PRP1 of the create I/O SQ command. */
uint64_t cmd_bus_addr;
/* Completion queue bus address; sent as PRP1 of the create I/O CQ command. */
uint64_t cpl_bus_addr;
struct spdk_nvme_cmd *sq_vaddr;
struct spdk_nvme_cpl *cq_vaddr;
};
/*
 * Convert a generic qpair pointer into the PCIe qpair that embeds it.
 * The argument must point at the `qpair` member of a struct nvme_pcie_qpair.
 */
static inline struct nvme_pcie_qpair *
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair);

	return pqpair;
}
/*
 * Convert a generic controller pointer into the PCIe controller that embeds it.
 * The argument must point at the `ctrlr` member of a struct nvme_pcie_ctrlr.
 */
static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);

	return pctrlr;
}
/*
 * Functions shared by the transports that include this header.
 */

/* Reset a PCIe qpair; called at the end of qpair construction and after I/O queue creation. */
int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair);
/* Set up the PCIe-specific parts of a qpair; the admin qpair passes NULL for opts. */
int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
const struct spdk_nvme_io_qpair_opts *opts);
/* Allocate, initialize and construct the admin qpair and store it in ctrlr->adminq. */
int nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries);
/*
 * Queue an admin request owned by another process on that process's list,
 * saving its completion. The ctrlr_lock must be held by the caller.
 */
void nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
struct nvme_request *req, struct spdk_nvme_cpl *cpl);
/*
 * Complete all admin requests queued for the current process.
 * The ctrlr_lock must be held by the caller.
 */
void nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair);
/*
 * Submit create/delete I/O queue admin commands. Each returns -ENOMEM if a
 * request cannot be allocated, otherwise the admin submission result.
 */
int nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
void *cb_arg);
int nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
spdk_nvme_cmd_cb cb_fn, void *cb_arg);
/* Connect a qpair: a no-op returning 0 for the admin queue, I/O queue creation otherwise. */
int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
/* Disconnect a qpair; the common implementation has an empty body. */
void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
/* Allocate a poll group; returns NULL on allocation failure. */
struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void);
/* The following four poll-group hooks do no work and always return 0. */
int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
struct spdk_nvme_qpair *qpair);
/*
 * Poll every connected qpair in the group and return the total number of
 * completions; disconnected_qpair_cb is invoked for disconnected qpairs and
 * for qpairs whose polling returns a negative value.
 */
int64_t nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
uint32_t completions_per_qpair,
spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
/* Free the poll group; returns -EBUSY if it still has connected or disconnected qpairs. */
int nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup);
#endif

View File

@ -38,6 +38,7 @@
#define UNIT_TEST_NO_VTOPHYS
#include "nvme/nvme_pcie.c"
#include "nvme/nvme_pcie_common.c"
#include "common/lib/nvme/common_stubs.h"
pid_t g_spdk_nvme_pid;