3cb9bc2593
This new API signals that the ctrlr will soon be reset. This allows the
transport to skip unnecessary steps in subsequent calls into the driver
prior to the reset - for example, skipping PCIe DELETE_SQ/CQ commands
when freeing an IO qpair.

Note that if we are deleting a qpair after prepare_for_reset was called,
and the qpair is still waiting for a CREATE_IO_CQ or CREATE_IO_SQ, we
cannot poll for those commands to complete, but we also cannot free the
qpair immediately. So set a flag for this case to defer the destruction
until the outstanding CREATE_IO_CQ or CREATE_IO_SQ callback is invoked
(typically as an aborted command when the reset happens).

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I34c6276ae71e7d61ad4a3720f1a985b1ee96bd8b
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9249
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
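
A minimal sketch of the deferred-destruction path described above, using the
qpair state and flag names defined in the header below. The surrounding
control flow (the transport's delete-qpair path and the CREATE_IO_CQ/SQ
completion callback) is paraphrased here, and the ctrlr->prepare_for_reset
field is an assumption; only the pcie_state values, flags.defer_destruction,
and nvme_pcie_qpair_destroy() come from this file.

	/* In the delete-qpair path: the qpair is still waiting on CREATE_IO_CQ/SQ
	 * and a reset is imminent, so neither polling (the reset will abort the
	 * command) nor an immediate free (the completion callback still references
	 * the qpair) is safe. Defer the destruction instead. */
	if (ctrlr->prepare_for_reset &&
	    (pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_CQ ||
	     pqpair->pcie_state == NVME_PCIE_QPAIR_WAIT_FOR_SQ)) {
		pqpair->flags.defer_destruction = true;
		return 0;
	}

	/* In the CREATE_IO_CQ/SQ completion callback, typically reached with an
	 * aborted status once the reset happens: finish the deferred teardown. */
	if (pqpair->flags.defer_destruction) {
		nvme_pcie_qpair_destroy(&pqpair->qpair);
	}
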
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_PCIE_INTERNAL_H__
#define __NVME_PCIE_INTERNAL_H__

/*
 * Number of completion queue entries to process before ringing the
 * completion queue doorbell.
 */
#define NVME_MIN_COMPLETIONS	(1)
#define NVME_MAX_COMPLETIONS	(128)

/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
 * segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS	(250)

#define NVME_MAX_PRP_LIST_ENTRIES	(503)

/* PCIe transport extensions for spdk_nvme_ctrlr */
struct nvme_pcie_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;

	/** NVMe MMIO register space */
	volatile struct spdk_nvme_registers *regs;

	/** NVMe MMIO register size */
	uint64_t regs_size;

	struct {
		/* BAR mapping address which contains controller memory buffer */
		void *bar_va;

		/* BAR physical address which contains controller memory buffer */
		uint64_t bar_pa;

		/* Controller memory buffer size in Bytes */
		uint64_t size;

		/* Current offset of controller memory buffer, relative to start of BAR virt addr */
		uint64_t current_offset;

		void *mem_register_addr;
		size_t mem_register_size;
	} cmb;

	struct {
		/* BAR mapping address which contains persistent memory region */
		void *bar_va;

		/* BAR physical address which contains persistent memory region */
		uint64_t bar_pa;

		/* Persistent memory region size in Bytes */
		uint64_t size;

		void *mem_register_addr;
		size_t mem_register_size;
	} pmr;

	/** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
	uint32_t doorbell_stride_u32;

	/* Opaque handle to associated PCI device. */
	struct spdk_pci_device *devhandle;

	/* Flag to indicate the MMIO register has been remapped */
	bool is_remapped;

	volatile uint32_t *doorbell_base;
};

extern __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr;
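
/*
 * g_thread_mmio_ctrlr is set around each MMIO doorbell write (see
 * nvme_pcie_qpair_ring_sq_doorbell() below) so that a SIGBUS raised by the
 * write - e.g. on surprise hot removal of the device - can be attributed to
 * a controller and its dead register window replaced. The sketch below is an
 * illustration only: the function name is hypothetical, it assumes
 * <string.h>, <sys/mman.h>, and a SIGBUS hook like the one registered in
 * nvme_pcie.c, and it omits the locking the real handler would need.
 */
static void
example_sigbus_fault_handler(const void *failure_addr, void *ctx)
{
	void *map_address;

	if (g_thread_mmio_ctrlr == NULL || g_thread_mmio_ctrlr->is_remapped) {
		return;
	}

	/* Replace the faulting BAR mapping with anonymous memory so the
	 * in-flight doorbell write completes harmlessly. */
	map_address = mmap((void *)g_thread_mmio_ctrlr->regs,
			   g_thread_mmio_ctrlr->regs_size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	if (map_address == MAP_FAILED) {
		return;
	}
	memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers));
	g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address;
	g_thread_mmio_ctrlr->is_remapped = true;
}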

struct nvme_tracker {
	TAILQ_ENTRY(nvme_tracker) tq_list;

	struct nvme_request *req;
	uint16_t cid;

	uint16_t bad_vtophys : 1;
	uint16_t rsvd0 : 15;
	uint32_t rsvd1;

	spdk_nvme_cmd_cb cb_fn;
	void *cb_arg;

	uint64_t prp_sgl_bus_addr;

	/* Don't move, metadata SGL is always contiguous with Data Block SGL */
	struct spdk_nvme_sgl_descriptor meta_sgl;
	union {
		uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES];
		struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
	} u;
};
/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
 * and so that there is no padding required to meet alignment requirements.
 */
SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, meta_sgl) & 7) == 0, "SGL must be Qword aligned");

struct nvme_pcie_poll_group {
	struct spdk_nvme_transport_poll_group group;
	struct spdk_nvme_pcie_stat stats;
};

enum nvme_pcie_qpair_state {
	NVME_PCIE_QPAIR_WAIT_FOR_CQ = 1,
	NVME_PCIE_QPAIR_WAIT_FOR_SQ,
	NVME_PCIE_QPAIR_READY,
	NVME_PCIE_QPAIR_FAILED,
};

/* PCIe transport extensions for spdk_nvme_qpair */
struct nvme_pcie_qpair {
	/* Submission queue tail doorbell */
	volatile uint32_t *sq_tdbl;

	/* Completion queue head doorbell */
	volatile uint32_t *cq_hdbl;

	/* Submission queue */
	struct spdk_nvme_cmd *cmd;

	/* Completion queue */
	struct spdk_nvme_cpl *cpl;

	TAILQ_HEAD(, nvme_tracker) free_tr;
	TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr;

	/* Array of trackers indexed by command ID. */
	struct nvme_tracker *tr;

	struct spdk_nvme_pcie_stat *stat;

	uint16_t num_entries;

	uint8_t pcie_state;

	uint8_t retry_count;

	uint16_t max_completions_cap;

	uint16_t last_sq_tail;
	uint16_t sq_tail;
	uint16_t cq_head;
	uint16_t sq_head;

	struct {
		uint8_t phase : 1;
		uint8_t delay_cmd_submit : 1;
		uint8_t has_shadow_doorbell : 1;
		uint8_t has_pending_vtophys_failures : 1;
		uint8_t defer_destruction : 1;
	} flags;

	/*
	 * Base qpair structure.
	 * This is located after the hot data in this structure so that the important parts of
	 * nvme_pcie_qpair are in the same cache line.
	 */
	struct spdk_nvme_qpair qpair;

	struct {
		/* Submission queue shadow tail doorbell */
		volatile uint32_t *sq_tdbl;

		/* Completion queue shadow head doorbell */
		volatile uint32_t *cq_hdbl;

		/* Submission queue event index */
		volatile uint32_t *sq_eventidx;

		/* Completion queue event index */
		volatile uint32_t *cq_eventidx;
	} shadow_doorbell;

	/*
	 * Fields below this point should not be touched on the normal I/O path.
	 */

	bool sq_in_cmb;
	bool shared_stats;

	uint64_t cmd_bus_addr;
	uint64_t cpl_bus_addr;

	struct spdk_nvme_cmd *sq_vaddr;
	struct spdk_nvme_cpl *cq_vaddr;
};

static inline struct nvme_pcie_qpair *
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
{
	return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair);
}

static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
}

static inline int
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old);
}
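/*
 * The unsigned 16-bit subtractions make this comparison robust across index
 * wraparound: it returns true exactly when event_idx lies in the inclusive
 * window from old to new_idx, walking forward modulo 2^16. A worked example
 * that wraps: old = 65533, new_idx = 2, event_idx = 65534 gives
 * (uint16_t)(2 - 65534) = 4 and (uint16_t)(2 - 65533) = 5; 4 <= 5, so the
 * doorbell crossed the event index and an MMIO write is required. With
 * event_idx = 3 (not yet reached), (uint16_t)(2 - 3) = 65535 > 5 -> false,
 * and the MMIO write can be skipped.
 */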

static inline bool
nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value,
				     volatile uint32_t *shadow_db,
				     volatile uint32_t *eventidx)
{
	uint16_t old;

	if (!shadow_db) {
		return true;
	}

	spdk_wmb();

	old = *shadow_db;
	*shadow_db = value;

	/*
	 * Ensure that the doorbell is updated before reading the EventIdx from
	 * memory
	 */
	spdk_mb();

	if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) {
		return false;
	}

	return true;
}

static inline void
nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	bool need_mmio = true;

	if (qpair->first_fused_submitted) {
		/* This is first cmd of two fused commands - don't ring doorbell */
		qpair->first_fused_submitted = 0;
		return;
	}

	if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
		need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
				pqpair->sq_tail,
				pqpair->shadow_doorbell.sq_tdbl,
				pqpair->shadow_doorbell.sq_eventidx);
	}

	if (spdk_likely(need_mmio)) {
		spdk_wmb();
		pqpair->stat->sq_doobell_updates++;
		g_thread_mmio_ctrlr = pctrlr;
		spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail);
		g_thread_mmio_ctrlr = NULL;
	}
}

static inline void
nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	bool need_mmio = true;

	if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
		need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
				pqpair->cq_head,
				pqpair->shadow_doorbell.cq_hdbl,
				pqpair->shadow_doorbell.cq_eventidx);
	}

	if (spdk_likely(need_mmio)) {
		pqpair->stat->cq_doorbell_updates++;
		g_thread_mmio_ctrlr = pctrlr;
		spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head);
		g_thread_mmio_ctrlr = NULL;
	}
}

int nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair);
int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair,
			      const struct spdk_nvme_io_qpair_opts *opts);
int nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t num_entries);
void nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl);
void nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair);
int nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
				     struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
				     void *cb_arg);
int nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
				     struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr);
void nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
		struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
		bool print_on_error);
void nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				      struct spdk_nvme_cpl *cpl, bool print_on_error);
void nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr);
void nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair);
void nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair);
void nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair,
		uint32_t max_completions);
int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair);
struct spdk_nvme_qpair *nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
		const struct spdk_nvme_io_qpair_opts *opts);
int nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);
int nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);

struct spdk_nvme_transport_poll_group *nvme_pcie_poll_group_create(void);
int nvme_pcie_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
			     struct spdk_nvme_qpair *qpair);
int nvme_pcie_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
				struct spdk_nvme_qpair *qpair);
int64_t nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
		uint32_t completions_per_qpair,
		spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
int nvme_pcie_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup);

#endif