606b453fcd
Session start callbacks have technically two ways of returning an error code:

 a) as the callback return value, e.g. return -1
 b) via vhost_session_start_done(int rc)

The former doesn't have any effect, as that return value is not checked
anywhere. It's only present because we wanted to use the same function
signature as for the foreach_session() callbacks. It's going to be cleaned
up in subsequent patches, but before that happens we have to fix vhost_nvme.
With vhost_session_start_done(-1) called, DPDK will terminate the entire
socket connection.

Change-Id: Ib1eff17a67eef055b0dc89fa13d1fed2f8a1fbf0
Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com>
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/467230
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
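
For illustration, a minimal sketch of the two error paths described above; the session start callback later in this file follows the same shape, while do_start_work() is a hypothetical placeholder:

    static int
    example_start_cb(struct spdk_vhost_dev *vdev,
                     struct spdk_vhost_session *vsession, void *unused)
    {
            int rc = do_start_work(vsession); /* hypothetical setup step */

            /* Path b): the status that actually matters. A negative rc
             * here makes DPDK terminate the entire socket connection.
             */
            vhost_session_start_done(vsession, rc);

            /* Path a): ignored by the caller; it only exists to match
             * the foreach_session() callback signature.
             */
            return rc;
    }
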
1550 lines
38 KiB
C
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/conf.h"
#include "spdk/util.h"
#include "spdk/string.h"
#include "spdk/thread.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "spdk/bdev.h"
#include "spdk/version.h"
#include "spdk/nvme_spec.h"
#include "spdk/likely.h"

#include "vhost_internal.h"

#define MAX_IO_QUEUES 31
#define MAX_IOVS 64
#define MAX_NAMESPACE 8
#define MAX_QUEUE_ENTRIES_SUPPORTED 256
#define MAX_BATCH_IO 8

struct spdk_vhost_nvme_sq {
	uint16_t sqid;
	uint16_t size;
	uint16_t cqid;
	bool valid;
	struct spdk_nvme_cmd *sq_cmd;
	uint16_t sq_head;
	uint16_t sq_tail;
};

struct spdk_vhost_nvme_cq {
	uint8_t phase;
	uint16_t size;
	uint16_t cqid;
	bool valid;
	volatile struct spdk_nvme_cpl *cq_cqe;
	uint16_t cq_head;
	uint16_t guest_signaled_cq_head;
	uint32_t need_signaled_cnt;
	STAILQ_HEAD(, spdk_vhost_nvme_task) cq_full_waited_tasks;
	bool irq_enabled;
	int virq;
};

struct spdk_vhost_nvme_ns {
	struct spdk_bdev *bdev;
	uint32_t block_size;
	uint64_t capacity;
	uint32_t nsid;
	uint32_t active_ns;
	struct spdk_bdev_desc *bdev_desc;
	struct spdk_io_channel *bdev_io_channel;
	struct spdk_nvme_ns_data nsdata;
};

struct spdk_vhost_nvme_task {
	struct spdk_nvme_cmd cmd;
	struct spdk_vhost_nvme_dev *nvme;
	uint16_t sqid;
	uint16_t cqid;

	/** array of iovecs to transfer. */
	struct iovec iovs[MAX_IOVS];

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/* for bdev_io_wait */
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_vhost_nvme_ns *ns;

	/* parent pointer. */
	struct spdk_vhost_nvme_task *parent;
	uint8_t dnr;
	uint8_t sct;
	uint8_t sc;
	uint32_t num_children;
	STAILQ_ENTRY(spdk_vhost_nvme_task) stailq;
};

struct spdk_vhost_nvme_dev {
	struct spdk_vhost_dev vdev;

	uint32_t num_io_queues;
	union spdk_nvme_cap_register cap;
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	struct spdk_nvme_ctrlr_data cdata;

	uint32_t num_sqs;
	uint32_t num_cqs;

	uint32_t num_ns;
	struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE];

	volatile uint32_t *bar;
	volatile uint32_t *bar_db;
	uint64_t bar_size;
	bool dataplane_started;

	volatile uint32_t *dbbuf_dbs;
	volatile uint32_t *dbbuf_eis;
	struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1];
	struct spdk_vhost_nvme_cq cq_queue[MAX_IO_QUEUES + 1];

	/* The one and only session associated with this device */
	struct spdk_vhost_session *vsession;

	TAILQ_ENTRY(spdk_vhost_nvme_dev) tailq;
	STAILQ_HEAD(, spdk_vhost_nvme_task) free_tasks;
	struct spdk_poller *requestq_poller;
	struct spdk_poller *stop_poller;
};

static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend;

/*
 * Report the SPDK version as the firmware revision.
 * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
 */
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING

static int
spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
		     struct spdk_vhost_nvme_task *task);

static struct spdk_vhost_nvme_dev *
to_nvme_dev(struct spdk_vhost_dev *vdev)
{
	if (vdev->backend != &spdk_vhost_nvme_device_backend) {
		SPDK_ERRLOG("%s: not a vhost-nvme device\n", vdev->name);
		return NULL;
	}

	return SPDK_CONTAINEROF(vdev, struct spdk_vhost_nvme_dev, vdev);
}

static TAILQ_HEAD(, spdk_vhost_nvme_dev) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs);

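/*
 * Doorbell layout: with a doorbell stride of db_stride dwords, the
 * submission queue tail doorbell of queue `qid` lives at dword
 * qid * 2 * db_stride and the matching completion queue head doorbell
 * at dword (qid * 2 + 1) * db_stride, mirroring the NVMe register map.
 */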
static inline unsigned int sq_offset(unsigned int qid, uint32_t db_stride)
{
	return qid * 2 * db_stride;
}

static inline unsigned int cq_offset(unsigned int qid, uint32_t db_stride)
{
	return (qid * 2 + 1) * db_stride;
}

static void
nvme_inc_cq_head(struct spdk_vhost_nvme_cq *cq)
{
	cq->cq_head++;
	if (cq->cq_head >= cq->size) {
		cq->cq_head = 0;
		cq->phase = !cq->phase;
	}
}

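/*
 * The CQ counts as full when advancing cq_head would collide with the
 * head the guest last signaled via its doorbell; completions for a full
 * CQ are parked on cq_full_waited_tasks until the guest consumes more
 * entries.
 */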
static bool
nvme_cq_is_full(struct spdk_vhost_nvme_cq *cq)
{
	return ((cq->cq_head + 1) % cq->size == cq->guest_signaled_cq_head);
}

static void
nvme_inc_sq_head(struct spdk_vhost_nvme_sq *sq)
{
	sq->sq_head = (sq->sq_head + 1) % sq->size;
}

static struct spdk_vhost_nvme_sq *
spdk_vhost_nvme_get_sq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
{
	if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
		return NULL;
	}

	return &dev->sq_queue[qid];
}

static struct spdk_vhost_nvme_cq *
spdk_vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
{
	if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
		return NULL;
	}

	return &dev->cq_queue[qid];
}

static inline uint32_t
spdk_vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset)
{
	if (nvme->dataplane_started) {
		return nvme->dbbuf_dbs[offset];

	} else if (nvme->bar) {
		return nvme->bar_db[offset];
	}

	assert(0);

	return 0;
}

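/*
 * Translate the PRP entries of a command into an iovec array.  PRP1 may
 * point into the middle of a page; the remainder of the transfer comes
 * either directly from PRP2 (transfers of up to one more page) or from
 * a PRP list addressed by PRP2.  A fixed 4 KiB memory page size (MPS)
 * is assumed throughout.
 */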
static int
spdk_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd,
		   struct spdk_vhost_nvme_task *task, uint32_t len)
{
	struct spdk_vhost_session *vsession = nvme->vsession;
	uint64_t prp1, prp2;
	void *vva;
	uint32_t i;
	uint32_t residue_len, nents, mps = 4096;
	uint64_t *prp_list;

	prp1 = cmd->dptr.prp.prp1;
	prp2 = cmd->dptr.prp.prp2;

	/* PRP1 may start at an unaligned page address */
	residue_len = mps - (prp1 % mps);
	residue_len = spdk_min(len, residue_len);

	vva = vhost_gpa_to_vva(vsession, prp1, residue_len);
	if (spdk_unlikely(vva == NULL)) {
		SPDK_ERRLOG("GPA to VVA failed\n");
		return -1;
	}
	task->iovs[0].iov_base = vva;
	task->iovs[0].iov_len = residue_len;
	len -= residue_len;

	if (len) {
		if (spdk_unlikely(prp2 == 0)) {
			SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PRP2=0 in command\n");
			return -1;
		}

		if (len <= mps) {
			/* 2 PRP used */
			task->iovcnt = 2;
			vva = vhost_gpa_to_vva(vsession, prp2, len);
			if (spdk_unlikely(vva == NULL)) {
				return -1;
			}
			task->iovs[1].iov_base = vva;
			task->iovs[1].iov_len = len;
		} else {
			/* PRP list used */
			nents = (len + mps - 1) / mps;
			vva = vhost_gpa_to_vva(vsession, prp2, nents * sizeof(*prp_list));
			if (spdk_unlikely(vva == NULL)) {
				return -1;
			}
			prp_list = vva;
			i = 0;
			while (len != 0) {
				residue_len = spdk_min(len, mps);
				vva = vhost_gpa_to_vva(vsession, prp_list[i], residue_len);
				if (spdk_unlikely(vva == NULL)) {
					return -1;
				}
				task->iovs[i + 1].iov_base = vva;
				task->iovs[i + 1].iov_len = residue_len;
				len -= residue_len;
				i++;
			}
			task->iovcnt = i + 1;
		}
	} else {
		/* 1 PRP used */
		task->iovcnt = 1;
	}

	return 0;
}

static void
spdk_nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme)
{
	struct spdk_vhost_nvme_cq *cq;
	uint32_t qid, cq_head;

	assert(nvme != NULL);

	for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {
		cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
		if (!cq || !cq->valid) {
			continue;
		}

		cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1));
		if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) {
			eventfd_write(cq->virq, (eventfd_t)1);
			cq->need_signaled_cnt = 0;
		}
	}
}

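/*
 * Post a completion: the CQE is first written with the *inverted* phase
 * bit, a store barrier is issued, and only then is the live phase bit
 * set, so the guest can never observe a half-written entry.
 */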
static void
spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task)
{
	struct spdk_vhost_nvme_dev *nvme = task->nvme;
	struct spdk_nvme_cpl cqe = {0};
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	uint16_t cqid = task->cqid;
	uint16_t sqid = task->sqid;

	cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid);
	sq = spdk_vhost_nvme_get_sq_from_qid(nvme, sqid);
	if (spdk_unlikely(!cq || !sq)) {
		return;
	}

	cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1));
	if (spdk_unlikely(nvme_cq_is_full(cq))) {
		STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq);
		return;
	}

	cqe.sqid = sqid;
	cqe.sqhd = sq->sq_head;
	cqe.cid = cmd->cid;
	cqe.status.dnr = task->dnr;
	cqe.status.sct = task->sct;
	cqe.status.sc = task->sc;
	cqe.status.p = !cq->phase;
	cq->cq_cqe[cq->cq_head] = cqe;
	spdk_smp_wmb();
	cq->cq_cqe[cq->cq_head].status.p = cq->phase;

	nvme_inc_cq_head(cq);
	cq->need_signaled_cnt++;

	/* MMIO Control */
	if (nvme->dataplane_started) {
		nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1);
	}

	STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
}

static void
blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_vhost_nvme_task *task = cb_arg;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	int sc, sct;

	assert(bdev_io != NULL);

	spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
	spdk_bdev_free_io(bdev_io);

	task->dnr = !success;
	task->sct = sct;
	task->sc = sc;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("I/O error, sector %u\n", cmd->cdw10);
	}

	spdk_vhost_nvme_task_complete(task);
}

static void
blk_unmap_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_vhost_nvme_task *child = cb_arg;
	struct spdk_vhost_nvme_task *task = child->parent;
	struct spdk_vhost_nvme_dev *nvme = task->nvme;
	int sct, sc;

	assert(bdev_io != NULL);

	task->num_children--;
	if (!success) {
		task->dnr = 1;
		spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
		task->sct = sct;
		task->sc = sc;
	}

	spdk_bdev_free_io(bdev_io);

	if (!task->num_children) {
		spdk_vhost_nvme_task_complete(task);
	}

	STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
}

static struct spdk_vhost_nvme_ns *
spdk_vhost_nvme_get_ns_from_nsid(struct spdk_vhost_nvme_dev *dev, uint32_t nsid)
{
	if (spdk_unlikely(!nsid || nsid > dev->num_ns)) {
		return NULL;
	}

	return &dev->ns[nsid - 1];
}

static void
vhost_nvme_resubmit_task(void *arg)
{
	struct spdk_vhost_nvme_task *task = (struct spdk_vhost_nvme_task *)arg;
	int rc;

	rc = spdk_nvme_process_sq(task->nvme, task->sq, task);
	if (rc) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "vhost_nvme: task resubmit failed, rc = %d.\n", rc);
	}
}

static int
vhost_nvme_queue_task(struct spdk_vhost_nvme_task *task)
{
	int rc;

	task->bdev_io_wait.bdev = task->ns->bdev;
	task->bdev_io_wait.cb_fn = vhost_nvme_resubmit_task;
	task->bdev_io_wait.cb_arg = task;

	rc = spdk_bdev_queue_io_wait(task->ns->bdev, task->ns->bdev_io_channel, &task->bdev_io_wait);
	if (rc != 0) {
		SPDK_ERRLOG("Queue io failed in vhost_nvme_queue_task, rc=%d.\n", rc);
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		spdk_vhost_nvme_task_complete(task);
	}

	return rc;
}

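/*
 * Decode and submit one command from a submission queue.  The starting
 * LBA is assembled from CDW10 (low 32 bits) and CDW11 (high 32 bits),
 * the number of logical blocks is the 0's based value in CDW12, and for
 * Dataset Management the 0's based range count comes from CDW10.
 */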
static int
spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
		     struct spdk_vhost_nvme_task *task)
{
	struct spdk_vhost_nvme_task *child;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	struct spdk_vhost_nvme_ns *ns;
	int ret = -1;
	uint32_t len, nlba, block_size;
	uint64_t slba;
	struct spdk_nvme_dsm_range *range;
	uint16_t i, num_ranges = 0;

	task->nvme = nvme;
	task->dnr = 0;
	task->sct = 0;
	task->sc = 0;

	ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, cmd->nsid);
	if (spdk_unlikely(!ns)) {
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
		spdk_vhost_nvme_task_complete(task);
		return -1;
	}

	block_size = ns->block_size;
	task->num_children = 0;
	task->cqid = sq->cqid;
	task->sqid = sq->sqid;

	task->ns = ns;

	if (spdk_unlikely(!ns->active_ns)) {
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
		spdk_vhost_nvme_task_complete(task);
		return -1;
	}

	/* valid only for Read/Write commands */
	nlba = (cmd->cdw12 & 0xffff) + 1;
	slba = cmd->cdw11;
	slba = (slba << 32) | cmd->cdw10;

	if (cmd->opc == SPDK_NVME_OPC_READ || cmd->opc == SPDK_NVME_OPC_WRITE ||
	    cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		if (cmd->psdt != SPDK_NVME_PSDT_PRP) {
			SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PSDT %u%ub in command\n",
				      cmd->psdt >> 1, cmd->psdt & 1u);
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INVALID_FIELD;
			spdk_vhost_nvme_task_complete(task);
			return -1;
		}

		if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
			num_ranges = (cmd->cdw10 & 0xff) + 1;
			len = num_ranges * sizeof(struct spdk_nvme_dsm_range);
		} else {
			len = nlba * block_size;
		}

		ret = spdk_nvme_map_prps(nvme, cmd, task, len);
		if (spdk_unlikely(ret != 0)) {
			SPDK_ERRLOG("nvme command map prps failed\n");
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INVALID_FIELD;
			spdk_vhost_nvme_task_complete(task);
			return -1;
		}
	}

	switch (cmd->opc) {
	case SPDK_NVME_OPC_READ:
		ret = spdk_bdev_readv(ns->bdev_desc, ns->bdev_io_channel,
				      task->iovs, task->iovcnt, slba * block_size,
				      nlba * block_size, blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_WRITE:
		ret = spdk_bdev_writev(ns->bdev_desc, ns->bdev_io_channel,
				       task->iovs, task->iovcnt, slba * block_size,
				       nlba * block_size, blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_FLUSH:
		ret = spdk_bdev_flush(ns->bdev_desc, ns->bdev_io_channel,
				      0, ns->capacity,
				      blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
		range = (struct spdk_nvme_dsm_range *)task->iovs[0].iov_base;
		for (i = 0; i < num_ranges; i++) {
			if (!STAILQ_EMPTY(&nvme->free_tasks)) {
				child = STAILQ_FIRST(&nvme->free_tasks);
				STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
			} else {
				SPDK_ERRLOG("No free task now\n");
				ret = -1;
				break;
			}
			task->num_children++;
			child->parent = task;
			ret = spdk_bdev_unmap(ns->bdev_desc, ns->bdev_io_channel,
					      range[i].starting_lba * block_size,
					      range[i].length * block_size,
					      blk_unmap_complete_cb, child);
			if (ret) {
				STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
				break;
			}
		}
		break;
	default:
		ret = -1;
		break;
	}

	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "No memory, start to queue io.\n");
			task->sq = sq;
			ret = vhost_nvme_queue_task(task);
		} else {
			/* post error status to cqe */
			SPDK_ERRLOG("Error Submission For Command %u, ret %d\n", cmd->opc, ret);
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			spdk_vhost_nvme_task_complete(task);
		}
	}

	return ret;
}

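/*
 * Main request poller: for every valid submission queue, first retire
 * tasks that were waiting for CQ space, then pick up new commands up to
 * the tail signaled through the shadow doorbell (or the BAR doorbell
 * before the shadow buffers exist), processing at most MAX_BATCH_IO
 * commands per queue per iteration before kicking the completion
 * eventfds.
 */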
static int
nvme_worker(void *arg)
{
	struct spdk_vhost_nvme_dev *nvme = (struct spdk_vhost_nvme_dev *)arg;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_task *task;
	uint32_t qid, dbbuf_sq;
	int ret;
	int count = -1;

	if (spdk_unlikely(!nvme->num_sqs)) {
		return -1;
	}

	if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) {
		return -1;
	}

	for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {

		sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
		if (!sq->valid) {
			continue;
		}
		cq = spdk_vhost_nvme_get_cq_from_qid(nvme, sq->cqid);
		if (spdk_unlikely(!cq)) {
			return -1;
		}
		cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1));
		if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) &&
				  !nvme_cq_is_full(cq))) {
			task = STAILQ_FIRST(&cq->cq_full_waited_tasks);
			STAILQ_REMOVE_HEAD(&cq->cq_full_waited_tasks, stailq);
			spdk_vhost_nvme_task_complete(task);
		}

		dbbuf_sq = spdk_vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1));
		sq->sq_tail = (uint16_t)dbbuf_sq;
		count = 0;

		while (sq->sq_head != sq->sq_tail) {
			if (spdk_unlikely(!sq->sq_cmd)) {
				break;
			}
			if (spdk_likely(!STAILQ_EMPTY(&nvme->free_tasks))) {
				task = STAILQ_FIRST(&nvme->free_tasks);
				STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
			} else {
				return -1;
			}

			task->cmd = sq->sq_cmd[sq->sq_head];
			nvme_inc_sq_head(sq);

			/* processing IO */
			ret = spdk_nvme_process_sq(nvme, sq, task);
			if (spdk_unlikely(ret)) {
				SPDK_ERRLOG("QID %u CID %u, SQ HEAD %u, DBBUF SQ TAIL %u\n", qid, task->cmd.cid, sq->sq_head,
					    sq->sq_tail);
			}

			/* MMIO Control */
			if (nvme->dataplane_started) {
				nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1);
			}

			/* Maximum batch I/Os to pick up at once */
			if (count++ == MAX_BATCH_IO) {
				break;
			}
		}
	}

	/* Completion Queue */
	spdk_nvme_cq_signal_fd(nvme);

	return count;
}

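/*
 * Doorbell Buffer Config (an optional NVMe admin command used by
 * emulated controllers): PRP1 carries the guest-physical address of the
 * shadow doorbell page and PRP2 the EventIdx page, both 4 KiB aligned.
 * Once both pages are mapped, the data plane switches from BAR
 * doorbells to the shadow buffers.
 */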
static int
vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme,
				  struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	struct spdk_vhost_session *vsession = nvme->vsession;
	uint64_t dbs_dma_addr, eis_dma_addr;

	dbs_dma_addr = cmd->dptr.prp.prp1;
	eis_dma_addr = cmd->dptr.prp.prp2;

	if ((dbs_dma_addr % 4096) || (eis_dma_addr % 4096)) {
		return -1;
	}
	/* Guest Physical Address to Host Virtual Address */
	nvme->dbbuf_dbs = vhost_gpa_to_vva(vsession, dbs_dma_addr, 4096);
	nvme->dbbuf_eis = vhost_gpa_to_vva(vsession, eis_dma_addr, 4096);
	if (!nvme->dbbuf_dbs || !nvme->dbbuf_eis) {
		return -1;
	}
	/* zero the doorbell buffer memory */
	memset((void *)nvme->dbbuf_dbs, 0, 4096);
	memset((void *)nvme->dbbuf_eis, 0, 4096);

	cpl->status.sc = 0;
	cpl->status.sct = 0;

	/* Data plane started */
	nvme->dataplane_started = true;

	return 0;
}

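/*
 * Create I/O SQ: CDW10 holds the queue ID (low word) and the 0's based
 * queue size (high word); CDW11 holds the PC bit (bit 0, only
 * physically contiguous queues are accepted) and the completion queue
 * ID (high word).  PRP1 is the page-aligned base of the queue memory.
 */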
static int
vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid, qsize, cqid;
	uint64_t dma_addr;
	uint64_t requested_len;
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_sq *sq;

	/* physically contiguous */
	if (!(cmd->cdw11 & 0x1)) {
		return -1;
	}

	cqid = (cmd->cdw11 >> 16) & 0xffff;
	qid = cmd->cdw10 & 0xffff;
	qsize = (cmd->cdw10 >> 16) & 0xffff;
	dma_addr = cmd->dptr.prp.prp1;
	if (!dma_addr || dma_addr % 4096) {
		return -1;
	}

	sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
	cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid);
	if (!sq || !cq) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u or CQID %u\n",
			      qid, cqid);
		cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		return -1;
	}

	sq->sqid = qid;
	sq->cqid = cqid;
	sq->size = qsize + 1;
	sq->sq_head = sq->sq_tail = 0;
	requested_len = sizeof(struct spdk_nvme_cmd) * sq->size;
	sq->sq_cmd = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
	if (!sq->sq_cmd) {
		return -1;
	}
	nvme->num_sqs++;
	sq->valid = true;
	if (nvme->bar) {
		nvme->bar_db[sq_offset(qid, 1)] = 0;
	}

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static int
vhost_nvme_delete_io_sq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid;
	struct spdk_vhost_nvme_sq *sq;

	qid = cmd->cdw10 & 0xffff;
	sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
	if (!sq) {
		return -1;
	}

	/* We have not yet seen a submission queue get deleted
	 * while I/O is still running against it; if that ever
	 * happens, we must first ensure the poller no longer
	 * runs with this submission queue.
	 */
	nvme->num_sqs--;
	sq->valid = false;

	memset(sq, 0, sizeof(*sq));
	sq->sq_cmd = NULL;

	cpl->status.sc = 0;
	cpl->status.sct = 0;

	return 0;
}

static int
vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qsize, qid;
	uint64_t dma_addr;
	struct spdk_vhost_nvme_cq *cq;
	uint64_t requested_len;

	/* physically contiguous */
	if (!(cmd->cdw11 & 0x1)) {
		return -1;
	}

	qid = cmd->cdw10 & 0xffff;
	qsize = (cmd->cdw10 >> 16) & 0xffff;
	dma_addr = cmd->dptr.prp.prp1;
	if (!dma_addr || dma_addr % 4096) {
		return -1;
	}

	cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u\n", qid);
		cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		return -1;
	}
	cq->cqid = qid;
	cq->size = qsize + 1;
	cq->phase = 1;
	cq->irq_enabled = (cmd->cdw11 >> 1) & 0x1;
	/* Setup virq through vhost messages */
	cq->virq = -1;
	cq->cq_head = 0;
	cq->guest_signaled_cq_head = 0;
	cq->need_signaled_cnt = 0;
	requested_len = sizeof(struct spdk_nvme_cpl) * cq->size;
	cq->cq_cqe = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
	if (!cq->cq_cqe) {
		return -1;
	}
	nvme->num_cqs++;
	cq->valid = true;
	if (nvme->bar) {
		nvme->bar_db[cq_offset(qid, 1)] = 0;
	}
	STAILQ_INIT(&cq->cq_full_waited_tasks);

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static int
vhost_nvme_delete_io_cq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid;
	struct spdk_vhost_nvme_cq *cq;

	qid = cmd->cdw10 & 0xffff;
	cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		return -1;
	}
	nvme->num_cqs--;
	cq->valid = false;

	memset(cq, 0, sizeof(*cq));
	cq->cq_cqe = NULL;

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static struct spdk_vhost_nvme_dev *
spdk_vhost_nvme_get_by_name(int vid)
{
	struct spdk_vhost_nvme_dev *nvme;
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(nvme, &g_nvme_ctrlrs, tailq) {
		vdev = &nvme->vdev;
		TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
			if (vsession->vid == vid) {
				return nvme;
			}
		}
	}

	return NULL;
}

int
vhost_nvme_get_cap(int vid, uint64_t *cap)
{
	struct spdk_vhost_nvme_dev *nvme;

	nvme = spdk_vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	*cap = nvme->cap.raw;
	return 0;
}

int
vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
{
	struct spdk_nvme_cmd *req = (struct spdk_nvme_cmd *)cmd;
	struct spdk_nvme_cpl *cpl = (struct spdk_nvme_cpl *)cqe;
	struct spdk_vhost_nvme_ns *ns;
	int ret = 0;
	struct spdk_vhost_nvme_dev *nvme;

	nvme = spdk_vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Admin Command Opcode %u\n", req->opc);
	switch (req->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		if (req->cdw10 == SPDK_NVME_IDENTIFY_CTRLR) {
			memcpy(buf, &nvme->cdata, sizeof(struct spdk_nvme_ctrlr_data));

		} else if (req->cdw10 == SPDK_NVME_IDENTIFY_NS) {
			ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, req->nsid);
			if (!ns) {
				cpl->status.sc = SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE;
				cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
				break;
			}
			memcpy(buf, &ns->nsdata, sizeof(struct spdk_nvme_ns_data));
		}
		/* success */
		cpl->status.sc = 0;
		cpl->status.sct = 0;
		break;
	case SPDK_NVME_OPC_CREATE_IO_CQ:
		ret = vhost_nvme_create_io_cq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_DELETE_IO_CQ:
		ret = vhost_nvme_delete_io_cq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_CREATE_IO_SQ:
		ret = vhost_nvme_create_io_sq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_DELETE_IO_SQ:
		ret = vhost_nvme_delete_io_sq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_GET_FEATURES:
	case SPDK_NVME_OPC_SET_FEATURES:
		if (req->cdw10 == SPDK_NVME_FEAT_NUMBER_OF_QUEUES) {
			cpl->status.sc = 0;
			cpl->status.sct = 0;
			cpl->cdw0 = (nvme->num_io_queues - 1) | ((nvme->num_io_queues - 1) << 16);
		} else {
			cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD;
			cpl->status.sct = SPDK_NVME_SCT_GENERIC;
		}
		break;
	case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG:
		ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_ABORT:
		/* TODO: ABORT always fails for now */
		cpl->cdw0 = 1;
		cpl->status.sc = 0;
		cpl->status.sct = 0;
		break;
	}

	if (ret) {
		SPDK_ERRLOG("Admin Passthrough Failed with %u\n", req->opc);
	}

	return 0;
}

int
vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size)
{
	struct spdk_vhost_nvme_dev *nvme;

	nvme = spdk_vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr);
	/* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */
	nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull);
	nvme->bar_size = bar_size;

	return 0;
}

int
vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd)
{
	struct spdk_vhost_nvme_dev *nvme;
	struct spdk_vhost_nvme_cq *cq;

	nvme = spdk_vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		return -1;
	}
	if (cq->irq_enabled) {
		cq->virq = fd;
	} else {
		SPDK_ERRLOG("NVMe Qid %d Disabled IRQ\n", qid);
	}

	return 0;
}

static void
free_task_pool(struct spdk_vhost_nvme_dev *nvme)
{
	struct spdk_vhost_nvme_task *task;

	while (!STAILQ_EMPTY(&nvme->free_tasks)) {
		task = STAILQ_FIRST(&nvme->free_tasks);
		STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
		spdk_free(task);
	}
}

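/*
 * Preallocate one task per possible queue entry
 * (num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED) from cache-line
 * aligned, DMA-safe memory so the I/O hot path never allocates.
 */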
static int
alloc_task_pool(struct spdk_vhost_nvme_dev *nvme)
{
	uint32_t entries, i;
	struct spdk_vhost_nvme_task *task;

	entries = nvme->num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED;

	for (i = 0; i < entries; i++) {
		task = spdk_zmalloc(sizeof(struct spdk_vhost_nvme_task),
				    SPDK_CACHE_LINE_SIZE, NULL,
				    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (task == NULL) {
			SPDK_ERRLOG("Controller %s alloc task pool failed\n",
				    nvme->vdev.name);
			free_task_pool(nvme);
			return -1;
		}
		STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
	}

	return 0;
}

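/*
 * Session start callback, run on the session's poll group thread.  As
 * the commit message explains, the rc returned below is ignored by the
 * caller; the start status must be reported through
 * vhost_session_start_done(), where a negative rc makes DPDK terminate
 * the whole socket connection.
 */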
static int
spdk_vhost_nvme_start_cb(struct spdk_vhost_dev *vdev,
			 struct spdk_vhost_session *vsession, void *unused)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;
	int rc = 0;

	if (nvme == NULL) {
		rc = -1;
		goto out;
	}

	rc = alloc_task_pool(nvme);
	if (rc) {
		goto out;
	}

	SPDK_NOTICELOG("Start Device %u, Path %s, lcore %d\n", vsession->vid,
		       vdev->path, spdk_env_get_current_core());

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		ns_dev->bdev_io_channel = spdk_bdev_get_io_channel(ns_dev->bdev_desc);
		if (!ns_dev->bdev_io_channel) {
			rc = -1;
			goto out;
		}
	}

	nvme->vsession = vsession;
	/* Start the NVMe Poller */
	nvme->requestq_poller = spdk_poller_register(nvme_worker, nvme, 0);

out:
	vhost_session_start_done(vsession, rc);
	return rc;
}

static int
spdk_vhost_nvme_start(struct spdk_vhost_session *vsession)
{
	struct vhost_poll_group *pg;

	if (vsession->vdev->active_session_num > 0) {
		/* We're trying to start a second session */
		SPDK_ERRLOG("Vhost-NVMe devices can support only one simultaneous connection.\n");
		return -1;
	}

	pg = vhost_get_poll_group(vsession->vdev->cpumask);
	return vhost_session_send_event(pg, vsession, spdk_vhost_nvme_start_cb,
					3, "start session");
}

static void
spdk_vhost_nvme_deactive_ns(struct spdk_vhost_nvme_ns *ns)
{
	ns->active_ns = 0;
	spdk_bdev_close(ns->bdev_desc);
	ns->bdev_desc = NULL;
	ns->bdev = NULL;
}

static void
bdev_remove_cb(void *remove_ctx)
{
	struct spdk_vhost_nvme_ns *ns = remove_ctx;

	SPDK_NOTICELOG("Removing NS %u, Block Device %s\n",
		       ns->nsid, spdk_bdev_get_name(ns->bdev));

	spdk_vhost_nvme_deactive_ns(ns);
}

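/*
 * Stop-path poller registered by spdk_vhost_nvme_stop_cb(): it retries
 * every millisecond until it can take the vhost lock, then releases the
 * per-namespace I/O channels, clears the BAR doorbell space, resets the
 * queue state, unregisters itself, and signals vhost_session_stop_done().
 */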
static int
destroy_device_poller_cb(void *arg)
{
	struct spdk_vhost_nvme_dev *nvme = arg;
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Destroy device poller callback\n");

	/* FIXME wait for pending I/Os to complete */

	if (spdk_vhost_trylock() != 0) {
		return -1;
	}

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (ns_dev->bdev_io_channel) {
			spdk_put_io_channel(ns_dev->bdev_io_channel);
			ns_dev->bdev_io_channel = NULL;
		}
	}
	/* Clear BAR space */
	if (nvme->bar) {
		memset((void *)nvme->bar, 0, nvme->bar_size);
	}
	nvme->num_sqs = 0;
	nvme->num_cqs = 0;
	nvme->dbbuf_dbs = NULL;
	nvme->dbbuf_eis = NULL;
	nvme->dataplane_started = false;

	spdk_poller_unregister(&nvme->stop_poller);
	vhost_session_stop_done(nvme->vsession, 0);

	spdk_vhost_unlock();
	return -1;
}

static int
spdk_vhost_nvme_stop_cb(struct spdk_vhost_dev *vdev,
			struct spdk_vhost_session *vsession, void *unused)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);

	if (nvme == NULL) {
		vhost_session_stop_done(vsession, -1);
		return -1;
	}

	free_task_pool(nvme);
	SPDK_NOTICELOG("Stopping Device %u, Path %s\n", vsession->vid, vdev->path);

	spdk_poller_unregister(&nvme->requestq_poller);
	nvme->stop_poller = spdk_poller_register(destroy_device_poller_cb, nvme, 1000);

	return 0;
}

static int
spdk_vhost_nvme_stop(struct spdk_vhost_session *vsession)
{
	return vhost_session_send_event(vsession->poll_group, vsession,
					spdk_vhost_nvme_stop_cb, 3, "stop session");
}

static void
spdk_vhost_nvme_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	if (nvme == NULL) {
		return;
	}

	spdk_json_write_named_array_begin(w, "namespaces");

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (!ns_dev->active_ns) {
			continue;
		}

		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint32(w, "nsid", ns_dev->nsid);
		spdk_json_write_named_string(w, "bdev", spdk_bdev_get_name(ns_dev->bdev));
		spdk_json_write_object_end(w);
	}

	spdk_json_write_array_end(w);
}

static void
spdk_vhost_nvme_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	if (nvme == NULL) {
		return;
	}

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "vhost_create_nvme_controller");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
	spdk_json_write_named_uint32(w, "io_queues", nvme->num_io_queues);
	spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(nvme->vdev.cpumask));
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (!ns_dev->active_ns) {
			continue;
		}

		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "vhost_nvme_controller_add_ns");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
		spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(ns_dev->bdev));
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}
}

static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend = {
	.session_ctx_size = 0,
	.start_session = spdk_vhost_nvme_start,
	.stop_session = spdk_vhost_nvme_stop,
	.dump_info_json = spdk_vhost_nvme_dump_info_json,
	.write_config_json = spdk_vhost_nvme_write_config_json,
	.remove_device = vhost_nvme_dev_remove,
};

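/*
 * Refresh the Identify Namespace data from the backing bdevs: NSZE,
 * NCAP and NUSE all equal the bdev block count, and a single LBA
 * format (format 0) is exposed whose LBADS is log2 of the bdev block
 * size.
 */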
static int
spdk_vhost_nvme_ns_identify_update(struct spdk_vhost_nvme_dev *dev)
{
	struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
	struct spdk_nvme_ns_data *nsdata;
	uint64_t num_blocks;
	uint32_t i;

	/* Identify Namespace */
	cdata->nn = dev->num_ns;
	for (i = 0; i < dev->num_ns; i++) {
		nsdata = &dev->ns[i].nsdata;
		if (dev->ns[i].active_ns) {
			num_blocks = spdk_bdev_get_num_blocks(dev->ns[i].bdev);
			nsdata->nsze = num_blocks;
			/* ncap must be non-zero for active Namespace */
			nsdata->ncap = num_blocks;
			nsdata->nuse = num_blocks;
			nsdata->nlbaf = 0;
			nsdata->flbas.format = 0;
			nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(dev->ns[i].bdev));
			nsdata->noiob = spdk_bdev_get_optimal_io_boundary(dev->ns[i].bdev);
			dev->ns[i].block_size = spdk_bdev_get_block_size(dev->ns[i].bdev);
			dev->ns[i].capacity = num_blocks * dev->ns[i].block_size;
		} else {
			memset(nsdata, 0, sizeof(*nsdata));
		}
	}
	return 0;
}

static int
spdk_vhost_nvme_ctrlr_identify_update(struct spdk_vhost_nvme_dev *dev)
{
	struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
	char sn[20];

	/* Controller Capabilities */
	dev->cap.bits.cqr = 1;
	dev->cap.bits.to = 1;
	dev->cap.bits.dstrd = 0;
	dev->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
	dev->cap.bits.mpsmin = 0;
	dev->cap.bits.mpsmax = 0;
	/* MQES is a 0's based value */
	dev->cap.bits.mqes = MAX_QUEUE_ENTRIES_SUPPORTED - 1;

	/* Controller Configuration */
	dev->cc.bits.en = 0;

	/* Controller Status */
	dev->csts.bits.rdy = 0;

	/* Identify Controller */
	spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
	cdata->vid = 0x8086;
	cdata->ssvid = 0x8086;
	spdk_strcpy_pad(cdata->mn, "SPDK Virtual NVMe Controller", sizeof(cdata->mn), ' ');
	snprintf(sn, sizeof(sn), "NVMe_%s", dev->vdev.name);
	spdk_strcpy_pad(cdata->sn, sn, sizeof(cdata->sn), ' ');
	cdata->ieee[0] = 0xe4;
	cdata->ieee[1] = 0xd2;
	cdata->ieee[2] = 0x5c;
	cdata->ver.bits.mjr = 1;
	cdata->ver.bits.mnr = 0;
	cdata->mdts = 5; /* 128 KiB */
	cdata->rab = 6;
	cdata->sqes.min = 6;
	cdata->sqes.max = 6;
	cdata->cqes.min = 4;
	cdata->cqes.max = 4;
	cdata->oncs.dsm = 1;
	/* Emulated NVMe controller */
	cdata->oacs.doorbell_buffer_config = 1;

	spdk_vhost_nvme_ns_identify_update(dev);

	return 0;
}

int
vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t num_io_queues)
{
	struct spdk_vhost_nvme_dev *dev;
	int rc;

	if (posix_memalign((void **)&dev, SPDK_CACHE_LINE_SIZE, sizeof(*dev))) {
		return -ENOMEM;
	}
	memset(dev, 0, sizeof(*dev));

	if (num_io_queues < 1 || num_io_queues > MAX_IO_QUEUES) {
		free(dev);
		return -EINVAL;
	}

	spdk_vhost_lock();
	rc = vhost_dev_register(&dev->vdev, name, cpumask,
				&spdk_vhost_nvme_device_backend);

	if (rc) {
		free(dev);
		spdk_vhost_unlock();
		return rc;
	}

	dev->num_io_queues = num_io_queues;
	STAILQ_INIT(&dev->free_tasks);
	TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, dev, tailq);

	spdk_vhost_nvme_ctrlr_identify_update(dev);

	SPDK_NOTICELOG("Controller %s: Constructed\n", name);
	spdk_vhost_unlock();
	return rc;
}

int
vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns;
	int rc;
	uint32_t i;

	if (nvme == NULL) {
		return -EINVAL;
	}

	TAILQ_REMOVE(&g_nvme_ctrlrs, nvme, tailq);
	for (i = 0; i < nvme->num_ns; i++) {
		ns = &nvme->ns[i];
		if (ns->active_ns) {
			spdk_vhost_nvme_deactive_ns(ns);
		}
	}

	rc = vhost_dev_unregister(vdev);
	if (rc != 0) {
		return rc;
	}

	free(nvme);
	return 0;
}

int
vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, const char *bdev_name)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns;
	struct spdk_bdev *bdev;
	int rc = -1;

	if (nvme == NULL) {
		return -ENODEV;
	}

	if (nvme->num_ns == MAX_NAMESPACE) {
		SPDK_ERRLOG("Can't support %d Namespaces\n", nvme->num_ns);
		return -ENOSPC;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("could not find bdev %s\n", bdev_name);
		return -ENODEV;
	}

	ns = &nvme->ns[nvme->num_ns];
	rc = spdk_bdev_open(bdev, true, bdev_remove_cb, ns, &nvme->ns[nvme->num_ns].bdev_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Could not open bdev '%s', error=%d\n",
			    bdev_name, rc);
		return rc;
	}

	nvme->ns[nvme->num_ns].bdev = bdev;
	nvme->ns[nvme->num_ns].active_ns = 1;
	nvme->ns[nvme->num_ns].nsid = nvme->num_ns + 1;
	nvme->num_ns++;

	spdk_vhost_nvme_ns_identify_update(nvme);

	return rc;
}

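/*
 * Build controllers from legacy INI-style config sections.  A
 * hypothetical section this parser would accept (key names match the
 * lookups below; the values are only examples):
 *
 *   [VhostNvme0]
 *     Name vhost.nvme.0
 *     Cpumask 0x1
 *     NumberOfQueues 4
 *     Namespace Malloc0
 *     Namespace Malloc1
 */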
int
vhost_nvme_controller_construct(void)
{
	struct spdk_conf_section *sp;
	const char *name;
	const char *bdev_name;
	const char *cpumask;
	int rc, i = 0;
	struct spdk_vhost_dev *vdev;
	uint32_t ctrlr_num, io_queues;

	for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) {
		if (!spdk_conf_section_match_prefix(sp, "VhostNvme")) {
			continue;
		}

		if (sscanf(spdk_conf_section_get_name(sp), "VhostNvme%u", &ctrlr_num) != 1) {
			SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n",
				    spdk_conf_section_get_name(sp));
			return -1;
		}

		name = spdk_conf_section_get_val(sp, "Name");
		if (name == NULL) {
			SPDK_ERRLOG("VhostNvme%u: missing Name\n", ctrlr_num);
			return -1;
		}

		cpumask = spdk_conf_section_get_val(sp, "Cpumask");
		rc = spdk_conf_section_get_intval(sp, "NumberOfQueues");
		if (rc > 0) {
			io_queues = rc;
		} else {
			io_queues = 1;
		}

		rc = vhost_nvme_dev_construct(name, cpumask, io_queues);
		if (rc < 0) {
			SPDK_ERRLOG("VhostNvme%u: Construct failed\n", ctrlr_num);
			return -1;
		}

		vdev = spdk_vhost_dev_find(name);
		if (!vdev) {
			return -1;
		}

		for (i = 0; spdk_conf_section_get_nval(sp, "Namespace", i) != NULL; i++) {
			bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0);
			if (!bdev_name) {
				SPDK_ERRLOG("namespace configuration missing bdev name\n");
				break;
			}
			rc = vhost_nvme_dev_add_ns(vdev, bdev_name);
			if (rc < 0) {
				SPDK_WARNLOG("VhostNvme%u: Construct Namespace with %s failed\n",
					     ctrlr_num, bdev_name);
				break;
			}
		}
	}

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("vhost_nvme", SPDK_LOG_VHOST_NVME)