6e77b0b68d
Change the way we increase poll group reference counts for round-robin scheduling. So far we used to increase them whenever someone called vhost_get_poll_group() and this worked fine for Vhost-Block which picks a new poll group for each session. Vhost-SCSI, however, picks only one poll group for all sessions on a vhost device. This means that some threads will have multiple Vhost-SCSI pollers but will still appear to the vhost scheduler as if they had only one. To fix it, increase poll group refcnt only when sessions are really being started - in vhost_session_start_done(). Change-Id: I60f0d2101239e5a91138a5afd30c51dc1ccf7c2e Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/466733 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
1545 lines
38 KiB
C
1545 lines
38 KiB
C
/*-
|
|
* BSD LICENSE
|
|
*
|
|
* Copyright(c) Intel Corporation. All rights reserved.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "spdk/stdinc.h"
|
|
|
|
#include "spdk/nvme.h"
|
|
#include "spdk/env.h"
|
|
#include "spdk/conf.h"
|
|
#include "spdk/util.h"
|
|
#include "spdk/string.h"
|
|
#include "spdk/thread.h"
|
|
#include "spdk/barrier.h"
|
|
#include "spdk/vhost.h"
|
|
#include "spdk/bdev.h"
|
|
#include "spdk/version.h"
|
|
#include "spdk/nvme_spec.h"
|
|
#include "spdk/likely.h"
|
|
|
|
#include "vhost_internal.h"
|
|
|
|
#define MAX_IO_QUEUES 31
|
|
#define MAX_IOVS 64
|
|
#define MAX_NAMESPACE 8
|
|
#define MAX_QUEUE_ENTRIES_SUPPORTED 256
|
|
#define MAX_BATCH_IO 8
|
|
|
|
struct spdk_vhost_nvme_sq {
|
|
uint16_t sqid;
|
|
uint16_t size;
|
|
uint16_t cqid;
|
|
bool valid;
|
|
struct spdk_nvme_cmd *sq_cmd;
|
|
uint16_t sq_head;
|
|
uint16_t sq_tail;
|
|
};
|
|
|
|
struct spdk_vhost_nvme_cq {
|
|
uint8_t phase;
|
|
uint16_t size;
|
|
uint16_t cqid;
|
|
bool valid;
|
|
volatile struct spdk_nvme_cpl *cq_cqe;
|
|
uint16_t cq_head;
|
|
uint16_t guest_signaled_cq_head;
|
|
uint32_t need_signaled_cnt;
|
|
STAILQ_HEAD(, spdk_vhost_nvme_task) cq_full_waited_tasks;
|
|
bool irq_enabled;
|
|
int virq;
|
|
};
|
|
|
|
struct spdk_vhost_nvme_ns {
|
|
struct spdk_bdev *bdev;
|
|
uint32_t block_size;
|
|
uint64_t capacity;
|
|
uint32_t nsid;
|
|
uint32_t active_ns;
|
|
struct spdk_bdev_desc *bdev_desc;
|
|
struct spdk_io_channel *bdev_io_channel;
|
|
struct spdk_nvme_ns_data nsdata;
|
|
};
|
|
|
|
struct spdk_vhost_nvme_task {
|
|
struct spdk_nvme_cmd cmd;
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
uint16_t sqid;
|
|
uint16_t cqid;
|
|
|
|
/** array of iovecs to transfer. */
|
|
struct iovec iovs[MAX_IOVS];
|
|
|
|
/** Number of iovecs in iovs array. */
|
|
int iovcnt;
|
|
|
|
/** Current iovec position. */
|
|
int iovpos;
|
|
|
|
/** Offset in current iovec. */
|
|
uint32_t iov_offset;
|
|
|
|
/* for bdev_io_wait */
|
|
struct spdk_bdev_io_wait_entry bdev_io_wait;
|
|
struct spdk_vhost_nvme_sq *sq;
|
|
struct spdk_vhost_nvme_ns *ns;
|
|
|
|
/* parent pointer. */
|
|
struct spdk_vhost_nvme_task *parent;
|
|
uint8_t dnr;
|
|
uint8_t sct;
|
|
uint8_t sc;
|
|
uint32_t num_children;
|
|
STAILQ_ENTRY(spdk_vhost_nvme_task) stailq;
|
|
};
|
|
|
|
struct spdk_vhost_nvme_dev {
|
|
struct spdk_vhost_dev vdev;
|
|
|
|
uint32_t num_io_queues;
|
|
union spdk_nvme_cap_register cap;
|
|
union spdk_nvme_cc_register cc;
|
|
union spdk_nvme_csts_register csts;
|
|
struct spdk_nvme_ctrlr_data cdata;
|
|
|
|
uint32_t num_sqs;
|
|
uint32_t num_cqs;
|
|
|
|
uint32_t num_ns;
|
|
struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE];
|
|
|
|
volatile uint32_t *bar;
|
|
volatile uint32_t *bar_db;
|
|
uint64_t bar_size;
|
|
bool dataplane_started;
|
|
|
|
volatile uint32_t *dbbuf_dbs;
|
|
volatile uint32_t *dbbuf_eis;
|
|
struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1];
|
|
struct spdk_vhost_nvme_cq cq_queue[MAX_IO_QUEUES + 1];
|
|
|
|
/* The one and only session associated with this device */
|
|
struct spdk_vhost_session *vsession;
|
|
|
|
TAILQ_ENTRY(spdk_vhost_nvme_dev) tailq;
|
|
STAILQ_HEAD(, spdk_vhost_nvme_task) free_tasks;
|
|
struct spdk_poller *requestq_poller;
|
|
struct spdk_poller *stop_poller;
|
|
};
|
|
|
|
static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend;
|
|
|
|
/*
|
|
* Report the SPDK version as the firmware revision.
|
|
* SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
|
|
*/
|
|
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING
|
|
|
|
static int
|
|
spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
|
|
struct spdk_vhost_nvme_task *task);
|
|
|
|
static struct spdk_vhost_nvme_dev *
|
|
to_nvme_dev(struct spdk_vhost_dev *vdev)
|
|
{
|
|
if (vdev->backend != &spdk_vhost_nvme_device_backend) {
|
|
SPDK_ERRLOG("%s: not a vhost-nvme device\n", vdev->name);
|
|
return NULL;
|
|
}
|
|
|
|
return SPDK_CONTAINEROF(vdev, struct spdk_vhost_nvme_dev, vdev);
|
|
}
|
|
|
|
static TAILQ_HEAD(, spdk_vhost_nvme_dev) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs);
|
|
|
|
static inline unsigned int sq_offset(unsigned int qid, uint32_t db_stride)
|
|
{
|
|
return qid * 2 * db_stride;
|
|
}
|
|
|
|
static inline unsigned int cq_offset(unsigned int qid, uint32_t db_stride)
|
|
{
|
|
return (qid * 2 + 1) * db_stride;
|
|
}
|
|
|
|
static void
|
|
nvme_inc_cq_head(struct spdk_vhost_nvme_cq *cq)
|
|
{
|
|
cq->cq_head++;
|
|
if (cq->cq_head >= cq->size) {
|
|
cq->cq_head = 0;
|
|
cq->phase = !cq->phase;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
nvme_cq_is_full(struct spdk_vhost_nvme_cq *cq)
|
|
{
|
|
return ((cq->cq_head + 1) % cq->size == cq->guest_signaled_cq_head);
|
|
}
|
|
|
|
static void
|
|
nvme_inc_sq_head(struct spdk_vhost_nvme_sq *sq)
|
|
{
|
|
sq->sq_head = (sq->sq_head + 1) % sq->size;
|
|
}
|
|
|
|
static struct spdk_vhost_nvme_sq *
|
|
spdk_vhost_nvme_get_sq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
|
|
{
|
|
if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
|
|
return NULL;
|
|
}
|
|
|
|
return &dev->sq_queue[qid];
|
|
}
|
|
|
|
static struct spdk_vhost_nvme_cq *
|
|
spdk_vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
|
|
{
|
|
if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
|
|
return NULL;
|
|
}
|
|
|
|
return &dev->cq_queue[qid];
|
|
}
|
|
|
|
static inline uint32_t
|
|
spdk_vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset)
|
|
{
|
|
if (nvme->dataplane_started) {
|
|
return nvme->dbbuf_dbs[offset];
|
|
|
|
} else if (nvme->bar) {
|
|
return nvme->bar_db[offset];
|
|
}
|
|
|
|
assert(0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd,
|
|
struct spdk_vhost_nvme_task *task, uint32_t len)
|
|
{
|
|
struct spdk_vhost_session *vsession = nvme->vsession;
|
|
uint64_t prp1, prp2;
|
|
void *vva;
|
|
uint32_t i;
|
|
uint32_t residue_len, nents, mps = 4096;
|
|
uint64_t *prp_list;
|
|
|
|
prp1 = cmd->dptr.prp.prp1;
|
|
prp2 = cmd->dptr.prp.prp2;
|
|
|
|
/* PRP1 may started with unaligned page address */
|
|
residue_len = mps - (prp1 % mps);
|
|
residue_len = spdk_min(len, residue_len);
|
|
|
|
vva = vhost_gpa_to_vva(vsession, prp1, residue_len);
|
|
if (spdk_unlikely(vva == NULL)) {
|
|
SPDK_ERRLOG("GPA to VVA failed\n");
|
|
return -1;
|
|
}
|
|
task->iovs[0].iov_base = vva;
|
|
task->iovs[0].iov_len = residue_len;
|
|
len -= residue_len;
|
|
|
|
if (len) {
|
|
if (spdk_unlikely(prp2 == 0)) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PRP2=0 in command\n");
|
|
return -1;
|
|
}
|
|
|
|
if (len <= mps) {
|
|
/* 2 PRP used */
|
|
task->iovcnt = 2;
|
|
vva = vhost_gpa_to_vva(vsession, prp2, len);
|
|
if (spdk_unlikely(vva == NULL)) {
|
|
return -1;
|
|
}
|
|
task->iovs[1].iov_base = vva;
|
|
task->iovs[1].iov_len = len;
|
|
} else {
|
|
/* PRP list used */
|
|
nents = (len + mps - 1) / mps;
|
|
vva = vhost_gpa_to_vva(vsession, prp2, nents * sizeof(*prp_list));
|
|
if (spdk_unlikely(vva == NULL)) {
|
|
return -1;
|
|
}
|
|
prp_list = vva;
|
|
i = 0;
|
|
while (len != 0) {
|
|
residue_len = spdk_min(len, mps);
|
|
vva = vhost_gpa_to_vva(vsession, prp_list[i], residue_len);
|
|
if (spdk_unlikely(vva == NULL)) {
|
|
return -1;
|
|
}
|
|
task->iovs[i + 1].iov_base = vva;
|
|
task->iovs[i + 1].iov_len = residue_len;
|
|
len -= residue_len;
|
|
i++;
|
|
}
|
|
task->iovcnt = i + 1;
|
|
}
|
|
} else {
|
|
/* 1 PRP used */
|
|
task->iovcnt = 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
spdk_nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme)
|
|
{
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
uint32_t qid, cq_head;
|
|
|
|
assert(nvme != NULL);
|
|
|
|
for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
|
|
if (!cq || !cq->valid) {
|
|
continue;
|
|
}
|
|
|
|
cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1));
|
|
if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) {
|
|
eventfd_write(cq->virq, (eventfd_t)1);
|
|
cq->need_signaled_cnt = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
spdk_vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = task->nvme;
|
|
struct spdk_nvme_cpl cqe = {0};
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
struct spdk_vhost_nvme_sq *sq;
|
|
struct spdk_nvme_cmd *cmd = &task->cmd;
|
|
uint16_t cqid = task->cqid;
|
|
uint16_t sqid = task->sqid;
|
|
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid);
|
|
sq = spdk_vhost_nvme_get_sq_from_qid(nvme, sqid);
|
|
if (spdk_unlikely(!cq || !sq)) {
|
|
return;
|
|
}
|
|
|
|
cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1));
|
|
if (spdk_unlikely(nvme_cq_is_full(cq))) {
|
|
STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq);
|
|
return;
|
|
}
|
|
|
|
cqe.sqid = sqid;
|
|
cqe.sqhd = sq->sq_head;
|
|
cqe.cid = cmd->cid;
|
|
cqe.status.dnr = task->dnr;
|
|
cqe.status.sct = task->sct;
|
|
cqe.status.sc = task->sc;
|
|
cqe.status.p = !cq->phase;
|
|
cq->cq_cqe[cq->cq_head] = cqe;
|
|
spdk_smp_wmb();
|
|
cq->cq_cqe[cq->cq_head].status.p = cq->phase;
|
|
|
|
nvme_inc_cq_head(cq);
|
|
cq->need_signaled_cnt++;
|
|
|
|
/* MMIO Controll */
|
|
if (nvme->dataplane_started) {
|
|
nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1);
|
|
}
|
|
|
|
STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
|
|
}
|
|
|
|
static void
|
|
blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
{
|
|
struct spdk_vhost_nvme_task *task = cb_arg;
|
|
struct spdk_nvme_cmd *cmd = &task->cmd;
|
|
int sc, sct;
|
|
|
|
assert(bdev_io != NULL);
|
|
|
|
spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
|
|
spdk_bdev_free_io(bdev_io);
|
|
|
|
task->dnr = !success;
|
|
task->sct = sct;
|
|
task->sc = sc;
|
|
|
|
if (spdk_unlikely(!success)) {
|
|
SPDK_ERRLOG("I/O error, sector %u\n", cmd->cdw10);
|
|
}
|
|
|
|
spdk_vhost_nvme_task_complete(task);
|
|
}
|
|
|
|
static void
|
|
blk_unmap_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
|
{
|
|
struct spdk_vhost_nvme_task *child = cb_arg;
|
|
struct spdk_vhost_nvme_task *task = child->parent;
|
|
struct spdk_vhost_nvme_dev *nvme = task->nvme;
|
|
int sct, sc;
|
|
|
|
assert(bdev_io != NULL);
|
|
|
|
task->num_children--;
|
|
if (!success) {
|
|
task->dnr = 1;
|
|
spdk_bdev_io_get_nvme_status(bdev_io, &sct, &sc);
|
|
task->sct = sct;
|
|
task->sc = sc;
|
|
}
|
|
|
|
spdk_bdev_free_io(bdev_io);
|
|
|
|
if (!task->num_children) {
|
|
spdk_vhost_nvme_task_complete(task);
|
|
}
|
|
|
|
STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
|
|
}
|
|
|
|
static struct spdk_vhost_nvme_ns *
|
|
spdk_vhost_nvme_get_ns_from_nsid(struct spdk_vhost_nvme_dev *dev, uint32_t nsid)
|
|
{
|
|
if (spdk_unlikely(!nsid || nsid > dev->num_ns)) {
|
|
return NULL;
|
|
}
|
|
|
|
return &dev->ns[nsid - 1];
|
|
}
|
|
|
|
static void
|
|
vhost_nvme_resubmit_task(void *arg)
|
|
{
|
|
struct spdk_vhost_nvme_task *task = (struct spdk_vhost_nvme_task *)arg;
|
|
int rc;
|
|
|
|
rc = spdk_nvme_process_sq(task->nvme, task->sq, task);
|
|
if (rc) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "vhost_nvme: task resubmit failed, rc = %d.\n", rc);
|
|
}
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_queue_task(struct spdk_vhost_nvme_task *task)
|
|
{
|
|
int rc;
|
|
|
|
task->bdev_io_wait.bdev = task->ns->bdev;
|
|
task->bdev_io_wait.cb_fn = vhost_nvme_resubmit_task;
|
|
task->bdev_io_wait.cb_arg = task;
|
|
|
|
rc = spdk_bdev_queue_io_wait(task->ns->bdev, task->ns->bdev_io_channel, &task->bdev_io_wait);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("Queue io failed in vhost_nvme_queue_task, rc=%d.\n", rc);
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
spdk_nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
|
|
struct spdk_vhost_nvme_task *task)
|
|
{
|
|
struct spdk_vhost_nvme_task *child;
|
|
struct spdk_nvme_cmd *cmd = &task->cmd;
|
|
struct spdk_vhost_nvme_ns *ns;
|
|
int ret = -1;
|
|
uint32_t len, nlba, block_size;
|
|
uint64_t slba;
|
|
struct spdk_nvme_dsm_range *range;
|
|
uint16_t i, num_ranges = 0;
|
|
|
|
task->nvme = nvme;
|
|
task->dnr = 0;
|
|
task->sct = 0;
|
|
task->sc = 0;
|
|
|
|
ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, cmd->nsid);
|
|
if (spdk_unlikely(!ns)) {
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
return -1;
|
|
}
|
|
|
|
block_size = ns->block_size;
|
|
task->num_children = 0;
|
|
task->cqid = sq->cqid;
|
|
task->sqid = sq->sqid;
|
|
|
|
task->ns = ns;
|
|
|
|
if (spdk_unlikely(!ns->active_ns)) {
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
return -1;
|
|
}
|
|
|
|
/* valid only for Read/Write commands */
|
|
nlba = (cmd->cdw12 & 0xffff) + 1;
|
|
slba = cmd->cdw11;
|
|
slba = (slba << 32) | cmd->cdw10;
|
|
|
|
if (cmd->opc == SPDK_NVME_OPC_READ || cmd->opc == SPDK_NVME_OPC_WRITE ||
|
|
cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
|
|
if (cmd->psdt != SPDK_NVME_PSDT_PRP) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PSDT %u%ub in command\n",
|
|
cmd->psdt >> 1, cmd->psdt & 1u);
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INVALID_FIELD;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
return -1;
|
|
}
|
|
|
|
if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
|
|
num_ranges = (cmd->cdw10 & 0xff) + 1;
|
|
len = num_ranges * sizeof(struct spdk_nvme_dsm_range);
|
|
} else {
|
|
len = nlba * block_size;
|
|
}
|
|
|
|
ret = spdk_nvme_map_prps(nvme, cmd, task, len);
|
|
if (spdk_unlikely(ret != 0)) {
|
|
SPDK_ERRLOG("nvme command map prps failed\n");
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INVALID_FIELD;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
switch (cmd->opc) {
|
|
case SPDK_NVME_OPC_READ:
|
|
ret = spdk_bdev_readv(ns->bdev_desc, ns->bdev_io_channel,
|
|
task->iovs, task->iovcnt, slba * block_size,
|
|
nlba * block_size, blk_request_complete_cb, task);
|
|
break;
|
|
case SPDK_NVME_OPC_WRITE:
|
|
ret = spdk_bdev_writev(ns->bdev_desc, ns->bdev_io_channel,
|
|
task->iovs, task->iovcnt, slba * block_size,
|
|
nlba * block_size, blk_request_complete_cb, task);
|
|
break;
|
|
case SPDK_NVME_OPC_FLUSH:
|
|
ret = spdk_bdev_flush(ns->bdev_desc, ns->bdev_io_channel,
|
|
0, ns->capacity,
|
|
blk_request_complete_cb, task);
|
|
break;
|
|
case SPDK_NVME_OPC_DATASET_MANAGEMENT:
|
|
range = (struct spdk_nvme_dsm_range *)task->iovs[0].iov_base;
|
|
for (i = 0; i < num_ranges; i++) {
|
|
if (!STAILQ_EMPTY(&nvme->free_tasks)) {
|
|
child = STAILQ_FIRST(&nvme->free_tasks);
|
|
STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
|
|
} else {
|
|
SPDK_ERRLOG("No free task now\n");
|
|
ret = -1;
|
|
break;
|
|
}
|
|
task->num_children++;
|
|
child->parent = task;
|
|
ret = spdk_bdev_unmap(ns->bdev_desc, ns->bdev_io_channel,
|
|
range[i].starting_lba * block_size,
|
|
range[i].length * block_size,
|
|
blk_unmap_complete_cb, child);
|
|
if (ret) {
|
|
STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
ret = -1;
|
|
break;
|
|
}
|
|
|
|
if (spdk_unlikely(ret)) {
|
|
if (ret == -ENOMEM) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "No memory, start to queue io.\n");
|
|
task->sq = sq;
|
|
ret = vhost_nvme_queue_task(task);
|
|
} else {
|
|
/* post error status to cqe */
|
|
SPDK_ERRLOG("Error Submission For Command %u, ret %d\n", cmd->opc, ret);
|
|
task->dnr = 1;
|
|
task->sct = SPDK_NVME_SCT_GENERIC;
|
|
task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
spdk_vhost_nvme_task_complete(task);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nvme_worker(void *arg)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = (struct spdk_vhost_nvme_dev *)arg;
|
|
struct spdk_vhost_nvme_sq *sq;
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
struct spdk_vhost_nvme_task *task;
|
|
uint32_t qid, dbbuf_sq;
|
|
int ret;
|
|
int count = -1;
|
|
|
|
if (spdk_unlikely(!nvme->num_sqs)) {
|
|
return -1;
|
|
}
|
|
|
|
if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) {
|
|
return -1;
|
|
}
|
|
|
|
for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {
|
|
|
|
sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
|
|
if (!sq->valid) {
|
|
continue;
|
|
}
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, sq->cqid);
|
|
if (spdk_unlikely(!cq)) {
|
|
return -1;
|
|
}
|
|
cq->guest_signaled_cq_head = spdk_vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1));
|
|
if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) &&
|
|
!nvme_cq_is_full(cq))) {
|
|
task = STAILQ_FIRST(&cq->cq_full_waited_tasks);
|
|
STAILQ_REMOVE_HEAD(&cq->cq_full_waited_tasks, stailq);
|
|
spdk_vhost_nvme_task_complete(task);
|
|
}
|
|
|
|
dbbuf_sq = spdk_vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1));
|
|
sq->sq_tail = (uint16_t)dbbuf_sq;
|
|
count = 0;
|
|
|
|
while (sq->sq_head != sq->sq_tail) {
|
|
if (spdk_unlikely(!sq->sq_cmd)) {
|
|
break;
|
|
}
|
|
if (spdk_likely(!STAILQ_EMPTY(&nvme->free_tasks))) {
|
|
task = STAILQ_FIRST(&nvme->free_tasks);
|
|
STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
|
|
} else {
|
|
return -1;
|
|
}
|
|
|
|
task->cmd = sq->sq_cmd[sq->sq_head];
|
|
nvme_inc_sq_head(sq);
|
|
|
|
/* processing IO */
|
|
ret = spdk_nvme_process_sq(nvme, sq, task);
|
|
if (spdk_unlikely(ret)) {
|
|
SPDK_ERRLOG("QID %u CID %u, SQ HEAD %u, DBBUF SQ TAIL %u\n", qid, task->cmd.cid, sq->sq_head,
|
|
sq->sq_tail);
|
|
}
|
|
|
|
/* MMIO Control */
|
|
if (nvme->dataplane_started) {
|
|
nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1);
|
|
}
|
|
|
|
/* Maximum batch I/Os to pick up at once */
|
|
if (count++ == MAX_BATCH_IO) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Completion Queue */
|
|
spdk_nvme_cq_signal_fd(nvme);
|
|
|
|
return count;
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme,
|
|
struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
|
|
{
|
|
struct spdk_vhost_session *vsession = nvme->vsession;
|
|
uint64_t dbs_dma_addr, eis_dma_addr;
|
|
|
|
dbs_dma_addr = cmd->dptr.prp.prp1;
|
|
eis_dma_addr = cmd->dptr.prp.prp2;
|
|
|
|
if ((dbs_dma_addr % 4096) || (eis_dma_addr % 4096)) {
|
|
return -1;
|
|
}
|
|
/* Guest Physical Address to Host Virtual Address */
|
|
nvme->dbbuf_dbs = vhost_gpa_to_vva(vsession, dbs_dma_addr, 4096);
|
|
nvme->dbbuf_eis = vhost_gpa_to_vva(vsession, eis_dma_addr, 4096);
|
|
if (!nvme->dbbuf_dbs || !nvme->dbbuf_eis) {
|
|
return -1;
|
|
}
|
|
/* zeroed the doorbell buffer memory */
|
|
memset((void *)nvme->dbbuf_dbs, 0, 4096);
|
|
memset((void *)nvme->dbbuf_eis, 0, 4096);
|
|
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
|
|
/* Data plane started */
|
|
nvme->dataplane_started = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme,
|
|
struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
|
|
{
|
|
uint16_t qid, qsize, cqid;
|
|
uint64_t dma_addr;
|
|
uint64_t requested_len;
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
struct spdk_vhost_nvme_sq *sq;
|
|
|
|
/* physical contiguous */
|
|
if (!(cmd->cdw11 & 0x1)) {
|
|
return -1;
|
|
}
|
|
|
|
cqid = (cmd->cdw11 >> 16) & 0xffff;
|
|
qid = cmd->cdw10 & 0xffff;
|
|
qsize = (cmd->cdw10 >> 16) & 0xffff;
|
|
dma_addr = cmd->dptr.prp.prp1;
|
|
if (!dma_addr || dma_addr % 4096) {
|
|
return -1;
|
|
}
|
|
|
|
sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, cqid);
|
|
if (!sq || !cq) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u or CQID %u\n",
|
|
qid, cqid);
|
|
cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
|
|
return -1;
|
|
}
|
|
|
|
sq->sqid = qid;
|
|
sq->cqid = cqid;
|
|
sq->size = qsize + 1;
|
|
sq->sq_head = sq->sq_tail = 0;
|
|
requested_len = sizeof(struct spdk_nvme_cmd) * sq->size;
|
|
sq->sq_cmd = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
|
|
if (!sq->sq_cmd) {
|
|
return -1;
|
|
}
|
|
nvme->num_sqs++;
|
|
sq->valid = true;
|
|
if (nvme->bar) {
|
|
nvme->bar_db[sq_offset(qid, 1)] = 0;
|
|
}
|
|
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_delete_io_sq(struct spdk_vhost_nvme_dev *nvme,
|
|
struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
|
|
{
|
|
uint16_t qid;
|
|
struct spdk_vhost_nvme_sq *sq;
|
|
|
|
qid = cmd->cdw10 & 0xffff;
|
|
sq = spdk_vhost_nvme_get_sq_from_qid(nvme, qid);
|
|
if (!sq) {
|
|
return -1;
|
|
}
|
|
|
|
/* We didn't see scenarios when deleting submission
|
|
* queue while I/O is running against the submisson
|
|
* queue for now, otherwise, we must ensure the poller
|
|
* will not run with this submission queue.
|
|
*/
|
|
nvme->num_sqs--;
|
|
sq->valid = false;
|
|
|
|
memset(sq, 0, sizeof(*sq));
|
|
sq->sq_cmd = NULL;
|
|
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme,
|
|
struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
|
|
{
|
|
uint16_t qsize, qid;
|
|
uint64_t dma_addr;
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
uint64_t requested_len;
|
|
|
|
/* physical contiguous */
|
|
if (!(cmd->cdw11 & 0x1)) {
|
|
return -1;
|
|
}
|
|
|
|
qid = cmd->cdw10 & 0xffff;
|
|
qsize = (cmd->cdw10 >> 16) & 0xffff;
|
|
dma_addr = cmd->dptr.prp.prp1;
|
|
if (!dma_addr || dma_addr % 4096) {
|
|
return -1;
|
|
}
|
|
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
|
|
if (!cq) {
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u\n", qid);
|
|
cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
|
|
return -1;
|
|
}
|
|
cq->cqid = qid;
|
|
cq->size = qsize + 1;
|
|
cq->phase = 1;
|
|
cq->irq_enabled = (cmd->cdw11 >> 1) & 0x1;
|
|
/* Setup virq through vhost messages */
|
|
cq->virq = -1;
|
|
cq->cq_head = 0;
|
|
cq->guest_signaled_cq_head = 0;
|
|
cq->need_signaled_cnt = 0;
|
|
requested_len = sizeof(struct spdk_nvme_cpl) * cq->size;
|
|
cq->cq_cqe = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
|
|
if (!cq->cq_cqe) {
|
|
return -1;
|
|
}
|
|
nvme->num_cqs++;
|
|
cq->valid = true;
|
|
if (nvme->bar) {
|
|
nvme->bar_db[cq_offset(qid, 1)] = 0;
|
|
}
|
|
STAILQ_INIT(&cq->cq_full_waited_tasks);
|
|
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
vhost_nvme_delete_io_cq(struct spdk_vhost_nvme_dev *nvme,
|
|
struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
|
|
{
|
|
uint16_t qid;
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
|
|
qid = cmd->cdw10 & 0xffff;
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
|
|
if (!cq) {
|
|
return -1;
|
|
}
|
|
nvme->num_cqs--;
|
|
cq->valid = false;
|
|
|
|
memset(cq, 0, sizeof(*cq));
|
|
cq->cq_cqe = NULL;
|
|
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
return 0;
|
|
}
|
|
|
|
static struct spdk_vhost_nvme_dev *
|
|
spdk_vhost_nvme_get_by_name(int vid)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
struct spdk_vhost_dev *vdev;
|
|
struct spdk_vhost_session *vsession;
|
|
|
|
TAILQ_FOREACH(nvme, &g_nvme_ctrlrs, tailq) {
|
|
vdev = &nvme->vdev;
|
|
TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
|
|
if (vsession->vid == vid) {
|
|
return nvme;
|
|
}
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_get_cap(int vid, uint64_t *cap)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
|
|
nvme = spdk_vhost_nvme_get_by_name(vid);
|
|
if (!nvme) {
|
|
return -1;
|
|
}
|
|
|
|
*cap = nvme->cap.raw;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
|
|
{
|
|
struct spdk_nvme_cmd *req = (struct spdk_nvme_cmd *)cmd;
|
|
struct spdk_nvme_cpl *cpl = (struct spdk_nvme_cpl *)cqe;
|
|
struct spdk_vhost_nvme_ns *ns;
|
|
int ret = 0;
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
|
|
nvme = spdk_vhost_nvme_get_by_name(vid);
|
|
if (!nvme) {
|
|
return -1;
|
|
}
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Admin Command Opcode %u\n", req->opc);
|
|
switch (req->opc) {
|
|
case SPDK_NVME_OPC_IDENTIFY:
|
|
if (req->cdw10 == SPDK_NVME_IDENTIFY_CTRLR) {
|
|
memcpy(buf, &nvme->cdata, sizeof(struct spdk_nvme_ctrlr_data));
|
|
|
|
} else if (req->cdw10 == SPDK_NVME_IDENTIFY_NS) {
|
|
ns = spdk_vhost_nvme_get_ns_from_nsid(nvme, req->nsid);
|
|
if (!ns) {
|
|
cpl->status.sc = SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE;
|
|
cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
break;
|
|
}
|
|
memcpy(buf, &ns->nsdata, sizeof(struct spdk_nvme_ns_data));
|
|
}
|
|
/* successfully */
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
break;
|
|
case SPDK_NVME_OPC_CREATE_IO_CQ:
|
|
ret = vhost_nvme_create_io_cq(nvme, req, cpl);
|
|
break;
|
|
case SPDK_NVME_OPC_DELETE_IO_CQ:
|
|
ret = vhost_nvme_delete_io_cq(nvme, req, cpl);
|
|
break;
|
|
case SPDK_NVME_OPC_CREATE_IO_SQ:
|
|
ret = vhost_nvme_create_io_sq(nvme, req, cpl);
|
|
break;
|
|
case SPDK_NVME_OPC_DELETE_IO_SQ:
|
|
ret = vhost_nvme_delete_io_sq(nvme, req, cpl);
|
|
break;
|
|
case SPDK_NVME_OPC_GET_FEATURES:
|
|
case SPDK_NVME_OPC_SET_FEATURES:
|
|
if (req->cdw10 == SPDK_NVME_FEAT_NUMBER_OF_QUEUES) {
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
cpl->cdw0 = (nvme->num_io_queues - 1) | ((nvme->num_io_queues - 1) << 16);
|
|
} else {
|
|
cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD;
|
|
cpl->status.sct = SPDK_NVME_SCT_GENERIC;
|
|
}
|
|
break;
|
|
case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG:
|
|
ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl);
|
|
break;
|
|
case SPDK_NVME_OPC_ABORT:
|
|
/* TODO: ABORT failed fow now */
|
|
cpl->cdw0 = 1;
|
|
cpl->status.sc = 0;
|
|
cpl->status.sct = 0;
|
|
break;
|
|
}
|
|
|
|
if (ret) {
|
|
SPDK_ERRLOG("Admin Passthrough Failed with %u\n", req->opc);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
|
|
nvme = spdk_vhost_nvme_get_by_name(vid);
|
|
if (!nvme) {
|
|
return -1;
|
|
}
|
|
|
|
nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr);
|
|
/* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */
|
|
nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull);
|
|
nvme->bar_size = bar_size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme;
|
|
struct spdk_vhost_nvme_cq *cq;
|
|
|
|
nvme = spdk_vhost_nvme_get_by_name(vid);
|
|
if (!nvme) {
|
|
return -1;
|
|
}
|
|
|
|
cq = spdk_vhost_nvme_get_cq_from_qid(nvme, qid);
|
|
if (!cq) {
|
|
return -1;
|
|
}
|
|
if (cq->irq_enabled) {
|
|
cq->virq = fd;
|
|
} else {
|
|
SPDK_ERRLOG("NVMe Qid %d Disabled IRQ\n", qid);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
free_task_pool(struct spdk_vhost_nvme_dev *nvme)
|
|
{
|
|
struct spdk_vhost_nvme_task *task;
|
|
|
|
while (!STAILQ_EMPTY(&nvme->free_tasks)) {
|
|
task = STAILQ_FIRST(&nvme->free_tasks);
|
|
STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
|
|
spdk_free(task);
|
|
}
|
|
}
|
|
|
|
static int
|
|
alloc_task_pool(struct spdk_vhost_nvme_dev *nvme)
|
|
{
|
|
uint32_t entries, i;
|
|
struct spdk_vhost_nvme_task *task;
|
|
|
|
entries = nvme->num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED;
|
|
|
|
for (i = 0; i < entries; i++) {
|
|
task = spdk_zmalloc(sizeof(struct spdk_vhost_nvme_task),
|
|
SPDK_CACHE_LINE_SIZE, NULL,
|
|
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
|
|
if (task == NULL) {
|
|
SPDK_ERRLOG("Controller %s alloc task pool failed\n",
|
|
nvme->vdev.name);
|
|
free_task_pool(nvme);
|
|
return -1;
|
|
}
|
|
STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_vhost_nvme_start_cb(struct spdk_vhost_dev *vdev,
|
|
struct spdk_vhost_session *vsession, void *unused)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
struct spdk_vhost_nvme_ns *ns_dev;
|
|
uint32_t i;
|
|
|
|
if (nvme == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (alloc_task_pool(nvme)) {
|
|
return -1;
|
|
}
|
|
|
|
SPDK_NOTICELOG("Start Device %u, Path %s, lcore %d\n", vsession->vid,
|
|
vdev->path, spdk_env_get_current_core());
|
|
|
|
for (i = 0; i < nvme->num_ns; i++) {
|
|
ns_dev = &nvme->ns[i];
|
|
ns_dev->bdev_io_channel = spdk_bdev_get_io_channel(ns_dev->bdev_desc);
|
|
if (!ns_dev->bdev_io_channel) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
nvme->vsession = vsession;
|
|
/* Start the NVMe Poller */
|
|
nvme->requestq_poller = spdk_poller_register(nvme_worker, nvme, 0);
|
|
|
|
vhost_session_start_done(vsession, 0);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_vhost_nvme_start(struct spdk_vhost_session *vsession)
|
|
{
|
|
struct vhost_poll_group *pg;
|
|
|
|
if (vsession->vdev->active_session_num > 0) {
|
|
/* We're trying to start a second session */
|
|
SPDK_ERRLOG("Vhost-NVMe devices can support only one simultaneous connection.\n");
|
|
return -1;
|
|
}
|
|
|
|
pg = vhost_get_poll_group(vsession->vdev->cpumask);
|
|
return vhost_session_send_event(pg, vsession, spdk_vhost_nvme_start_cb,
|
|
3, "start session");
|
|
}
|
|
|
|
static void
|
|
spdk_vhost_nvme_deactive_ns(struct spdk_vhost_nvme_ns *ns)
|
|
{
|
|
ns->active_ns = 0;
|
|
spdk_bdev_close(ns->bdev_desc);
|
|
ns->bdev_desc = NULL;
|
|
ns->bdev = NULL;
|
|
}
|
|
|
|
static void
|
|
bdev_remove_cb(void *remove_ctx)
|
|
{
|
|
struct spdk_vhost_nvme_ns *ns = remove_ctx;
|
|
|
|
SPDK_NOTICELOG("Removing NS %u, Block Device %s\n",
|
|
ns->nsid, spdk_bdev_get_name(ns->bdev));
|
|
|
|
spdk_vhost_nvme_deactive_ns(ns);
|
|
}
|
|
|
|
static int
|
|
destroy_device_poller_cb(void *arg)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = arg;
|
|
struct spdk_vhost_nvme_ns *ns_dev;
|
|
uint32_t i;
|
|
|
|
SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Destroy device poller callback\n");
|
|
|
|
/* FIXME wait for pending I/Os to complete */
|
|
|
|
if (spdk_vhost_trylock() != 0) {
|
|
return -1;
|
|
}
|
|
|
|
for (i = 0; i < nvme->num_ns; i++) {
|
|
ns_dev = &nvme->ns[i];
|
|
if (ns_dev->bdev_io_channel) {
|
|
spdk_put_io_channel(ns_dev->bdev_io_channel);
|
|
ns_dev->bdev_io_channel = NULL;
|
|
}
|
|
}
|
|
/* Clear BAR space */
|
|
if (nvme->bar) {
|
|
memset((void *)nvme->bar, 0, nvme->bar_size);
|
|
}
|
|
nvme->num_sqs = 0;
|
|
nvme->num_cqs = 0;
|
|
nvme->dbbuf_dbs = NULL;
|
|
nvme->dbbuf_eis = NULL;
|
|
nvme->dataplane_started = false;
|
|
|
|
spdk_poller_unregister(&nvme->stop_poller);
|
|
vhost_session_stop_done(nvme->vsession, 0);
|
|
|
|
spdk_vhost_unlock();
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
spdk_vhost_nvme_stop_cb(struct spdk_vhost_dev *vdev,
|
|
struct spdk_vhost_session *vsession, void *unused)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
|
|
if (nvme == NULL) {
|
|
vhost_session_stop_done(vsession, -1);
|
|
return -1;
|
|
}
|
|
|
|
free_task_pool(nvme);
|
|
SPDK_NOTICELOG("Stopping Device %u, Path %s\n", vsession->vid, vdev->path);
|
|
|
|
spdk_poller_unregister(&nvme->requestq_poller);
|
|
nvme->stop_poller = spdk_poller_register(destroy_device_poller_cb, nvme, 1000);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_vhost_nvme_stop(struct spdk_vhost_session *vsession)
|
|
{
|
|
return vhost_session_send_event(vsession->poll_group, vsession,
|
|
spdk_vhost_nvme_stop_cb, 3, "start session");
|
|
}
|
|
|
|
static void
|
|
spdk_vhost_nvme_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
struct spdk_vhost_nvme_ns *ns_dev;
|
|
uint32_t i;
|
|
|
|
if (nvme == NULL) {
|
|
return;
|
|
}
|
|
|
|
spdk_json_write_named_array_begin(w, "namespaces");
|
|
|
|
for (i = 0; i < nvme->num_ns; i++) {
|
|
ns_dev = &nvme->ns[i];
|
|
if (!ns_dev->active_ns) {
|
|
continue;
|
|
}
|
|
|
|
spdk_json_write_object_begin(w);
|
|
spdk_json_write_named_uint32(w, "nsid", ns_dev->nsid);
|
|
spdk_json_write_named_string(w, "bdev", spdk_bdev_get_name(ns_dev->bdev));
|
|
spdk_json_write_object_end(w);
|
|
}
|
|
|
|
spdk_json_write_array_end(w);
|
|
}
|
|
|
|
static void
|
|
spdk_vhost_nvme_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
struct spdk_vhost_nvme_ns *ns_dev;
|
|
uint32_t i;
|
|
|
|
if (nvme == NULL) {
|
|
return;
|
|
}
|
|
|
|
spdk_json_write_object_begin(w);
|
|
spdk_json_write_named_string(w, "method", "construct_vhost_nvme_controller");
|
|
|
|
spdk_json_write_named_object_begin(w, "params");
|
|
spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
|
|
spdk_json_write_named_uint32(w, "io_queues", nvme->num_io_queues);
|
|
spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(nvme->vdev.cpumask));
|
|
spdk_json_write_object_end(w);
|
|
|
|
spdk_json_write_object_end(w);
|
|
|
|
for (i = 0; i < nvme->num_ns; i++) {
|
|
ns_dev = &nvme->ns[i];
|
|
if (!ns_dev->active_ns) {
|
|
continue;
|
|
}
|
|
|
|
spdk_json_write_object_begin(w);
|
|
spdk_json_write_named_string(w, "method", "add_vhost_nvme_ns");
|
|
|
|
spdk_json_write_named_object_begin(w, "params");
|
|
spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
|
|
spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(ns_dev->bdev));
|
|
spdk_json_write_object_end(w);
|
|
|
|
spdk_json_write_object_end(w);
|
|
}
|
|
}
|
|
|
|
static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend = {
|
|
.session_ctx_size = 0,
|
|
.start_session = spdk_vhost_nvme_start,
|
|
.stop_session = spdk_vhost_nvme_stop,
|
|
.dump_info_json = spdk_vhost_nvme_dump_info_json,
|
|
.write_config_json = spdk_vhost_nvme_write_config_json,
|
|
.remove_device = vhost_nvme_dev_remove,
|
|
};
|
|
|
|
static int
|
|
spdk_vhost_nvme_ns_identify_update(struct spdk_vhost_nvme_dev *dev)
|
|
{
|
|
struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
|
|
struct spdk_nvme_ns_data *nsdata;
|
|
uint64_t num_blocks;
|
|
uint32_t i;
|
|
|
|
/* Identify Namespace */
|
|
cdata->nn = dev->num_ns;
|
|
for (i = 0; i < dev->num_ns; i++) {
|
|
nsdata = &dev->ns[i].nsdata;
|
|
if (dev->ns[i].active_ns) {
|
|
num_blocks = spdk_bdev_get_num_blocks(dev->ns[i].bdev);
|
|
nsdata->nsze = num_blocks;
|
|
/* ncap must be non-zero for active Namespace */
|
|
nsdata->ncap = num_blocks;
|
|
nsdata->nuse = num_blocks;
|
|
nsdata->nlbaf = 0;
|
|
nsdata->flbas.format = 0;
|
|
nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(dev->ns[i].bdev));
|
|
nsdata->noiob = spdk_bdev_get_optimal_io_boundary(dev->ns[i].bdev);
|
|
dev->ns[i].block_size = spdk_bdev_get_block_size(dev->ns[i].bdev);
|
|
dev->ns[i].capacity = num_blocks * dev->ns[i].block_size;
|
|
} else {
|
|
memset(nsdata, 0, sizeof(*nsdata));
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
spdk_vhost_nvme_ctrlr_identify_update(struct spdk_vhost_nvme_dev *dev)
|
|
{
|
|
struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
|
|
char sn[20];
|
|
|
|
/* Controller Capabilities */
|
|
dev->cap.bits.cqr = 1;
|
|
dev->cap.bits.to = 1;
|
|
dev->cap.bits.dstrd = 0;
|
|
dev->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
|
|
dev->cap.bits.mpsmin = 0;
|
|
dev->cap.bits.mpsmax = 0;
|
|
/* MQES is 0 based value */
|
|
dev->cap.bits.mqes = MAX_QUEUE_ENTRIES_SUPPORTED - 1;
|
|
|
|
/* Controller Configuration */
|
|
dev->cc.bits.en = 0;
|
|
|
|
/* Controller Status */
|
|
dev->csts.bits.rdy = 0;
|
|
|
|
/* Identify Controller */
|
|
spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
|
|
cdata->vid = 0x8086;
|
|
cdata->ssvid = 0x8086;
|
|
spdk_strcpy_pad(cdata->mn, "SPDK Virtual NVMe Controller", sizeof(cdata->mn), ' ');
|
|
snprintf(sn, sizeof(sn), "NVMe_%s", dev->vdev.name);
|
|
spdk_strcpy_pad(cdata->sn, sn, sizeof(cdata->sn), ' ');
|
|
cdata->ieee[0] = 0xe4;
|
|
cdata->ieee[1] = 0xd2;
|
|
cdata->ieee[2] = 0x5c;
|
|
cdata->ver.bits.mjr = 1;
|
|
cdata->ver.bits.mnr = 0;
|
|
cdata->mdts = 5; /* 128 KiB */
|
|
cdata->rab = 6;
|
|
cdata->sqes.min = 6;
|
|
cdata->sqes.max = 6;
|
|
cdata->cqes.min = 4;
|
|
cdata->cqes.max = 4;
|
|
cdata->oncs.dsm = 1;
|
|
/* Emulated NVMe controller */
|
|
cdata->oacs.doorbell_buffer_config = 1;
|
|
|
|
spdk_vhost_nvme_ns_identify_update(dev);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t num_io_queues)
|
|
{
|
|
struct spdk_vhost_nvme_dev *dev;
|
|
int rc;
|
|
|
|
if (posix_memalign((void **)&dev, SPDK_CACHE_LINE_SIZE, sizeof(*dev))) {
|
|
return -ENOMEM;
|
|
}
|
|
memset(dev, 0, sizeof(*dev));
|
|
|
|
if (num_io_queues < 1 || num_io_queues > MAX_IO_QUEUES) {
|
|
free(dev);
|
|
return -EINVAL;
|
|
}
|
|
|
|
spdk_vhost_lock();
|
|
rc = vhost_dev_register(&dev->vdev, name, cpumask,
|
|
&spdk_vhost_nvme_device_backend);
|
|
|
|
if (rc) {
|
|
free(dev);
|
|
spdk_vhost_unlock();
|
|
return rc;
|
|
}
|
|
|
|
dev->num_io_queues = num_io_queues;
|
|
STAILQ_INIT(&dev->free_tasks);
|
|
TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, dev, tailq);
|
|
|
|
spdk_vhost_nvme_ctrlr_identify_update(dev);
|
|
|
|
SPDK_NOTICELOG("Controller %s: Constructed\n", name);
|
|
spdk_vhost_unlock();
|
|
return rc;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
struct spdk_vhost_nvme_ns *ns;
|
|
int rc;
|
|
uint32_t i;
|
|
|
|
if (nvme == NULL) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
TAILQ_REMOVE(&g_nvme_ctrlrs, nvme, tailq);
|
|
for (i = 0; i < nvme->num_ns; i++) {
|
|
ns = &nvme->ns[i];
|
|
if (ns->active_ns) {
|
|
spdk_vhost_nvme_deactive_ns(ns);
|
|
}
|
|
}
|
|
|
|
rc = vhost_dev_unregister(vdev);
|
|
if (rc != 0) {
|
|
return rc;
|
|
}
|
|
|
|
free(nvme);
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, const char *bdev_name)
|
|
{
|
|
struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
|
|
struct spdk_vhost_nvme_ns *ns;
|
|
struct spdk_bdev *bdev;
|
|
int rc = -1;
|
|
|
|
if (nvme == NULL) {
|
|
return -ENODEV;
|
|
}
|
|
|
|
if (nvme->num_ns == MAX_NAMESPACE) {
|
|
SPDK_ERRLOG("Can't support %d Namespaces\n", nvme->num_ns);
|
|
return -ENOSPC;
|
|
}
|
|
|
|
bdev = spdk_bdev_get_by_name(bdev_name);
|
|
if (!bdev) {
|
|
SPDK_ERRLOG("could not find bdev %s\n", bdev_name);
|
|
return -ENODEV;
|
|
}
|
|
|
|
ns = &nvme->ns[nvme->num_ns];
|
|
rc = spdk_bdev_open(bdev, true, bdev_remove_cb, ns, &nvme->ns[nvme->num_ns].bdev_desc);
|
|
if (rc != 0) {
|
|
SPDK_ERRLOG("Could not open bdev '%s', error=%d\n",
|
|
bdev_name, rc);
|
|
return rc;
|
|
}
|
|
|
|
nvme->ns[nvme->num_ns].bdev = bdev;
|
|
nvme->ns[nvme->num_ns].active_ns = 1;
|
|
nvme->ns[nvme->num_ns].nsid = nvme->num_ns + 1;
|
|
nvme->num_ns++;
|
|
|
|
spdk_vhost_nvme_ns_identify_update(nvme);
|
|
|
|
return rc;
|
|
}
|
|
|
|
int
|
|
vhost_nvme_controller_construct(void)
|
|
{
|
|
struct spdk_conf_section *sp;
|
|
const char *name;
|
|
const char *bdev_name;
|
|
const char *cpumask;
|
|
int rc, i = 0;
|
|
struct spdk_vhost_dev *vdev;
|
|
uint32_t ctrlr_num, io_queues;
|
|
|
|
for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) {
|
|
if (!spdk_conf_section_match_prefix(sp, "VhostNvme")) {
|
|
continue;
|
|
}
|
|
|
|
if (sscanf(spdk_conf_section_get_name(sp), "VhostNvme%u", &ctrlr_num) != 1) {
|
|
SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n",
|
|
spdk_conf_section_get_name(sp));
|
|
return -1;
|
|
}
|
|
|
|
name = spdk_conf_section_get_val(sp, "Name");
|
|
if (name == NULL) {
|
|
SPDK_ERRLOG("VhostNvme%u: missing Name\n", ctrlr_num);
|
|
return -1;
|
|
}
|
|
|
|
cpumask = spdk_conf_section_get_val(sp, "Cpumask");
|
|
rc = spdk_conf_section_get_intval(sp, "NumberOfQueues");
|
|
if (rc > 0) {
|
|
io_queues = rc;
|
|
} else {
|
|
io_queues = 1;
|
|
}
|
|
|
|
rc = vhost_nvme_dev_construct(name, cpumask, io_queues);
|
|
if (rc < 0) {
|
|
SPDK_ERRLOG("VhostNvme%u: Construct failed\n", ctrlr_num);
|
|
return -1;
|
|
}
|
|
|
|
vdev = spdk_vhost_dev_find(name);
|
|
if (!vdev) {
|
|
return -1;
|
|
}
|
|
|
|
for (i = 0; spdk_conf_section_get_nval(sp, "Namespace", i) != NULL; i++) {
|
|
bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0);
|
|
if (!bdev_name) {
|
|
SPDK_ERRLOG("namespace configuration missing bdev name\n");
|
|
break;
|
|
}
|
|
rc = vhost_nvme_dev_add_ns(vdev, bdev_name);
|
|
if (rc < 0) {
|
|
SPDK_WARNLOG("VhostNvme%u: Construct Namespace with %s failed\n",
|
|
ctrlr_num, bdev_name);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
SPDK_LOG_REGISTER_COMPONENT("vhost_nvme", SPDK_LOG_VHOST_NVME)
|