eb05cbd677
A poller should return a status > 0 when it did some work (CPU time was consumed), marking the call as busy CPU time. Active pollers should return the BUSY status only if they did meaningful work beyond checking some conditions (e.g. processing requests or performing complicated operations).

Signed-off-by: Maciej Szwed <maciej.szwed@intel.com>
Change-Id: Id4636a0997489b129cecfe785592cc97b50992ba
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/2164
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
1501 lines
36 KiB
C
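
As context for the commit message above, a minimal sketch of the poller return convention it describes (assuming the spdk/thread.h poller API; example_poller and its queue helpers are hypothetical, not part of the file below):

/* Return SPDK_POLLER_BUSY (> 0) only when meaningful work was done, and
 * SPDK_POLLER_IDLE (0) when the poller merely checked some conditions,
 * so the framework can account busy vs. idle CPU time per call.
 */
static int
example_poller(void *ctx)
{
	struct example_queue *q = ctx;          /* hypothetical context type */
	int processed = example_queue_drain(q); /* hypothetical helper */

	return processed > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}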
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/conf.h"
#include "spdk/util.h"
#include "spdk/string.h"
#include "spdk/thread.h"
#include "spdk/barrier.h"
#include "spdk/vhost.h"
#include "spdk/bdev.h"
#include "spdk/version.h"
#include "spdk/nvme_spec.h"
#include "spdk/likely.h"

#include "vhost_internal.h"

#define MAX_IO_QUEUES 31
#define MAX_IOVS 64
#define MAX_NAMESPACE 8
#define MAX_QUEUE_ENTRIES_SUPPORTED 256
#define MAX_BATCH_IO 8

struct spdk_vhost_nvme_sq {
	uint16_t sqid;
	uint16_t size;
	uint16_t cqid;
	bool valid;
	struct spdk_nvme_cmd *sq_cmd;
	uint16_t sq_head;
	uint16_t sq_tail;
};

struct spdk_vhost_nvme_cq {
	uint8_t phase;
	uint16_t size;
	uint16_t cqid;
	bool valid;
	volatile struct spdk_nvme_cpl *cq_cqe;
	uint16_t cq_head;
	uint16_t guest_signaled_cq_head;
	uint32_t need_signaled_cnt;
	STAILQ_HEAD(, spdk_vhost_nvme_task) cq_full_waited_tasks;
	bool irq_enabled;
	int virq;
};

struct spdk_vhost_nvme_ns {
	struct spdk_bdev *bdev;
	uint32_t block_size;
	uint64_t capacity;
	uint32_t nsid;
	uint32_t active_ns;
	struct spdk_bdev_desc *bdev_desc;
	struct spdk_io_channel *bdev_io_channel;
	struct spdk_nvme_ns_data nsdata;
};

struct spdk_vhost_nvme_task {
	struct spdk_nvme_cmd cmd;
	struct spdk_vhost_nvme_dev *nvme;
	uint16_t sqid;
	uint16_t cqid;

	/** array of iovecs to transfer. */
	struct iovec iovs[MAX_IOVS];

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/* for bdev_io_wait */
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_vhost_nvme_ns *ns;

	/* parent pointer. */
	struct spdk_vhost_nvme_task *parent;
	uint8_t dnr;
	uint8_t sct;
	uint8_t sc;
	uint32_t num_children;
	STAILQ_ENTRY(spdk_vhost_nvme_task) stailq;
};

struct spdk_vhost_nvme_dev {
	struct spdk_vhost_dev vdev;

	uint32_t num_io_queues;
	union spdk_nvme_cap_register cap;
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	struct spdk_nvme_ctrlr_data cdata;

	uint32_t num_sqs;
	uint32_t num_cqs;

	uint32_t num_ns;
	struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE];

	volatile uint32_t *bar;
	volatile uint32_t *bar_db;
	uint64_t bar_size;
	bool dataplane_started;

	volatile uint32_t *dbbuf_dbs;
	volatile uint32_t *dbbuf_eis;
	struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1];
	struct spdk_vhost_nvme_cq cq_queue[MAX_IO_QUEUES + 1];

	/* The one and only session associated with this device */
	struct spdk_vhost_session *vsession;

	TAILQ_ENTRY(spdk_vhost_nvme_dev) tailq;
	STAILQ_HEAD(, spdk_vhost_nvme_task) free_tasks;
	struct spdk_poller *requestq_poller;
	struct spdk_poller *stop_poller;
};

static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend;

/*
 * Report the SPDK version as the firmware revision.
 * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
 */
#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING

static int
nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
		struct spdk_vhost_nvme_task *task);

static struct spdk_vhost_nvme_dev *
to_nvme_dev(struct spdk_vhost_dev *vdev)
{
	if (vdev->backend != &spdk_vhost_nvme_device_backend) {
		SPDK_ERRLOG("%s: not a vhost-nvme device\n", vdev->name);
		return NULL;
	}

	return SPDK_CONTAINEROF(vdev, struct spdk_vhost_nvme_dev, vdev);
}

static TAILQ_HEAD(, spdk_vhost_nvme_dev) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs);
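
/*
 * Doorbell register layout: for queue pair `qid` the SQ tail doorbell lives
 * at dword offset qid * 2 * stride and the CQ head doorbell at
 * (qid * 2 + 1) * stride. This controller reports CAP.DSTRD = 0, so the
 * stride is one dword and callers pass db_stride = 1.
 */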
static inline unsigned int sq_offset(unsigned int qid, uint32_t db_stride)
{
	return qid * 2 * db_stride;
}

static inline unsigned int cq_offset(unsigned int qid, uint32_t db_stride)
{
	return (qid * 2 + 1) * db_stride;
}

static void
nvme_inc_cq_head(struct spdk_vhost_nvme_cq *cq)
{
	cq->cq_head++;
	if (cq->cq_head >= cq->size) {
		cq->cq_head = 0;
		cq->phase = !cq->phase;
	}
}
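
/*
 * The CQ is treated as full when advancing cq_head would catch up with the
 * head the guest last signaled; completions that arrive while the queue is
 * full are parked on cq_full_waited_tasks until the guest consumes entries.
 */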
static bool
nvme_cq_is_full(struct spdk_vhost_nvme_cq *cq)
{
	return ((cq->cq_head + 1) % cq->size == cq->guest_signaled_cq_head);
}

static void
nvme_inc_sq_head(struct spdk_vhost_nvme_sq *sq)
{
	sq->sq_head = (sq->sq_head + 1) % sq->size;
}

static struct spdk_vhost_nvme_sq *
vhost_nvme_get_sq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
{
	if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
		return NULL;
	}

	return &dev->sq_queue[qid];
}

static struct spdk_vhost_nvme_cq *
vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid)
{
	if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) {
		return NULL;
	}

	return &dev->cq_queue[qid];
}

static inline uint32_t
vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset)
{
	if (nvme->dataplane_started) {
		return nvme->dbbuf_dbs[offset];
	} else if (nvme->bar) {
		return nvme->bar_db[offset];
	}

	assert(0);

	return 0;
}

static void *
vhost_nvme_gpa_to_vva(void *priv, uint64_t addr, uint64_t len)
{
	struct spdk_vhost_session *vsession = priv;

	return vhost_gpa_to_vva(vsession, addr, len);
}
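
/*
 * Translate the command's PRP entries into the task's iovec array. Each PRP
 * entry references one 4 KiB guest page, translated from guest-physical to
 * host-virtual addresses through the vhost session memory map.
 */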
static int
vhost_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd,
		    struct spdk_vhost_nvme_task *task, uint32_t len)
{
	int err;

	err = spdk_nvme_map_prps(nvme->vsession, cmd, task->iovs, len, 4096,
				 vhost_nvme_gpa_to_vva);
	if (spdk_unlikely(err < 0)) {
		return err;
	}
	task->iovcnt = err;
	return 0;
}

static void
nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme)
{
	struct spdk_vhost_nvme_cq *cq;
	uint32_t qid, cq_head;

	assert(nvme != NULL);

	for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {
		cq = vhost_nvme_get_cq_from_qid(nvme, qid);
		if (!cq || !cq->valid) {
			continue;
		}

		cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1));
		if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) {
			eventfd_write(cq->virq, (eventfd_t)1);
			cq->need_signaled_cnt = 0;
		}
	}
}
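
/*
 * Post a completion entry to the guest-visible CQ. The CQE is written with
 * an inverted phase bit first; after a write barrier, the phase bit is
 * flipped to the current phase so the guest never observes a CQE before the
 * rest of the entry is visible.
 */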
static void
vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task)
{
	struct spdk_vhost_nvme_dev *nvme = task->nvme;
	struct spdk_nvme_cpl cqe = {0};
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	uint16_t cqid = task->cqid;
	uint16_t sqid = task->sqid;

	cq = vhost_nvme_get_cq_from_qid(nvme, cqid);
	sq = vhost_nvme_get_sq_from_qid(nvme, sqid);
	if (spdk_unlikely(!cq || !sq)) {
		return;
	}

	cq->guest_signaled_cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1));
	if (spdk_unlikely(nvme_cq_is_full(cq))) {
		STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq);
		return;
	}

	cqe.sqid = sqid;
	cqe.sqhd = sq->sq_head;
	cqe.cid = cmd->cid;
	cqe.status.dnr = task->dnr;
	cqe.status.sct = task->sct;
	cqe.status.sc = task->sc;
	cqe.status.p = !cq->phase;
	cq->cq_cqe[cq->cq_head] = cqe;
	spdk_smp_wmb();
	cq->cq_cqe[cq->cq_head].status.p = cq->phase;

	nvme_inc_cq_head(cq);
	cq->need_signaled_cnt++;

	/* MMIO Control */
	if (nvme->dataplane_started) {
		nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1);
	}

	STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
}

static void
blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_vhost_nvme_task *task = cb_arg;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	int sc, sct;
	uint32_t cdw0;

	assert(bdev_io != NULL);

	spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
	spdk_bdev_free_io(bdev_io);

	task->dnr = !success;
	task->sct = sct;
	task->sc = sc;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("I/O error, sector %u\n", cmd->cdw10);
	}

	vhost_nvme_task_complete(task);
}

static void
blk_unmap_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_vhost_nvme_task *child = cb_arg;
	struct spdk_vhost_nvme_task *task = child->parent;
	struct spdk_vhost_nvme_dev *nvme = task->nvme;
	int sct, sc;
	uint32_t cdw0;

	assert(bdev_io != NULL);

	task->num_children--;
	if (!success) {
		task->dnr = 1;
		spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
		task->sct = sct;
		task->sc = sc;
	}

	spdk_bdev_free_io(bdev_io);

	if (!task->num_children) {
		vhost_nvme_task_complete(task);
	}

	STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
}

static struct spdk_vhost_nvme_ns *
vhost_nvme_get_ns_from_nsid(struct spdk_vhost_nvme_dev *dev, uint32_t nsid)
{
	if (spdk_unlikely(!nsid || nsid > dev->num_ns)) {
		return NULL;
	}

	return &dev->ns[nsid - 1];
}

static void
vhost_nvme_resubmit_task(void *arg)
{
	struct spdk_vhost_nvme_task *task = (struct spdk_vhost_nvme_task *)arg;
	int rc;

	rc = nvme_process_sq(task->nvme, task->sq, task);
	if (rc) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "vhost_nvme: task resubmit failed, rc = %d.\n", rc);
	}
}

static int
vhost_nvme_queue_task(struct spdk_vhost_nvme_task *task)
{
	int rc;

	task->bdev_io_wait.bdev = task->ns->bdev;
	task->bdev_io_wait.cb_fn = vhost_nvme_resubmit_task;
	task->bdev_io_wait.cb_arg = task;

	rc = spdk_bdev_queue_io_wait(task->ns->bdev, task->ns->bdev_io_channel, &task->bdev_io_wait);
	if (rc != 0) {
		SPDK_ERRLOG("Queue io failed in vhost_nvme_queue_task, rc=%d.\n", rc);
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		vhost_nvme_task_complete(task);
	}

	return rc;
}

static int
nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq,
		struct spdk_vhost_nvme_task *task)
{
	struct spdk_vhost_nvme_task *child;
	struct spdk_nvme_cmd *cmd = &task->cmd;
	struct spdk_vhost_nvme_ns *ns;
	int ret = -1;
	uint32_t len, nlba, block_size;
	uint64_t slba;
	struct spdk_nvme_dsm_range *range;
	uint16_t i, num_ranges = 0;

	task->nvme = nvme;
	task->dnr = 0;
	task->sct = 0;
	task->sc = 0;

	ns = vhost_nvme_get_ns_from_nsid(nvme, cmd->nsid);
	if (spdk_unlikely(!ns)) {
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
		vhost_nvme_task_complete(task);
		return -1;
	}

	block_size = ns->block_size;
	task->num_children = 0;
	task->cqid = sq->cqid;
	task->sqid = sq->sqid;

	task->ns = ns;

	if (spdk_unlikely(!ns->active_ns)) {
		task->dnr = 1;
		task->sct = SPDK_NVME_SCT_GENERIC;
		task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
		vhost_nvme_task_complete(task);
		return -1;
	}

	/* valid only for Read/Write commands */
	nlba = (cmd->cdw12 & 0xffff) + 1;
	slba = cmd->cdw11;
	slba = (slba << 32) | cmd->cdw10;

	if (cmd->opc == SPDK_NVME_OPC_READ || cmd->opc == SPDK_NVME_OPC_WRITE ||
	    cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		if (cmd->psdt != SPDK_NVME_PSDT_PRP) {
			SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Invalid PSDT %u%ub in command\n",
				      cmd->psdt >> 1, cmd->psdt & 1u);
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INVALID_FIELD;
			vhost_nvme_task_complete(task);
			return -1;
		}

		if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
			num_ranges = (cmd->cdw10 & 0xff) + 1;
			len = num_ranges * sizeof(struct spdk_nvme_dsm_range);
		} else {
			len = nlba * block_size;
		}

		ret = vhost_nvme_map_prps(nvme, cmd, task, len);
		if (spdk_unlikely(ret != 0)) {
			SPDK_ERRLOG("nvme command map prps failed\n");
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INVALID_FIELD;
			vhost_nvme_task_complete(task);
			return -1;
		}
	}

	switch (cmd->opc) {
	case SPDK_NVME_OPC_READ:
		ret = spdk_bdev_readv(ns->bdev_desc, ns->bdev_io_channel,
				      task->iovs, task->iovcnt, slba * block_size,
				      nlba * block_size, blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_WRITE:
		ret = spdk_bdev_writev(ns->bdev_desc, ns->bdev_io_channel,
				       task->iovs, task->iovcnt, slba * block_size,
				       nlba * block_size, blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_FLUSH:
		ret = spdk_bdev_flush(ns->bdev_desc, ns->bdev_io_channel,
				      0, ns->capacity,
				      blk_request_complete_cb, task);
		break;
	case SPDK_NVME_OPC_DATASET_MANAGEMENT:
		range = (struct spdk_nvme_dsm_range *)task->iovs[0].iov_base;
		for (i = 0; i < num_ranges; i++) {
			if (!STAILQ_EMPTY(&nvme->free_tasks)) {
				child = STAILQ_FIRST(&nvme->free_tasks);
				STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
			} else {
				SPDK_ERRLOG("No free task now\n");
				ret = -1;
				break;
			}
			task->num_children++;
			child->parent = task;
			ret = spdk_bdev_unmap(ns->bdev_desc, ns->bdev_io_channel,
					      range[i].starting_lba * block_size,
					      range[i].length * block_size,
					      blk_unmap_complete_cb, child);
			if (ret) {
				STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq);
				break;
			}
		}
		break;
	default:
		ret = -1;
		break;
	}

	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "No memory, start to queue io.\n");
			task->sq = sq;
			ret = vhost_nvme_queue_task(task);
		} else {
			/* post error status to cqe */
			SPDK_ERRLOG("Error submitting command %u, ret %d\n", cmd->opc, ret);
			task->dnr = 1;
			task->sct = SPDK_NVME_SCT_GENERIC;
			task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
			vhost_nvme_task_complete(task);
		}
	}

	return ret;
}
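
/*
 * Request-queue poller: drains up to MAX_BATCH_IO commands per submission
 * queue per call. It returns a positive count (accounted as busy CPU time)
 * when it consumed submission queue entries, and a non-positive value when
 * there was no work, per the poller convention in the commit message.
 */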
static int
nvme_worker(void *arg)
{
	struct spdk_vhost_nvme_dev *nvme = (struct spdk_vhost_nvme_dev *)arg;
	struct spdk_vhost_nvme_sq *sq;
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_task *task;
	uint32_t qid, dbbuf_sq;
	int ret;
	int count = -1;

	if (spdk_unlikely(!nvme->num_sqs)) {
		return SPDK_POLLER_IDLE;
	}

	if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) {
		return SPDK_POLLER_IDLE;
	}

	for (qid = 1; qid <= MAX_IO_QUEUES; qid++) {

		sq = vhost_nvme_get_sq_from_qid(nvme, qid);
		if (!sq->valid) {
			continue;
		}
		cq = vhost_nvme_get_cq_from_qid(nvme, sq->cqid);
		if (spdk_unlikely(!cq)) {
			return SPDK_POLLER_BUSY;
		}
		cq->guest_signaled_cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1));
		if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) &&
				  !nvme_cq_is_full(cq))) {
			task = STAILQ_FIRST(&cq->cq_full_waited_tasks);
			STAILQ_REMOVE_HEAD(&cq->cq_full_waited_tasks, stailq);
			vhost_nvme_task_complete(task);
		}

		dbbuf_sq = vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1));
		sq->sq_tail = (uint16_t)dbbuf_sq;
		count = 0;

		while (sq->sq_head != sq->sq_tail) {
			if (spdk_unlikely(!sq->sq_cmd)) {
				break;
			}
			if (spdk_likely(!STAILQ_EMPTY(&nvme->free_tasks))) {
				task = STAILQ_FIRST(&nvme->free_tasks);
				STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
			} else {
				return SPDK_POLLER_BUSY;
			}

			task->cmd = sq->sq_cmd[sq->sq_head];
			nvme_inc_sq_head(sq);

			/* processing IO */
			ret = nvme_process_sq(nvme, sq, task);
			if (spdk_unlikely(ret)) {
				SPDK_ERRLOG("QID %u CID %u, SQ HEAD %u, DBBUF SQ TAIL %u\n", qid, task->cmd.cid, sq->sq_head,
					    sq->sq_tail);
			}

			/* MMIO Control */
			if (nvme->dataplane_started) {
				nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1);
			}

			/* Maximum batch I/Os to pick up at once */
			if (count++ == MAX_BATCH_IO) {
				break;
			}
		}
	}

	/* Completion Queue */
	nvme_cq_signal_fd(nvme);

	return count;
}
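
/*
 * NVMe 1.3 Doorbell Buffer Config: the guest supplies two page-aligned 4 KiB
 * buffers, one holding shadow doorbell values and one holding event-index
 * hints, which lets the fast path read doorbells from memory instead of BAR0
 * once the data plane is started.
 */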
static int
vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme,
				  struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	struct spdk_vhost_session *vsession = nvme->vsession;
	uint64_t dbs_dma_addr, eis_dma_addr;

	dbs_dma_addr = cmd->dptr.prp.prp1;
	eis_dma_addr = cmd->dptr.prp.prp2;

	if ((dbs_dma_addr % 4096) || (eis_dma_addr % 4096)) {
		return -1;
	}
	/* Guest Physical Address to Host Virtual Address */
	nvme->dbbuf_dbs = vhost_gpa_to_vva(vsession, dbs_dma_addr, 4096);
	nvme->dbbuf_eis = vhost_gpa_to_vva(vsession, eis_dma_addr, 4096);
	if (!nvme->dbbuf_dbs || !nvme->dbbuf_eis) {
		return -1;
	}
	/* Zero the doorbell buffer memory */
	memset((void *)nvme->dbbuf_dbs, 0, 4096);
	memset((void *)nvme->dbbuf_eis, 0, 4096);

	cpl->status.sc = 0;
	cpl->status.sct = 0;

	/* Data plane started */
	nvme->dataplane_started = true;

	return 0;
}
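
/*
 * Create I/O SQ. CDW10 carries the queue ID and a 0-based queue size (hence
 * qsize + 1 below); CDW11 bit 0 requires the queue to be physically
 * contiguous, which is the only layout supported here.
 */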
static int
vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid, qsize, cqid;
	uint64_t dma_addr;
	uint64_t requested_len;
	struct spdk_vhost_nvme_cq *cq;
	struct spdk_vhost_nvme_sq *sq;

	/* physically contiguous */
	if (!(cmd->cdw11 & 0x1)) {
		return -1;
	}

	cqid = (cmd->cdw11 >> 16) & 0xffff;
	qid = cmd->cdw10 & 0xffff;
	qsize = (cmd->cdw10 >> 16) & 0xffff;
	dma_addr = cmd->dptr.prp.prp1;
	if (!dma_addr || dma_addr % 4096) {
		return -1;
	}

	sq = vhost_nvme_get_sq_from_qid(nvme, qid);
	cq = vhost_nvme_get_cq_from_qid(nvme, cqid);
	if (!sq || !cq) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u or CQID %u\n",
			      qid, cqid);
		cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		return -1;
	}

	sq->sqid = qid;
	sq->cqid = cqid;
	sq->size = qsize + 1;
	sq->sq_head = sq->sq_tail = 0;
	requested_len = sizeof(struct spdk_nvme_cmd) * sq->size;
	sq->sq_cmd = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
	if (!sq->sq_cmd) {
		return -1;
	}
	nvme->num_sqs++;
	sq->valid = true;
	if (nvme->bar) {
		nvme->bar_db[sq_offset(qid, 1)] = 0;
	}

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static int
vhost_nvme_delete_io_sq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid;
	struct spdk_vhost_nvme_sq *sq;

	qid = cmd->cdw10 & 0xffff;
	sq = vhost_nvme_get_sq_from_qid(nvme, qid);
	if (!sq) {
		return -1;
	}

	/* We have not seen a submission queue deleted while I/O is still
	 * running against it; if that ever happens, we must ensure the
	 * poller no longer runs against this submission queue.
	 */
	nvme->num_sqs--;
	sq->valid = false;

	memset(sq, 0, sizeof(*sq));
	sq->sq_cmd = NULL;

	cpl->status.sc = 0;
	cpl->status.sct = 0;

	return 0;
}

static int
vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qsize, qid;
	uint64_t dma_addr;
	struct spdk_vhost_nvme_cq *cq;
	uint64_t requested_len;

	/* physically contiguous */
	if (!(cmd->cdw11 & 0x1)) {
		return -1;
	}

	qid = cmd->cdw10 & 0xffff;
	qsize = (cmd->cdw10 >> 16) & 0xffff;
	dma_addr = cmd->dptr.prp.prp1;
	if (!dma_addr || dma_addr % 4096) {
		return -1;
	}

	cq = vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "User requested invalid QID %u\n", qid);
		cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
		return -1;
	}
	cq->cqid = qid;
	cq->size = qsize + 1;
	cq->phase = 1;
	cq->irq_enabled = (cmd->cdw11 >> 1) & 0x1;
	/* Setup virq through vhost messages */
	cq->virq = -1;
	cq->cq_head = 0;
	cq->guest_signaled_cq_head = 0;
	cq->need_signaled_cnt = 0;
	requested_len = sizeof(struct spdk_nvme_cpl) * cq->size;
	cq->cq_cqe = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len);
	if (!cq->cq_cqe) {
		return -1;
	}
	nvme->num_cqs++;
	cq->valid = true;
	if (nvme->bar) {
		nvme->bar_db[cq_offset(qid, 1)] = 0;
	}
	STAILQ_INIT(&cq->cq_full_waited_tasks);

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static int
vhost_nvme_delete_io_cq(struct spdk_vhost_nvme_dev *nvme,
			struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl)
{
	uint16_t qid;
	struct spdk_vhost_nvme_cq *cq;

	qid = cmd->cdw10 & 0xffff;
	cq = vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		return -1;
	}
	nvme->num_cqs--;
	cq->valid = false;

	memset(cq, 0, sizeof(*cq));
	cq->cq_cqe = NULL;

	cpl->status.sc = 0;
	cpl->status.sct = 0;
	return 0;
}

static struct spdk_vhost_nvme_dev *
vhost_nvme_get_by_name(int vid)
{
	struct spdk_vhost_nvme_dev *nvme;
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	TAILQ_FOREACH(nvme, &g_nvme_ctrlrs, tailq) {
		vdev = &nvme->vdev;
		TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) {
			if (vsession->vid == vid) {
				return nvme;
			}
		}
	}

	return NULL;
}

int
vhost_nvme_get_cap(int vid, uint64_t *cap)
{
	struct spdk_vhost_nvme_dev *nvme;

	nvme = vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	*cap = nvme->cap.raw;
	return 0;
}

int
vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
{
	struct spdk_nvme_cmd *req = (struct spdk_nvme_cmd *)cmd;
	struct spdk_nvme_cpl *cpl = (struct spdk_nvme_cpl *)cqe;
	struct spdk_vhost_nvme_ns *ns;
	int ret = 0;
	struct spdk_vhost_nvme_dev *nvme;

	nvme = vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Admin Command Opcode %u\n", req->opc);
	switch (req->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		if (req->cdw10 == SPDK_NVME_IDENTIFY_CTRLR) {
			memcpy(buf, &nvme->cdata, sizeof(struct spdk_nvme_ctrlr_data));
		} else if (req->cdw10 == SPDK_NVME_IDENTIFY_NS) {
			ns = vhost_nvme_get_ns_from_nsid(nvme, req->nsid);
			if (!ns) {
				cpl->status.sc = SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE;
				cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
				break;
			}
			memcpy(buf, &ns->nsdata, sizeof(struct spdk_nvme_ns_data));
		}
		/* success */
		cpl->status.sc = 0;
		cpl->status.sct = 0;
		break;
	case SPDK_NVME_OPC_CREATE_IO_CQ:
		ret = vhost_nvme_create_io_cq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_DELETE_IO_CQ:
		ret = vhost_nvme_delete_io_cq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_CREATE_IO_SQ:
		ret = vhost_nvme_create_io_sq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_DELETE_IO_SQ:
		ret = vhost_nvme_delete_io_sq(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_GET_FEATURES:
	case SPDK_NVME_OPC_SET_FEATURES:
		if (req->cdw10 == SPDK_NVME_FEAT_NUMBER_OF_QUEUES) {
			cpl->status.sc = 0;
			cpl->status.sct = 0;
			cpl->cdw0 = (nvme->num_io_queues - 1) | ((nvme->num_io_queues - 1) << 16);
		} else {
			cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD;
			cpl->status.sct = SPDK_NVME_SCT_GENERIC;
		}
		break;
	case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG:
		ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl);
		break;
	case SPDK_NVME_OPC_ABORT:
		/* TODO: Abort always fails for now */
		cpl->cdw0 = 1;
		cpl->status.sc = 0;
		cpl->status.sct = 0;
		break;
	}

	if (ret) {
		SPDK_ERRLOG("Admin Passthrough Failed with %u\n", req->opc);
	}

	return 0;
}

int
vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size)
{
	struct spdk_vhost_nvme_dev *nvme;

	nvme = vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr);
	/* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */
	nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull);
	nvme->bar_size = bar_size;

	return 0;
}

int
vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd)
{
	struct spdk_vhost_nvme_dev *nvme;
	struct spdk_vhost_nvme_cq *cq;

	nvme = vhost_nvme_get_by_name(vid);
	if (!nvme) {
		return -1;
	}

	cq = vhost_nvme_get_cq_from_qid(nvme, qid);
	if (!cq) {
		return -1;
	}
	if (cq->irq_enabled) {
		cq->virq = fd;
	} else {
		SPDK_ERRLOG("NVMe QID %d has interrupts disabled\n", qid);
	}

	return 0;
}

static void
free_task_pool(struct spdk_vhost_nvme_dev *nvme)
{
	struct spdk_vhost_nvme_task *task;

	while (!STAILQ_EMPTY(&nvme->free_tasks)) {
		task = STAILQ_FIRST(&nvme->free_tasks);
		STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq);
		spdk_free(task);
	}
}
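
/*
 * Pre-allocate one task per possible queue entry across all I/O queues so
 * the hot path never allocates; tasks are recycled through free_tasks.
 */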
static int
alloc_task_pool(struct spdk_vhost_nvme_dev *nvme)
{
	uint32_t entries, i;
	struct spdk_vhost_nvme_task *task;

	entries = nvme->num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED;

	for (i = 0; i < entries; i++) {
		task = spdk_zmalloc(sizeof(struct spdk_vhost_nvme_task),
				    SPDK_CACHE_LINE_SIZE, NULL,
				    SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (task == NULL) {
			SPDK_ERRLOG("Controller %s alloc task pool failed\n",
				    nvme->vdev.name);
			free_task_pool(nvme);
			return -1;
		}
		STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq);
	}

	return 0;
}

static int
vhost_nvme_start_cb(struct spdk_vhost_dev *vdev,
		    struct spdk_vhost_session *vsession, void *unused)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;
	int rc = 0;

	if (nvme == NULL) {
		rc = -1;
		goto out;
	}

	rc = alloc_task_pool(nvme);
	if (rc) {
		goto out;
	}

	SPDK_NOTICELOG("Start Device %u, Path %s, lcore %d\n", vsession->vid,
		       vdev->path, spdk_env_get_current_core());

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		ns_dev->bdev_io_channel = spdk_bdev_get_io_channel(ns_dev->bdev_desc);
		if (!ns_dev->bdev_io_channel) {
			rc = -1;
			goto out;
		}
	}

	nvme->vsession = vsession;
	/* Start the NVMe Poller */
	nvme->requestq_poller = SPDK_POLLER_REGISTER(nvme_worker, nvme, 0);

out:
	vhost_session_start_done(vsession, rc);
	return rc;
}

static int
vhost_nvme_start(struct spdk_vhost_session *vsession)
{
	if (vsession->vdev->active_session_num > 0) {
		/* We're trying to start a second session */
		SPDK_ERRLOG("Vhost-NVMe devices can support only one simultaneous connection.\n");
		return -1;
	}

	return vhost_session_send_event(vsession, vhost_nvme_start_cb,
					3, "start session");
}

static void
vhost_nvme_deactive_ns(struct spdk_vhost_nvme_ns *ns)
{
	ns->active_ns = 0;
	spdk_bdev_close(ns->bdev_desc);
	ns->bdev_desc = NULL;
	ns->bdev = NULL;
}

static void
bdev_remove_cb(void *remove_ctx)
{
	struct spdk_vhost_nvme_ns *ns = remove_ctx;

	SPDK_NOTICELOG("Removing NS %u, Block Device %s\n",
		       ns->nsid, spdk_bdev_get_name(ns->bdev));

	vhost_nvme_deactive_ns(ns);
}
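
/*
 * Deferred teardown: runs as a poller so it can retry until the vhost lock
 * is available, then releases per-namespace I/O channels, clears the BAR
 * and shadow doorbell state, and unregisters itself.
 */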
static int
destroy_device_poller_cb(void *arg)
{
	struct spdk_vhost_nvme_dev *nvme = arg;
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	SPDK_DEBUGLOG(SPDK_LOG_VHOST_NVME, "Destroy device poller callback\n");

	/* FIXME wait for pending I/Os to complete */

	if (spdk_vhost_trylock() != 0) {
		return SPDK_POLLER_BUSY;
	}

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (ns_dev->bdev_io_channel) {
			spdk_put_io_channel(ns_dev->bdev_io_channel);
			ns_dev->bdev_io_channel = NULL;
		}
	}
	/* Clear BAR space */
	if (nvme->bar) {
		memset((void *)nvme->bar, 0, nvme->bar_size);
	}
	nvme->num_sqs = 0;
	nvme->num_cqs = 0;
	nvme->dbbuf_dbs = NULL;
	nvme->dbbuf_eis = NULL;
	nvme->dataplane_started = false;

	spdk_poller_unregister(&nvme->stop_poller);
	vhost_session_stop_done(nvme->vsession, 0);

	spdk_vhost_unlock();
	return SPDK_POLLER_BUSY;
}

static int
vhost_nvme_stop_cb(struct spdk_vhost_dev *vdev,
		   struct spdk_vhost_session *vsession, void *unused)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);

	if (nvme == NULL) {
		vhost_session_stop_done(vsession, -1);
		return -1;
	}

	free_task_pool(nvme);
	SPDK_NOTICELOG("Stopping Device %u, Path %s\n", vsession->vid, vdev->path);

	spdk_poller_unregister(&nvme->requestq_poller);
	nvme->stop_poller = SPDK_POLLER_REGISTER(destroy_device_poller_cb, nvme, 1000);

	return 0;
}

static int
vhost_nvme_stop(struct spdk_vhost_session *vsession)
{
	return vhost_session_send_event(vsession, vhost_nvme_stop_cb,
					3, "stop session");
}

static void
vhost_nvme_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	if (nvme == NULL) {
		return;
	}

	spdk_json_write_named_array_begin(w, "namespaces");

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (!ns_dev->active_ns) {
			continue;
		}

		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint32(w, "nsid", ns_dev->nsid);
		spdk_json_write_named_string(w, "bdev", spdk_bdev_get_name(ns_dev->bdev));
		spdk_json_write_object_end(w);
	}

	spdk_json_write_array_end(w);
}

static void
vhost_nvme_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns_dev;
	uint32_t i;

	if (nvme == NULL) {
		return;
	}

	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "vhost_create_nvme_controller");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
	spdk_json_write_named_uint32(w, "io_queues", nvme->num_io_queues);
	spdk_json_write_named_string(w, "cpumask",
				     spdk_cpuset_fmt(spdk_thread_get_cpumask(nvme->vdev.thread)));
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	for (i = 0; i < nvme->num_ns; i++) {
		ns_dev = &nvme->ns[i];
		if (!ns_dev->active_ns) {
			continue;
		}

		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "method", "vhost_nvme_controller_add_ns");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name);
		spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(ns_dev->bdev));
		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}
}

static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend = {
	.session_ctx_size = 0,
	.start_session = vhost_nvme_start,
	.stop_session = vhost_nvme_stop,
	.dump_info_json = vhost_nvme_dump_info_json,
	.write_config_json = vhost_nvme_write_config_json,
	.remove_device = vhost_nvme_dev_remove,
};

static int
vhost_nvme_ns_identify_update(struct spdk_vhost_nvme_dev *dev)
{
	struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
	struct spdk_nvme_ns_data *nsdata;
	uint64_t num_blocks;
	uint32_t i;

	/* Identify Namespace */
	cdata->nn = dev->num_ns;
	for (i = 0; i < dev->num_ns; i++) {
		nsdata = &dev->ns[i].nsdata;
		if (dev->ns[i].active_ns) {
			num_blocks = spdk_bdev_get_num_blocks(dev->ns[i].bdev);
			nsdata->nsze = num_blocks;
			/* ncap must be non-zero for active Namespace */
			nsdata->ncap = num_blocks;
			nsdata->nuse = num_blocks;
			nsdata->nlbaf = 0;
			nsdata->flbas.format = 0;
			nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(dev->ns[i].bdev));
			nsdata->noiob = spdk_bdev_get_optimal_io_boundary(dev->ns[i].bdev);
			dev->ns[i].block_size = spdk_bdev_get_block_size(dev->ns[i].bdev);
			dev->ns[i].capacity = num_blocks * dev->ns[i].block_size;
		} else {
			memset(nsdata, 0, sizeof(*nsdata));
		}
	}
	return 0;
}

static int
vhost_nvme_ctrlr_identify_update(struct spdk_vhost_nvme_dev *dev)
{
	struct spdk_nvme_ctrlr_data *cdata = &dev->cdata;
	char sn[20];

	/* Controller Capabilities */
	dev->cap.bits.cqr = 1;
	dev->cap.bits.to = 1;
	dev->cap.bits.dstrd = 0;
	dev->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
	dev->cap.bits.mpsmin = 0;
	dev->cap.bits.mpsmax = 0;
	/* MQES is a 0-based value */
	dev->cap.bits.mqes = MAX_QUEUE_ENTRIES_SUPPORTED - 1;

	/* Controller Configuration */
	dev->cc.bits.en = 0;

	/* Controller Status */
	dev->csts.bits.rdy = 0;

	/* Identify Controller */
	spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
	cdata->vid = 0x8086;
	cdata->ssvid = 0x8086;
	spdk_strcpy_pad(cdata->mn, "SPDK Virtual NVMe Controller", sizeof(cdata->mn), ' ');
	snprintf(sn, sizeof(sn), "NVMe_%s", dev->vdev.name);
	spdk_strcpy_pad(cdata->sn, sn, sizeof(cdata->sn), ' ');
	cdata->ieee[0] = 0xe4;
	cdata->ieee[1] = 0xd2;
	cdata->ieee[2] = 0x5c;
	cdata->ver.bits.mjr = 1;
	cdata->ver.bits.mnr = 0;
	cdata->mdts = 5; /* 128 KiB */
	cdata->rab = 6;
	cdata->sqes.min = 6;
	cdata->sqes.max = 6;
	cdata->cqes.min = 4;
	cdata->cqes.max = 4;
	cdata->oncs.dsm = 1;
	/* Emulated NVMe controller */
	cdata->oacs.doorbell_buffer_config = 1;

	vhost_nvme_ns_identify_update(dev);

	return 0;
}

int
vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t num_io_queues)
{
	struct spdk_vhost_nvme_dev *dev;
	int rc;

	if (posix_memalign((void **)&dev, SPDK_CACHE_LINE_SIZE, sizeof(*dev))) {
		return -ENOMEM;
	}
	memset(dev, 0, sizeof(*dev));

	if (num_io_queues < 1 || num_io_queues > MAX_IO_QUEUES) {
		free(dev);
		return -EINVAL;
	}

	spdk_vhost_lock();
	rc = vhost_dev_register(&dev->vdev, name, cpumask,
				&spdk_vhost_nvme_device_backend);

	if (rc) {
		free(dev);
		spdk_vhost_unlock();
		return rc;
	}

	dev->num_io_queues = num_io_queues;
	STAILQ_INIT(&dev->free_tasks);
	TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, dev, tailq);

	vhost_nvme_ctrlr_identify_update(dev);

	SPDK_NOTICELOG("Controller %s: Constructed\n", name);
	spdk_vhost_unlock();
	return rc;
}

int
vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns;
	int rc;
	uint32_t i;

	if (nvme == NULL) {
		return -EINVAL;
	}

	TAILQ_REMOVE(&g_nvme_ctrlrs, nvme, tailq);
	for (i = 0; i < nvme->num_ns; i++) {
		ns = &nvme->ns[i];
		if (ns->active_ns) {
			vhost_nvme_deactive_ns(ns);
		}
	}

	rc = vhost_dev_unregister(vdev);
	if (rc != 0) {
		return rc;
	}

	free(nvme);
	return 0;
}

int
vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, const char *bdev_name)
{
	struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev);
	struct spdk_vhost_nvme_ns *ns;
	struct spdk_bdev *bdev;
	int rc = -1;

	if (nvme == NULL) {
		return -ENODEV;
	}

	if (nvme->num_ns == MAX_NAMESPACE) {
		SPDK_ERRLOG("Can't support more than %d namespaces\n", nvme->num_ns);
		return -ENOSPC;
	}

	bdev = spdk_bdev_get_by_name(bdev_name);
	if (!bdev) {
		SPDK_ERRLOG("could not find bdev %s\n", bdev_name);
		return -ENODEV;
	}

	ns = &nvme->ns[nvme->num_ns];
	rc = spdk_bdev_open(bdev, true, bdev_remove_cb, ns, &nvme->ns[nvme->num_ns].bdev_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Could not open bdev '%s', error=%d\n",
			    bdev_name, rc);
		return rc;
	}

	nvme->ns[nvme->num_ns].bdev = bdev;
	nvme->ns[nvme->num_ns].active_ns = 1;
	nvme->ns[nvme->num_ns].nsid = nvme->num_ns + 1;
	nvme->num_ns++;

	vhost_nvme_ns_identify_update(nvme);

	return rc;
}

int
vhost_nvme_controller_construct(void)
{
	struct spdk_conf_section *sp;
	const char *name;
	const char *bdev_name;
	const char *cpumask;
	int rc, i = 0;
	struct spdk_vhost_dev *vdev;
	uint32_t ctrlr_num, io_queues;

	for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) {
		if (!spdk_conf_section_match_prefix(sp, "VhostNvme")) {
			continue;
		}

		if (sscanf(spdk_conf_section_get_name(sp), "VhostNvme%u", &ctrlr_num) != 1) {
			SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n",
				    spdk_conf_section_get_name(sp));
			return -1;
		}

		name = spdk_conf_section_get_val(sp, "Name");
		if (name == NULL) {
			SPDK_ERRLOG("VhostNvme%u: missing Name\n", ctrlr_num);
			return -1;
		}

		cpumask = spdk_conf_section_get_val(sp, "Cpumask");
		rc = spdk_conf_section_get_intval(sp, "NumberOfQueues");
		if (rc > 0) {
			io_queues = rc;
		} else {
			io_queues = 1;
		}

		rc = vhost_nvme_dev_construct(name, cpumask, io_queues);
		if (rc < 0) {
			SPDK_ERRLOG("VhostNvme%u: Construct failed\n", ctrlr_num);
			return -1;
		}

		vdev = spdk_vhost_dev_find(name);
		if (!vdev) {
			return -1;
		}

		for (i = 0; spdk_conf_section_get_nval(sp, "Namespace", i) != NULL; i++) {
			bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0);
			if (!bdev_name) {
				SPDK_ERRLOG("namespace configuration missing bdev name\n");
				break;
			}
			rc = vhost_nvme_dev_add_ns(vdev, bdev_name);
			if (rc < 0) {
				SPDK_WARNLOG("VhostNvme%u: Construct Namespace with %s failed\n",
					     ctrlr_num, bdev_name);
				break;
			}
		}
	}

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("vhost_nvme", SPDK_LOG_VHOST_NVME)