vhost_user_nvme: add vhost user nvme target to SPDK

Similar to the existing vhost scsi/blk targets, this commit introduces
a new target: a vhost NVMe I/O slave target. QEMU presents an
emulated NVMe controller to the VM, and the SPDK I/O slave target
processes the I/Os sent from the guest VM.

Users can follow the example configuration file to evaluate this
feature; refer to the [VhostNvme] section in etc/spdk/vhost.conf.in.
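
For example, assuming the standard SPDK application options and the
vhost app binary at app/vhost/vhost, the target can be started with a
configuration file derived from that template:

  app/vhost/vhost -c /path/to/vhost.conf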

Change-Id: Ia2a8a3f719573f3268177234812bd28ed0082d5c
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/384213
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Author: Changpeng Liu 2017-11-22 01:01:48 -05:00
Committed by: Jim Harris
parent 20e69cf6b1
commit 90c0e24410
10 changed files with 1494 additions and 2 deletions

@@ -138,3 +138,17 @@
# this cpumask. By default, if not specified, any core in the
# SPDK process will be used.
#Cpumask 0x1
#[VhostNvme0]
# Define name for controller
#Name vhost.0
#NumberOfQueues 2
# Use first partition from the first NVMe device
#Namespace Nvme0n1p0
# Use second partition from the first NVMe device
#Namespace Nvme0n1p1
# Start the poller for this vhost controller on one of the cores in
# this cpumask. By default, if not specified, any core in the
# SPDK process will be used.
#Cpumask 0x1
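
The Nvme0n1p0/Nvme0n1p1 namespaces above assume the first NVMe bdev has
been split into partitions. A minimal sketch of a matching split-bdev
section, assuming the existing [Split] configuration syntax of the bdev
split module (not part of this commit):

#[Split]
#  # Split Nvme0n1 into two equally sized partitions
#  Split Nvme0n1 2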

@@ -38,7 +38,7 @@ CFLAGS += -I.
CFLAGS += -Irte_vhost
CFLAGS += $(ENV_CFLAGS)
C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c
C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c vhost_nvme.c
LIBNAME = vhost

@@ -104,6 +104,9 @@ struct vhost_device_ops {
* is used to inform the application on such change.
*/
int (*features_changed)(int vid, uint64_t features);
int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);
int (*new_connection)(int vid);
void (*destroy_connection)(int vid);
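
A minimal sketch of what a backend might plug into these new ops; the
buffer sizes follow the message handler below (64-byte admin command,
16-byte completion, 4 KiB data buffer), and every name other than the
ops fields themselves is illustrative:

#include <string.h>
#include "rte_vhost.h"

static int
example_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
{
    /* Decode the 64-byte admin command in cmd, fill the 16-byte
     * completion entry in cqe and, for data-in commands, up to
     * 4 KiB of data in buf. Here we only report success. */
    memset(cqe, 0, 16);
    return 0;
}

static const struct vhost_device_ops example_ops = {
    .vhost_nvme_admin_passthrough = example_nvme_admin_passthrough,
    /* .vhost_nvme_set_cq_call and .vhost_nvme_get_cap are wired the
     * same way; see g_spdk_vhost_ops later in this change. */
};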

@@ -178,6 +178,7 @@ struct virtio_net {
uint64_t negotiated_features;
uint64_t protocol_features;
int vid;
uint32_t is_nvme;
uint32_t flags;
uint16_t vhost_hlen;
/* to tell if we need broadcast rarp packet */

@@ -35,6 +35,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
@@ -78,6 +79,11 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU",
[VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG",
[VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG",
[VHOST_USER_NVME_ADMIN] = "VHOST_USER_NVME_ADMIN",
[VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL",
[VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP",
[VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP",
[VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD"
};
static uint64_t
@@ -548,6 +554,14 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
memcpy(&dev->mem_table, &pmsg->payload.memory, sizeof(dev->mem_table));
memcpy(dev->mem_table_fds, pmsg->fds, sizeof(dev->mem_table_fds));
dev->has_new_mem_table = 1;
/* vhost-user-nvme does not send the
 * set vring addr message, so enable
 * the memory table now.
 */
if (dev->has_new_mem_table && dev->is_nvme) {
vhost_setup_mem_table(dev);
dev->has_new_mem_table = 0;
}
return 0;
}
@@ -1040,12 +1054,59 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
return alloc_vring_queue(dev, vring_idx);
}
static int
vhost_user_nvme_io_request_passthrough(struct virtio_net *dev,
uint16_t qid, uint16_t tail_head,
bool is_submission_queue)
{
return -1;
}
static int
vhost_user_nvme_admin_passthrough(struct virtio_net *dev,
void *cmd, void *cqe, void *buf)
{
if (dev->notify_ops->vhost_nvme_admin_passthrough) {
return dev->notify_ops->vhost_nvme_admin_passthrough(dev->vid, cmd, cqe, buf);
}
return -1;
}
static int
vhost_user_nvme_set_cq_call(struct virtio_net *dev, uint16_t qid, int fd)
{
if (dev->notify_ops->vhost_nvme_set_cq_call) {
return dev->notify_ops->vhost_nvme_set_cq_call(dev->vid, qid, fd);
}
return -1;
}
static int
vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap)
{
if (dev->notify_ops->vhost_nvme_get_cap) {
return dev->notify_ops->vhost_nvme_get_cap(dev->vid, cap);
}
return -1;
}
int
vhost_user_msg_handler(int vid, int fd)
{
struct virtio_net *dev;
struct VhostUserMsg msg;
struct vhost_vring_file file;
int ret;
uint64_t cap;
uint64_t enable;
uint8_t cqe[16];
uint8_t cmd[64];
uint8_t buf[4096];
uint16_t qid, tail_head;
bool is_submission_queue;
dev = get_device(vid);
if (dev == NULL)
@@ -1106,6 +1167,60 @@ vhost_user_msg_handler(int vid, int fd)
ret = 0;
}
break;
case VHOST_USER_NVME_ADMIN:
if (!dev->is_nvme) {
dev->is_nvme = 1;
}
memcpy(cmd, &msg.payload.nvme.cmd, 64);
ret = vhost_user_nvme_admin_passthrough(dev, cmd, cqe, buf);
memcpy(&msg.payload.nvme.cmd, &cqe, 16);
msg.size = 16;
/* NVMe Identify Command */
if (cmd[0] == 0x06) {
memcpy(msg.payload.nvme.buf, &buf, 4096);
msg.size += 4096;
} else if (cmd[0] == 0x09 || cmd[0] == 0x0a) {
/* NVMe Set Features/Get Features commands */
memcpy(&msg.payload.nvme.buf, &buf, 4);
msg.size += 4096;
}
send_vhost_message(fd, &msg);
break;
case VHOST_USER_NVME_SET_CQ_CALL:
file.index = msg.payload.u64 & VHOST_USER_VRING_IDX_MASK;
file.fd = msg.fds[0];
ret = vhost_user_nvme_set_cq_call(dev, file.index, file.fd);
break;
case VHOST_USER_NVME_GET_CAP:
ret = vhost_user_nvme_get_cap(dev, &cap);
if (!ret)
msg.payload.u64 = cap;
else
msg.payload.u64 = 0;
msg.size = sizeof(msg.payload.u64);
send_vhost_message(fd, &msg);
break;
case VHOST_USER_NVME_START_STOP:
enable = msg.payload.u64;
/* device must be started before set cq call */
if (enable) {
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (dev->notify_ops->new_device(dev->vid) == 0)
dev->flags |= VIRTIO_DEV_RUNNING;
}
} else {
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
dev->notify_ops->destroy_device(dev->vid);
}
}
break;
case VHOST_USER_NVME_IO_CMD:
qid = msg.payload.nvme_io.qid;
tail_head = msg.payload.nvme_io.tail_head;
is_submission_queue = (msg.payload.nvme_io.queue_type == VHOST_USER_NVME_SUBMISSION_QUEUE) ? true : false;
vhost_user_nvme_io_request_passthrough(dev, qid, tail_head, is_submission_queue);
break;
case VHOST_USER_GET_FEATURES:
msg.payload.u64 = vhost_user_get_features(dev);
msg.size = sizeof(msg.payload.u64);
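
Once the backend has stored the completion-queue eventfd delivered by
VHOST_USER_NVME_SET_CQ_CALL (msg.fds[0] above), interrupting the guest
after posting completions is a plain eventfd write. A minimal sketch,
assuming the fd follows the usual vhost-user call-fd semantics:

#include <sys/eventfd.h>

static void
example_notify_guest_cq(int cq_call_fd)
{
    /* Kick the irqfd wired to the emulated NVMe completion queue. */
    eventfd_write(cq_call_fd, 1);
}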

@@ -84,6 +84,11 @@ typedef enum VhostUserRequest {
VHOST_USER_NET_SET_MTU = 20,
VHOST_USER_GET_CONFIG = 24,
VHOST_USER_SET_CONFIG = 25,
VHOST_USER_NVME_ADMIN = 27,
VHOST_USER_NVME_SET_CQ_CALL = 28,
VHOST_USER_NVME_GET_CAP = 29,
VHOST_USER_NVME_START_STOP = 30,
VHOST_USER_NVME_IO_CMD = 31,
VHOST_USER_MAX
} VhostUserRequest;
@@ -119,6 +124,17 @@ typedef struct VhostUserConfig {
uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;
enum VhostUserNvmeQueueTypes {
VHOST_USER_NVME_SUBMISSION_QUEUE = 1,
VHOST_USER_NVME_COMPLETION_QUEUE = 2,
};
typedef struct VhostUserNvmeIO {
enum VhostUserNvmeQueueTypes queue_type;
uint32_t qid;
uint32_t tail_head;
} VhostUserNvmeIO;
typedef struct VhostUserMsg {
VhostUserRequest request;
@@ -136,6 +152,14 @@ typedef struct VhostUserMsg {
VhostUserMemory memory;
VhostUserLog log;
VhostUserConfig config;
struct nvme {
union {
uint8_t req[64];
uint8_t cqe[16];
} cmd;
uint8_t buf[4096];
} nvme;
struct VhostUserNvmeIO nvme_io;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
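
For illustration, a master (QEMU-side) request using this layout might
be built as follows; the QEMU side is not part of this commit, framing
fields such as flags are omitted, and the sizing of the request payload
is an assumption:

#include <stdint.h>
#include <string.h>

static void
example_build_nvme_identify(VhostUserMsg *msg)
{
    uint8_t identify[64] = {0};

    identify[0] = 0x06;    /* NVMe Identify admin opcode */
    msg->request = VHOST_USER_NVME_ADMIN;
    memcpy(msg->payload.nvme.cmd.req, identify, sizeof(identify));
    msg->size = sizeof(msg->payload.nvme);
    /* The slave replies with the 16-byte completion in
     * payload.nvme.cmd.cqe and, for Identify, 4 KiB of identify
     * data in payload.nvme.buf. */
}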

@@ -79,6 +79,9 @@ const struct vhost_device_ops g_spdk_vhost_ops = {
.set_config = set_config,
.new_connection = new_connection,
.destroy_connection = destroy_connection,
.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
};
static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
@@ -534,6 +537,7 @@ spdk_vhost_dev_mem_unregister(struct spdk_vhost_dev *vdev)
assert(false);
}
}
}
static void
@@ -882,7 +886,6 @@ spdk_vhost_event_send(struct spdk_vhost_dev *vdev, spdk_vhost_event_fn cb_fn,
ev_ctx.vdev = vdev;
ev_ctx.cb_fn = cb_fn;
ev = spdk_event_allocate(vdev->lcore, spdk_vhost_event_cb, &ev_ctx, NULL);
assert(ev);
spdk_event_call(ev);
@@ -1290,6 +1293,12 @@ spdk_vhost_init(void)
return -1;
}
ret = spdk_vhost_nvme_controller_construct();
if (ret != 0) {
SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n");
return -1;
}
return 0;
}

@@ -254,5 +254,14 @@ void spdk_vhost_dump_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_w
void spdk_vhost_dev_backend_event_done(void *event_ctx, int response);
void spdk_vhost_lock(void);
void spdk_vhost_unlock(void);
int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev);
int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf);
int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd);
int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap);
int spdk_vhost_nvme_controller_construct(void);
int spdk_vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t io_queues);
int spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev);
int spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev,
const char *bdev_name);
#endif /* SPDK_VHOST_INTERNAL_H */
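
A minimal sketch of how these constructors might be driven, e.g. from
config parsing or an RPC handler; spdk_vhost_dev_find() is an existing
lookup helper assumed here, and error handling is elided:

#include "vhost_internal.h"

static int
example_create_vhost_nvme(void)
{
    struct spdk_vhost_dev *vdev;
    int rc;

    rc = spdk_vhost_nvme_dev_construct("vhost.0", "0x1", 2 /* I/O queues */);
    if (rc != 0) {
        return rc;
    }

    vdev = spdk_vhost_dev_find("vhost.0");    /* assumed existing helper */
    if (vdev == NULL) {
        return -1;
    }

    return spdk_vhost_nvme_dev_add_ns(vdev, "Nvme0n1p0");
}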

lib/vhost/vhost_nvme.c: new file, 1296 lines (diff suppressed because it is too large)

@@ -108,6 +108,27 @@ DEFINE_STUB(spdk_env_get_current_core, uint32_t, (void), 0);
static struct spdk_vhost_dev_backend g_vdev_backend;
int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf)
{
return 0;
}
int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd)
{
return 0;
}
int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap)
{
return 0;
}
int
spdk_vhost_nvme_controller_construct(void)
{
return 0;
}
static int
test_setup(void)
{