vhost: add a library and app for userspace vhost-scsi processing

This patch adds a library, application and test scripts for extending
SPDK to present virtio-scsi controllers to QEMU-based VMs and
process I/O submitted to devices attached to those controllers.
This functionality is dependent on QEMU patches to enable
vhost-scsi in userspace - those patches are currently working their
way through the QEMU mailing list, but temporary patches to enable
this functionality in QEMU will be made available shortly through the
SPDK github repository.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Signed-off-by: Krzysztof Jakimiak <krzysztof.jakimiak@intel.com>
Signed-off-by: Michal Kosciowski <michal.kosciowski@intel.com>
Signed-off-by: Karol Latecki <karolx.latecki@intel.com>
Signed-off-by: Piotr Pelplinski <piotr.pelplinski@intel.com>
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>

Signed-off-by: Krzysztof Jakimiak <krzysztof.jakimiak@intel.com>
Change-Id: I138e4021f0ac4b1cd9a6e4041783cdf06e6f0efb
This commit is contained in:
Piotr Pelplinski 2017-03-02 15:12:20 +01:00 committed by Jim Harris
parent 16bbcb3f36
commit 1dbf53eebf
44 changed files with 8510 additions and 11 deletions

View File

@ -38,7 +38,7 @@ DIRS-y += trace
DIRS-y += nvmf_tgt
DIRS-y += iscsi_top
ifeq ($(OS),Linux)
DIRS-y += iscsi_tgt
DIRS-y += iscsi_tgt vhost
endif
.PHONY: all clean $(DIRS-y)

62
app/vhost/Makefile Normal file
View File

@ -0,0 +1,62 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Build rules for the SPDK vhost target application.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk

# Name of the executable produced by this Makefile.
APP = vhost

CFLAGS += $(ENV_CFLAGS)

C_SRCS := vhost.c

# SPDK libraries the application depends on.
# NOTE(review): presumably expanded into SPDK_LIB_FILES / SPDK_LIB_LINKER_ARGS
# by the included spdk.*.mk files - confirm there.
SPDK_LIB_LIST = jsonrpc json rpc bdev_rpc bdev scsi net copy trace conf
SPDK_LIB_LIST += util log log_rpc event app_rpc
SPDK_LIB_LIST += vhost rte_vhost

LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
        $(COPY_MODULES_LINKER_ARGS)
LIBS += $(SPDK_LIB_LINKER_ARGS)
LIBS += $(ENV_LINKER_ARGS)

# 'all' and 'clean' name commands, not files. Declaring them phony keeps a
# stray file called "all" or "clean" in this directory from making the
# targets look up to date.
.PHONY: all clean

all : $(APP)

# Relink whenever an object or any of the libraries it is linked against changes.
$(APP) : $(OBJS) $(SPDK_LIB_FILES) $(ENV_LIBS) $(BLOCKDEV_MODULES_FILES) $(COPY_MODULES_FILES)
	$(LINK_C)

clean :
	$(CLEAN_C) $(APP)

include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk

164
app/vhost/vhost.c Normal file
View File

@ -0,0 +1,164 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <getopt.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include "spdk/log.h"
#include "spdk/conf.h"
#include "spdk/event.h"
#include "spdk/vhost.h"
#define SPDK_VHOST_DEFAULT_CONFIG "/usr/local/etc/spdk/vhost.conf"
#define SPDK_VHOST_DEFAULT_ENABLE_COREDUMP true
#define SPDK_VHOST_DEFAULT_MEM_SIZE 1024
/*
 * Populate *opts for the vhost application: start from the values set by
 * spdk_app_opts_init() and then apply the vhost-specific name, default
 * configuration file and default DPDK memory size.
 */
static void
vhost_app_opts_init(struct spdk_app_opts *opts)
{
	/* Must run first: the assignments below overwrite what it stored. */
	spdk_app_opts_init(opts);

	opts->dpdk_mem_size = SPDK_VHOST_DEFAULT_MEM_SIZE;
	opts->config_file = SPDK_VHOST_DEFAULT_CONFIG;
	opts->name = "vhost";
}
static void
usage(char *executable_name)
{
struct spdk_app_opts defaults;
vhost_app_opts_init(&defaults);
printf("%s [options]\n", executable_name);
printf("options:\n");
printf(" -c config config file (default: %s)\n", defaults.config_file);
printf(" -e mask tracepoint group mask for spdk trace buffers (default: 0x0)\n");
printf(" -m mask reactor core mask (default: 0x1)\n");
printf(" -l facility use specific syslog facility (default: %s)\n", defaults.log_facility);
printf(" -n channel number of memory channels used for DPDK\n");
printf(" -p core master (primary) core for DPDK\n");
printf(" -s size memory size in MB for DPDK (default: %dMB)\n", defaults.dpdk_mem_size);
printf(" -S dir directory where to create vhost sockets (default: pwd)\n");
spdk_tracelog_usage(stdout, "-t");
printf(" -h show this usage\n");
printf(" -d disable coredump file enabling\n");
printf(" -q disable notice level logging to stderr\n");
}
/*
 * Entry point of the vhost application.
 *
 * Parses the command line into a struct spdk_app_opts, optionally warns when
 * notice-level logging goes to a console terminal, initialises the SPDK
 * application framework and runs spdk_vhost_startup() until the application
 * exits.
 *
 * \return the value returned by spdk_app_start(). Usage errors terminate the
 * process with EXIT_FAILURE, -h with EXIT_SUCCESS.
 */
int
main(int argc, char *argv[])
{
	struct spdk_app_opts opts = {};
	/*
	 * getopt() returns int. Storing it in a plain char breaks the "!= -1"
	 * loop test on platforms where char is unsigned (the loop never ends).
	 */
	int ch;
	int rc;
	const char *socket_path = NULL;
	const char *tty_name;

	vhost_app_opts_init(&opts);

	while ((ch = getopt(argc, argv, "c:de:l:m:p:qs:S:t:h")) != -1) {
		switch (ch) {
		case 'c':
			opts.config_file = optarg;
			break;
		case 'd':
			opts.enable_coredump = false;
			break;
		case 'e':
			opts.tpoint_group_mask = optarg;
			break;
		case 'h':
			usage(argv[0]);
			exit(EXIT_SUCCESS);
		case 'l':
			opts.log_facility = optarg;
			break;
		case 'm':
			opts.reactor_mask = optarg;
			break;
		case 'p':
			opts.dpdk_master_core = strtoul(optarg, NULL, 10);
			break;
		case 'q':
			spdk_g_notice_stderr_flag = 0;
			break;
		case 's':
			opts.dpdk_mem_size = strtoul(optarg, NULL, 10);
			break;
		case 'S':
			/* Handed to spdk_vhost_startup() as its first argument below. */
			socket_path = optarg;
			break;
		case 't':
			rc = spdk_log_set_trace_flag(optarg);
			if (rc < 0) {
				fprintf(stderr, "unknown flag\n");
				usage(argv[0]);
				exit(EXIT_FAILURE);
			}
#ifndef DEBUG
			/* Without DEBUG the -t option is always rejected. */
			fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -t flag.\n",
				argv[0]);
			usage(argv[0]);
			exit(EXIT_FAILURE);
#endif
			break;
		default:
			fprintf(stderr, "%s Unknown option '-%c'.\n", argv[0], ch);
			usage(argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	if (spdk_g_notice_stderr_flag == 1 && isatty(STDERR_FILENO)) {
		/*
		 * ttyname() may return NULL even for a terminal; do not pass
		 * that to strncmp().
		 */
		tty_name = ttyname(STDERR_FILENO);
		if (tty_name != NULL &&
		    !strncmp(tty_name, "/dev/tty", strlen("/dev/tty"))) {
			printf("Warning: printing stderr to console terminal without -q option specified.\n");
			printf("Suggest using -q to disable logging to stderr and monitor syslog, or\n");
			printf("redirect stderr to a file.\n");
			printf("(Delaying for 10 seconds...)\n");
			sleep(10);
		}
	}

	opts.shutdown_cb = spdk_vhost_shutdown_cb;
	spdk_app_init(&opts);

	/* Blocks until the application is exiting */
	rc = spdk_app_start(spdk_vhost_startup, (void *)socket_path, NULL);

	spdk_app_fini();

	return rc;
}

View File

@ -137,6 +137,10 @@ timing_exit host
timing_exit nvmf
timing_enter vhost
run_test ./test/vhost/spdk_vhost.sh --integrity
timing_exit vhost
timing_enter cleanup
rbd_cleanup
./scripts/setup.sh reset

133
etc/spdk/vhost.conf.in Normal file
View File

@ -0,0 +1,133 @@
# SPDK vhost configuration file
#
# Please write all parameters using ASCII.
# The parameter must be quoted if it includes whitespace.
# Configuration syntax:
# Leading whitespace is ignored.
# Lines starting with '#' are comments.
# Lines ending with '\' are concatenated with the next line.
# Bracketed ([]) names define sections
[Global]
# Instance ID for multi-process support
# Default: 0
#InstanceID 0
# Users can restrict work items to only run on certain cores by
# specifying a ReactorMask. Default is to allow work items to run
# on core 0.
#ReactorMask 0xFFFF
# Tracepoint group mask for spdk trace buffers
# Default: 0x0 (all tracepoint groups disabled)
# Set to 0xFFFFFFFFFFFFFFFF to enable all tracepoint groups.
#TpointGroupMask 0x0
# syslog facility
LogFacility "local7"
[Rpc]
# Defines whether SPDK vhost will enable configuration via RPC.
# Default is disabled. Note that the RPC interface is not
# authenticated, so users should be careful about enabling
# RPC in non-trusted environments.
Enable No
# Listen address for the RPC service.
# May be an IP address or an absolute path to a Unix socket.
Listen 127.0.0.1
# Users may not want to use offload even if it is available.
# Users may use the whitelist to initialize specified devices. IDs
# use BUS:DEVICE.FUNCTION to identify each Ioat channel.
[Ioat]
Disable Yes
#Whitelist 00:04.0
#Whitelist 00:04.1
# Users must change this section to match the /dev/sdX devices to be
# exported as vhost scsi drives. The devices are accessed using Linux AIO.
[AIO]
#AIO /dev/sdb
#AIO /dev/sdc
# Users may change this section to create a different number or size of
# malloc LUNs.
# If the system has hardware DMA engine, it will use an IOAT
# (i.e. Crystal Beach DMA) channel to do the copy instead of memcpy.
# Of course, users can disable offload even if it is available.
[Malloc]
# Number of Malloc targets
NumberOfLuns 3
# Malloc targets are 128M
LunSizeInMB 128
# Block size. Default is 512 bytes.
BlockSize 4096
# NVMe configuration options
[Nvme]
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their PCI
# domain, bus, device, and function. The format is dddd:bb:dd.f, which is
# the same format displayed by lspci or in /sys/bus/pci/devices. The second
# argument is a "name" for the device that can be anything. The name
# is referenced later in the Subsystem section.
#
# Alternatively, the user can specify ClaimAllDevices. All
# NVMe devices will be claimed and named Nvme0, Nvme1, etc.
#BDF 0000:81:00.0 Nvme0
#BDF 0000:01:00.0 Nvme1
ClaimAllDevices
# The number of attempts per I/O when an I/O fails. Do not include
# this key to get the default behavior.
NvmeRetryCount 4
# The maximum number of NVMe controllers to claim. Do not include this key to
# claim all of them.
NumControllers 2
# Registers the application to receive timeout callback and to reset the controller.
ResetControllerOnTimeout Yes
# Timeout value.
NvmeTimeoutValue 30
# Set how often the admin queue is polled for asynchronous events.
# Units in microseconds.
AdminPollRate 100000
# The Split virtual block device slices block devices into multiple smaller bdevs.
[Split]
# Syntax:
# Split <bdev> <count> [<size_in_megabytes>]
#
# Split Nvme1n1 into two equally-sized portions, Nvme1n1p0 and Nvme1n1p1
#Split Nvme1n1 2
# Split Malloc2 into eight 1-megabyte portions, Malloc2p0 ... Malloc2p7,
# leaving the rest of the device inaccessible
#Split Malloc2 8 1
# Vhost scsi controller configuration
# Users should change the VhostScsi section(s) below to match the desired
# vhost configuration.
# Name is minimum required
[VhostScsi0]
# Define name for controller
Name vhost.0
# Assign devices from backend
# Use the first malloc device
Dev0 Malloc0
# Use the first AIO device
#Dev1 AIO0
# Use the first Nvme device
#Dev2 Nvme0n1
# Use the third partition from second Nvme device
#Dev3 Nvme1n1p2
# Start the poller for this vhost controller on one of the cores in
# this cpumask. By default, if not specified, any core in the
# SPDK process will be used.
#Cpumask 0x1
#[VhostScsi1]
# Name vhost.1
# Dev0 AIO1
# Cpumask 0x1

70
include/spdk/vhost.h Normal file
View File

@ -0,0 +1,70 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* SPDK vhost
*/
#ifndef SPDK_VHOST_H
#define SPDK_VHOST_H
#include "spdk/event.h"
/* NOTE(review): presumably the number of device slots per vhost-scsi controller - confirm in lib/vhost. */
#define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS 8
/**
 * Start-up function of the vhost target; app/vhost passes it to
 * spdk_app_start().
 *
 * \param arg1 optional path to vhost socket (app/vhost passes the -S argument,
 * or NULL when the option was not given).
 * \param arg2 app/vhost passes NULL. NOTE(review): looks unused - confirm.
 */
void spdk_vhost_startup(void *arg1, void *arg2);
/**
 * Shutdown callback; app/vhost installs it as spdk_app_opts.shutdown_cb.
 */
void spdk_vhost_shutdown_cb(void);
/* Forward declaration */
struct spdk_vhost_scsi_ctrlr;
/**
 * Get handle to next controller.
 * \param prev Previous controller or NULL to get first one.
 * \return handle to next controller or NULL if prev was the last one.
 */
struct spdk_vhost_scsi_ctrlr *spdk_vhost_scsi_ctrlr_next(struct spdk_vhost_scsi_ctrlr *prev);
/* Accessors for a controller handle. NOTE(review): ownership/lifetime of the returned name is not visible here. */
const char *spdk_vhost_scsi_ctrlr_get_name(struct spdk_vhost_scsi_ctrlr *ctrl);
uint64_t spdk_vhost_scsi_ctrlr_get_cpumask(struct spdk_vhost_scsi_ctrlr *ctrl);
/* NOTE(review): return convention (presumably 0 on success) must be confirmed against the implementation. */
int spdk_vhost_scsi_ctrlr_construct(const char *name, uint64_t cpumask);
/* Parses the textual core mask in \c mask into \c *cpumask. NOTE(review): accepted formats and return values - confirm in lib/vhost. */
int spdk_vhost_parse_core_mask(const char *mask, uint64_t *cpumask);
/* NOTE(review): num is presumably a device index below SPDK_VHOST_SCSI_CTRLR_MAX_DEVS - confirm. */
struct spdk_scsi_dev *spdk_vhost_scsi_ctrlr_get_dev(struct spdk_vhost_scsi_ctrlr *ctrl,
uint8_t num);
/* NOTE(review): name presumably identifies the controller and lun_name the backing device - confirm. */
int spdk_vhost_scsi_ctrlr_add_dev(const char *name, unsigned scsi_dev_num, const char *lun_name);
#endif /* SPDK_VHOST_H */

View File

@ -37,7 +37,7 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y += bdev conf copy cunit event json jsonrpc \
log env_dpdk net rpc trace util nvme nvmf scsi ioat
ifeq ($(OS),Linux)
DIRS-y += iscsi
DIRS-y += iscsi vhost
endif

46
lib/vhost/Makefile Normal file
View File

@ -0,0 +1,46 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Build description for the SPDK vhost library.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
# Add the rte_vhost subdirectory to the compiler's header search path.
CFLAGS += -Irte_vhost
CFLAGS += $(ENV_CFLAGS)
# Sources compiled into the library named by LIBNAME.
C_SRCS = task.c vhost.c vhost_rpc.c
LIBNAME = vhost
# NOTE(review): presumably makes the build descend into rte_vhost/ - confirm
# how spdk.lib.mk (or spdk.common.mk) consumes DIRS-y.
DIRS-y += rte_vhost
include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

View File

@ -0,0 +1,44 @@
#
# BSD LICENSE
#
# Copyright (c) Intel Corporation.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Build description for the rte_vhost library.
# This directory is three levels below the SPDK root.
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
CFLAGS += $(ENV_CFLAGS)
# These are the DPDK vhost files copied (for now) into SPDK
C_SRCS += fd_man.c socket.c vhost_user.c virtio_net.c vhost.c
LIBNAME = rte_vhost
include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk

View File

@ -0,0 +1,299 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <rte_common.h>
#include <rte_log.h>
#include "fd_man.h"
/**
 * Look up the slot of the fdset that currently holds a given fd.
 * Passing -1 as fd searches for an unused slot instead.
 * @return
 *  slot index holding the fd, or -1 if pfdset is NULL or no slot matches.
 */
static int
fdset_find_fd(struct fdset *pfdset, int fd)
{
	int slot;

	if (pfdset == NULL)
		return -1;

	for (slot = 0; slot < MAX_FDS; slot++) {
		if (pfdset->fd[slot].fd == fd)
			return slot;
	}

	return -1;
}
/**
 * Find an unused slot in the fdset. Unused slots are the ones whose fd is -1.
 * @return
 *  index of a free slot, or -1 if pfdset is NULL or every slot is in use.
 */
static int
fdset_find_free_slot(struct fdset *pfdset)
{
return fdset_find_fd(pfdset, -1);
}
/**
 * Store fd, its read/write callbacks and its context in slot idx.
 * The caller is expected to hold pfdset->fd_mutex (fdset_add() does).
 *
 * The fd must be usable with select(): fdset_fill() later passes it to
 * FD_SET(), which is only defined for 0 <= fd < FD_SETSIZE.
 *
 * @return
 *  0 on success, -1 if pfdset is NULL or idx/fd is out of range.
 */
static int
fdset_add_fd(struct fdset *pfdset, int idx, int fd,
	     fd_cb rcb, fd_cb wcb, void *dat)
{
	struct fdentry *pfdentry;

	/* Reject negative values as well as values past the upper bounds. */
	if (pfdset == NULL || idx < 0 || idx >= MAX_FDS)
		return -1;
	if (fd < 0 || fd >= FD_SETSIZE)
		return -1;

	pfdentry = &pfdset->fd[idx];
	pfdentry->fd = fd;
	pfdentry->rcb = rcb;
	pfdentry->wcb = wcb;
	pfdentry->dat = dat;

	return 0;
}
/**
 * Add the fds registered in the fdset to the read and/or write fd_set.
 * An fd goes into rfset only if it has a read callback and into wfset only
 * if it has a write callback; either fd_set pointer may be NULL.
 * @return
 *  the highest fd that was added to any set, or -1 if none was added
 *  (or pfdset is NULL).
 */
static int
fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
{
	struct fdentry *entry;
	int highest = -1;
	int idx;

	if (pfdset == NULL)
		return -1;

	for (idx = 0; idx < MAX_FDS; idx++) {
		int watched = 0;

		entry = &pfdset->fd[idx];

		/* Skip unused slots. */
		if (entry->fd == -1)
			continue;

		if (entry->rcb && rfset) {
			FD_SET(entry->fd, rfset);
			watched = 1;
		}
		if (entry->wcb && wfset) {
			FD_SET(entry->fd, wfset);
			watched = 1;
		}

		if (watched && entry->fd > highest)
			highest = entry->fd;
	}

	return highest;
}
/**
 * Mark every slot of the fdset as unused and reset the fd count.
 *
 * All fields of each entry are cleared, including the callbacks and the busy
 * flag: fdset_del() keeps retrying while an entry's busy flag is non-zero, so
 * a stale value in memory that was not zeroed beforehand must not survive
 * initialisation.
 *
 * This function does not initialise pfdset->fd_mutex; the owner of the fdset
 * has to do that.
 */
void
fdset_init(struct fdset *pfdset)
{
	int i;

	if (pfdset == NULL)
		return;

	for (i = 0; i < MAX_FDS; i++) {
		pfdset->fd[i].fd = -1;
		pfdset->fd[i].rcb = NULL;
		pfdset->fd[i].wcb = NULL;
		pfdset->fd[i].dat = NULL;
		pfdset->fd[i].busy = 0;
	}
	pfdset->num = 0;
}
/**
 * Register fd in the fdset together with its read/write handlers and
 * context. The slot search and the update happen under pfdset->fd_mutex.
 * @return
 *  0 on success, -1 if pfdset is NULL or fd is -1, -2 if no free slot is
 *  available or the fd is rejected by fdset_add_fd().
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	int slot;
	int rc = -2;

	if (pfdset == NULL || fd == -1)
		return -1;

	pthread_mutex_lock(&pfdset->fd_mutex);

	/* Claim a free slot; count the fd only once it is actually stored. */
	slot = fdset_find_free_slot(pfdset);
	if (slot != -1 && fdset_add_fd(pfdset, slot, fd, rcb, wcb, dat) >= 0) {
		pfdset->num++;
		rc = 0;
	}

	pthread_mutex_unlock(&pfdset->fd_mutex);

	return rc;
}
/**
 * Unregister the fd from the fdset.
 * Returns context of a given fd or NULL.
 *
 * NULL is returned when pfdset is NULL, fd is -1, or fd is not registered.
 * If the entry is currently marked busy, the function keeps retrying
 * (releasing and re-taking fd_mutex on every attempt) until the flag is clear.
 */
void *
fdset_del(struct fdset *pfdset, int fd)
{
int i;
void *dat = NULL;
if (pfdset == NULL || fd == -1)
return NULL;
do {
pthread_mutex_lock(&pfdset->fd_mutex);
i = fdset_find_fd(pfdset, fd);
if (i != -1 && pfdset->fd[i].busy == 0) {
/* busy indicates r/wcb is executing! */
/* Not busy: hand the context back to the caller and free the slot. */
dat = pfdset->fd[i].dat;
pfdset->fd[i].fd = -1;
pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
pfdset->fd[i].dat = NULL;
pfdset->num--;
/* i = -1 terminates the retry loop below. */
i = -1;
}
pthread_mutex_unlock(&pfdset->fd_mutex);
/* i is still a valid index only when the entry was found but busy: retry. */
} while (i != -1);
return dat;
}
/**
* Unregister the fd at the specified slot from the fdset.
*/
static void
fdset_del_slot(struct fdset *pfdset, int index)
{
if (pfdset == NULL || index < 0 || index >= MAX_FDS)
return;
pthread_mutex_lock(&pfdset->fd_mutex);
pfdset->fd[index].fd = -1;
pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
pfdset->fd[index].dat = NULL;
pfdset->num--;
pthread_mutex_unlock(&pfdset->fd_mutex);
}
/**
 * This function runs a blocking dispatch loop over the fds in pfdset and
 * calls the corresponding r/w handler if there is an event on an fd.
 * As written the loop is `while (1)` with no exit path: the function returns
 * only when pfdset is NULL.
 *
 * Before the callback is called, we set the flag to busy status; If other
 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
 * will wait until the flag is reset to zero(which indicates the callback is
 * finished), then it could free the context after fdset_del.
 */
void
fdset_event_dispatch(struct fdset *pfdset)
{
fd_set rfds, wfds;
int i, maxfds;
struct fdentry *pfdentry;
int num = MAX_FDS;
fd_cb rcb, wcb;
void *dat;
int fd;
int remove1, remove2;
int ret;
if (pfdset == NULL)
return;
while (1) {
struct timeval tv;
/* One second select() timeout; tv is re-armed on every iteration. */
tv.tv_sec = 1;
tv.tv_usec = 0;
FD_ZERO(&rfds);
FD_ZERO(&wfds);
/* Snapshot the registered fds into rfds/wfds under the lock. */
pthread_mutex_lock(&pfdset->fd_mutex);
maxfds = fdset_fill(&rfds, &wfds, pfdset);
pthread_mutex_unlock(&pfdset->fd_mutex);
/*
 * When select is blocked, other threads might unregister
 * listenfds from and register new listenfds into fdset.
 * When select returns, the entries for listenfds in the fdset
 * might have been updated. It is ok if there is unwanted call
 * for new listenfds.
 */
ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
/* Timeout (0) or error (< 0): rebuild the sets and try again. */
if (ret <= 0)
continue;
for (i = 0; i < num; i++) {
remove1 = remove2 = 0;
/* Copy the entry under the lock and mark it busy before calling out. */
/* NOTE(review): busy is set for every slot, including unused ones (fd == -1). */
pthread_mutex_lock(&pfdset->fd_mutex);
pfdentry = &pfdset->fd[i];
fd = pfdentry->fd;
rcb = pfdentry->rcb;
wcb = pfdentry->wcb;
dat = pfdentry->dat;
pfdentry->busy = 1;
pthread_mutex_unlock(&pfdset->fd_mutex);
/* Callbacks run without the lock held; each may request removal through its remove flag. */
if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)
rcb(fd, dat, &remove1);
if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
wcb(fd, dat, &remove2);
/* NOTE(review): busy is cleared here without holding fd_mutex, while fdset_del reads it under the lock. */
pfdentry->busy = 0;
/*
 * fdset_del needs to check busy flag.
 * We don't allow fdset_del to be called in callback
 * directly.
 */
/*
 * When we are to clean up the fd from fdset,
 * because the fd is closed in the cb,
 * the old fd val could be reused when another thread
 * creates a new listen fd, so we can't call
 * fdset_del (which looks the entry up by fd value);
 * the slot is released by index instead.
 */
if (remove1 || remove2)
fdset_del_slot(pfdset, i);
}
}
}

View File

@ -0,0 +1,67 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _FD_MAN_H_
#define _FD_MAN_H_
#include <stdint.h>
#include <pthread.h>
/* Number of slots in a struct fdset. */
#define MAX_FDS 1024
/*
 * Event callback type.
 * fd is the descriptor that has an event, dat is the context registered with
 * it, and *remove is an out flag: fdset_event_dispatch() zeroes it before the
 * call and releases the slot afterwards if the callback set it to non-zero.
 */
typedef void (*fd_cb)(int fd, void *dat, int *remove);
/* One registered file descriptor. */
struct fdentry {
int fd; /* -1 indicates this entry is empty */
fd_cb rcb; /* callback when this fd is readable. */
fd_cb wcb; /* callback when this fd is writeable.*/
void *dat; /* fd context */
int busy; /* whether this entry is being used in cb. */
};
/* Fixed-size table of registered fds; fd_mutex is taken around updates of fd[] and num. */
struct fdset {
struct fdentry fd[MAX_FDS];
pthread_mutex_t fd_mutex;
int num; /* current fd number of this fdset */
};
/* Marks all slots empty and resets num; it does not initialise fd_mutex. */
void fdset_init(struct fdset *pfdset);
/* Registers fd with its handlers; returns 0 on success, -1 on a NULL set or fd == -1, -2 if the fd cannot be stored. */
int fdset_add(struct fdset *pfdset, int fd,
fd_cb rcb, fd_cb wcb, void *dat);
/* Unregisters fd and returns its context, or NULL if it is not registered. */
void *fdset_del(struct fdset *pfdset, int fd);
/* Runs the select()-based dispatch loop on the set. */
void fdset_event_dispatch(struct fdset *pfdset);
#endif

View File

@ -0,0 +1,193 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VIRTIO_NET_H_
#define _VIRTIO_NET_H_
/**
* @file
* Interface to vhost net
*/
#include <stdint.h>
#include <linux/vhost.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_net.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_ether.h>
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
/* Enum for virtqueue management. */
/* VIRTIO_RXQ is 0 and VIRTIO_TXQ is 1; VIRTIO_QNUM (2) is the number of queue kinds. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
/**
 * Device and vring operations.
 *
 * Table of callbacks supplied by the application and registered with
 * rte_vhost_driver_callback_register(). Every callback identifies the device
 * by its integer id (vid).
 */
struct virtio_net_device_ops {
int (*new_device)(int vid); /**< Add device. */
void (*destroy_device)(int vid); /**< Remove device. */
int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */
void *reserved[5]; /**< Reserved for future extension */
};
/**
 * Disable features in feature_mask. Returns 0 on success.
 */
int rte_vhost_feature_disable(uint64_t feature_mask);
/**
 * Enable features in feature_mask. Returns 0 on success.
 */
int rte_vhost_feature_enable(uint64_t feature_mask);
/* Returns currently supported vhost features */
uint64_t rte_vhost_feature_get(void);
/*
 * NOTE(review): undocumented in this header. Presumably switches guest
 * notifications for queue queue_id of device vid on (enable != 0) or off -
 * confirm against the implementation, including the return convention.
 */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/**
 * Register vhost driver. path could be different for multiple
 * instance support.
 *
 * flags is a bit mask; the RTE_VHOST_USER_* values defined in this header
 * are presumably the accepted bits (TODO confirm).
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);
/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
/* Start vhost driver session blocking loop. */
int rte_vhost_driver_session_start(void);
/**
* Get the numa node from which the virtio net device's memory
* is allocated.
*
* @param vid
* virtio-net device ID
*
* @return
* The numa node, -1 on failure
*/
int rte_vhost_get_numa_node(int vid);
/**
* Get the number of queues the device supports.
*
* @param vid
* virtio-net device ID
*
* @return
* The number of queues, 0 on failure
*/
uint32_t rte_vhost_get_queue_num(int vid);
/**
* Get the virtio net device's ifname, which is the vhost-user socket
* file path.
*
* @param vid
* virtio-net device ID
* @param buf
* The buffer to store the queried ifname
* @param len
* The length of buf
*
* @return
* 0 on success, -1 on failure
*/
int rte_vhost_get_ifname(int vid, char *buf, size_t len);
/**
* Get how many avail entries are left in the queue
*
* @param vid
* virtio-net device ID
* @param queue_id
* virtio queue index
*
* @return
* num of avail entries left
*/
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
/**
* This function adds buffers to the virtio device's RX virtqueue. Buffers can
* be received from the physical port or from another virtual device. A packet
* count is returned to indicate the number of packets that were successfully
* added to the RX queue.
* @param vid
* virtio-net device ID
* @param queue_id
* virtio queue index in mq case
* @param pkts
* array to contain packets to be enqueued
* @param count
* packets num to be enqueued
* @return
* num of packets enqueued
*/
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
/**
* This function gets guest buffers from the virtio device TX virtqueue,
* constructs host mbufs, copies guest buffer content to host mbufs and
* stores them in pkts to be processed.
* @param vid
* virtio-net device
* @param queue_id
* virtio queue index in mq case
* @param mbuf_pool
* mbuf_pool where host mbuf is allocated.
* @param pkts
* array to contain packets to be dequeued
* @param count
* packets num to be dequeued
* @return
* num of packets dequeued
*/
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
#endif /* _VIRTIO_NET_H_ */

View File

@ -0,0 +1,619 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/queue.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <rte_log.h>
#include "fd_man.h"
#include "vhost.h"
#include "vhost_user.h"
/*
 * Every time rte_vhost_driver_register() is invoked, an associated
 * vhost_user_socket struct will be created.
 */
struct vhost_user_socket {
/* Private copy (strdup) of the socket path given at registration. */
char *path;
/* Listening socket; set by vhost_user_create_server() in server mode. */
int listenfd;
/* fd of the current connection, or -1 when there is none. */
int connfd;
/* True when registered without RTE_VHOST_USER_CLIENT. */
bool is_server;
/* Client mode only: retry the connection when it fails or is lost. */
bool reconnect;
/* Enable dequeue zero copy on devices created for this socket. */
bool dequeue_zero_copy;
};
/*
 * Per-connection context: allocated in vhost_user_add_connection() and
 * handed to vhost_user_read_cb() as its callback data.
 */
struct vhost_user_connection {
/* Socket this connection was established on. */
struct vhost_user_socket *vsocket;
/* Device id returned by vhost_new_device() for this connection. */
int vid;
};
/* Upper bound on the number of simultaneously registered sockets. */
#define MAX_VHOST_SOCKET 1024
/* Global registry of all registered vhost-user sockets. */
struct vhost_user {
/* Registered sockets; only the first vsocket_cnt entries are in use. */
struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
/* fd set that listen and connection fds are added to. */
struct fdset fdset;
/* Number of valid entries in vsockets[]. */
int vsocket_cnt;
/* Held by rte_vhost_driver_register()/unregister() while updating the table. */
pthread_mutex_t mutex;
};
#define MAX_VIRTIO_BACKLOG 128
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int vhost_user_create_client(struct vhost_user_socket *vsocket);
/*
 * The single global registry instance. Every fdset entry is initialised
 * with the same value using a range designator (a GNU C extension).
 * NOTE(review): the leading -1 presumably marks the entry's fd as unused;
 * confirm against the fdset entry layout in fd_man.h.
 */
static struct vhost_user vhost_user = {
.fdset = {
.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
.vsocket_cnt = 0,
.mutex = PTHREAD_MUTEX_INITIALIZER,
};
/*
 * Receive one message from sockfd into buf (at most buflen bytes) and
 * collect any file descriptors passed with it as SCM_RIGHTS ancillary data.
 *
 * fds must have room for fd_num descriptors. Only the descriptors that were
 * actually received are copied; the remaining entries are set to -1 so that
 * callers never see uninitialised values.
 *
 * Returns the number of bytes read on success, or the non-positive
 * recvmsg() result / -1 on failure (including a truncated message). On
 * failure fds is left untouched.
 */
int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	struct iovec iov;
	struct msghdr msgh;
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct cmsghdr *cmsg;
	size_t got_size = 0;
	int got_fds = 0;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
		return ret;
	}

	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
		return -1;
	}

	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
	     cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if ((cmsg->cmsg_level == SOL_SOCKET) &&
		    (cmsg->cmsg_type == SCM_RIGHTS)) {
			/* Payload length is cmsg_len minus the header. */
			if (cmsg->cmsg_len > CMSG_LEN(0))
				got_size = cmsg->cmsg_len - CMSG_LEN(0);
			/* Never copy more than the caller has room for. */
			if (got_size > fdsize)
				got_size = fdsize;
			memcpy(fds, CMSG_DATA(cmsg), got_size);
			got_fds = got_size / sizeof(int);
			break;
		}
	}

	/* Mark the descriptors that were not received as invalid. */
	while (fds != NULL && got_fds < fd_num)
		fds[got_fds++] = -1;

	return ret;
}
/*
 * Send buflen bytes from buf over sockfd, optionally attaching fd_num file
 * descriptors from fds as SCM_RIGHTS ancillary data. The send is retried
 * while it is interrupted by a signal (EINTR).
 *
 * Returns the sendmsg() result: bytes sent, or a negative value on error.
 */
int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
	size_t fdsize = fd_num * sizeof(int);
	char control[CMSG_SPACE(fdsize)];
	struct msghdr msgh;
	struct iovec iov;
	struct cmsghdr *cmsg;
	int ret;

	memset(&msgh, 0, sizeof(msgh));
	iov.iov_base = buf;
	iov.iov_len = buflen;
	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;

	if (fds == NULL || fd_num <= 0) {
		/* Nothing to pass: plain data message. */
		msgh.msg_control = NULL;
		msgh.msg_controllen = 0;
	} else {
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);
		cmsg = CMSG_FIRSTHDR(&msgh);
		if (cmsg == NULL) {
			RTE_LOG(ERR, VHOST_CONFIG, "null cmsg\n");
			return -1;
		}
		cmsg->cmsg_len = CMSG_LEN(fdsize);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		memcpy(CMSG_DATA(cmsg), fds, fdsize);
	}

	for (;;) {
		ret = sendmsg(sockfd, &msgh, 0);
		if (ret >= 0 || errno != EINTR)
			break;
	}

	if (ret < 0)
		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");

	return ret;
}
/*
 * Create a vhost device for the freshly connected socket fd and start
 * watching the fd for vhost-user messages.
 *
 * Takes ownership of fd: on any failure the fd is closed and everything
 * allocated here, including the new device, is released again.
 */
static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
	int vid;
	size_t size;
	struct vhost_user_connection *conn;
	int ret;

	conn = malloc(sizeof(*conn));
	if (conn == NULL) {
		close(fd);
		return;
	}

	vid = vhost_new_device();
	if (vid == -1) {
		close(fd);
		free(conn);
		return;
	}

	/* The socket path doubles as the device's interface name. */
	size = strnlen(vsocket->path, PATH_MAX);
	vhost_set_ifname(vid, vsocket->path, size);

	if (vsocket->dequeue_zero_copy)
		vhost_enable_dequeue_zero_copy(vid);

	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);

	vsocket->connfd = fd;
	conn->vsocket = vsocket;
	conn->vid = vid;
	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
			NULL, conn);
	if (ret < 0) {
		vsocket->connfd = -1;
		/* Do not leak the device slot claimed above. */
		vhost_destroy_device(vid);
		free(conn);
		close(fd);
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add fd %d into vhost server fdset\n",
			fd);
	}
}
/*
 * fdset callback for a listening socket: accept the pending client
 * connection and set up a device for it.
 */
static void
vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
{
	struct vhost_user_socket *vsocket = dat;
	int connfd = accept(fd, NULL, NULL);

	if (connfd >= 0) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"new vhost user connection is %d\n", connfd);
		vhost_user_add_connection(connfd, vsocket);
	}
}
/*
 * fdset callback for an established connection: process one vhost-user
 * message. When the handler fails, the connection is torn down, the fd is
 * flagged for removal from the fdset, the device is destroyed and, if the
 * socket is configured to reconnect, a new client connection is started.
 */
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
	struct vhost_user_connection *conn = dat;
	/* Saved up front: conn is freed before the reconnect check below. */
	struct vhost_user_socket *vsocket = conn->vsocket;

	if (vhost_user_msg_handler(conn->vid, connfd) >= 0)
		return;

	vsocket->connfd = -1;
	close(connfd);
	*remove = 1;
	vhost_destroy_device(conn->vid);
	free(conn);

	if (vsocket->reconnect)
		vhost_user_create_client(vsocket);
}
/*
 * Create an AF_UNIX stream socket and fill *un with the address for path
 * (truncated to fit sun_path, always NUL-terminated). Client sockets are
 * switched to non-blocking mode.
 *
 * Returns the socket fd, or -1 on failure.
 */
static int
create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
{
	int sockfd = socket(AF_UNIX, SOCK_STREAM, 0);

	if (sockfd < 0)
		return -1;

	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
		is_server ? "server" : "client", sockfd);

	if (!is_server) {
		if (fcntl(sockfd, F_SETFL, O_NONBLOCK)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"vhost-user: can't set nonblocking mode for socket, fd: "
				"%d (%s)\n", sockfd, strerror(errno));
			close(sockfd);
			return -1;
		}
	}

	/* Zero-fill first so the final byte of sun_path stays a terminator. */
	memset(un, 0, sizeof(*un));
	un->sun_family = AF_UNIX;
	strncpy(un->sun_path, path, sizeof(un->sun_path) - 1);

	return sockfd;
}
/*
 * Server mode setup: create the unix socket for vsocket->path, bind and
 * listen on it, and register the listen fd with the fdset so that incoming
 * connections are accepted by vhost_user_server_new_connection().
 *
 * Returns 0 on success, -1 on failure (the socket is closed again).
 */
static int
vhost_user_create_server(struct vhost_user_socket *vsocket)
{
	const char *path = vsocket->path;
	struct sockaddr_un un;
	int listenfd;

	listenfd = create_unix_socket(path, &un, vsocket->is_server);
	if (listenfd < 0)
		return -1;

	if (bind(listenfd, (struct sockaddr *)&un, sizeof(un)) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to bind to %s: %s; remove it and try again\n",
			path, strerror(errno));
		close(listenfd);
		return -1;
	}
	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);

	if (listen(listenfd, MAX_VIRTIO_BACKLOG) < 0) {
		close(listenfd);
		return -1;
	}

	vsocket->listenfd = listenfd;
	if (fdset_add(&vhost_user.fdset, listenfd,
		      vhost_user_server_new_connection, NULL, vsocket) < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to add listen fd %d to vhost server fdset\n",
			listenfd);
		close(listenfd);
		return -1;
	}

	return 0;
}
/*
 * One pending client connection that could not be established yet and is
 * retried by the reconnect thread.
 */
struct vhost_user_reconnect {
/* Address to connect to. */
struct sockaddr_un un;
/* Socket used for the connection attempts. */
int fd;
/* Socket the connection belongs to. */
struct vhost_user_socket *vsocket;
TAILQ_ENTRY(vhost_user_reconnect) next;
};
TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
/* List of pending reconnects together with the mutex guarding it. */
struct vhost_user_reconnect_list {
struct vhost_user_reconnect_tailq_list head;
pthread_mutex_t mutex;
};
/* Pending reconnects; initialised by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Reconnect thread id; compared against 0 to tell whether it was started. */
static pthread_t reconn_tid;
/*
 * Try to connect fd to the given address and, once connected, switch the
 * socket back to blocking mode.
 *
 * Returns 0 on success, -1 when connect() failed (an EISCONN error counts
 * as connected), or -2 when the fd flags could not be read or updated.
 */
static int
vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
{
	int flags;

	if (connect(fd, un, sz) < 0 && errno != EISCONN)
		return -1;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"can't get flags for connfd %d\n", fd);
		return -2;
	}

	/* Already blocking: nothing left to do. */
	if (!(flags & O_NONBLOCK))
		return 0;

	if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"can't disable nonblocking on fd %d\n", fd);
		return -2;
	}

	return 0;
}
/*
 * Body of the reconnect thread. Loops forever: on each pass it takes the
 * reconnect list lock, retries every pending connection once, releases the
 * lock and sleeps for one second.
 */
static void *
vhost_user_client_reconnect(void *arg __rte_unused)
{
int ret;
struct vhost_user_reconnect *reconn, *next;
while (1) {
pthread_mutex_lock(&reconn_list.mutex);
/*
* An equal implementation of TAILQ_FOREACH_SAFE,
* which does not exist on all platforms.
*/
for (reconn = TAILQ_FIRST(&reconn_list.head);
reconn != NULL; reconn = next) {
/* Fetch the successor first: reconn may be freed below. */
next = TAILQ_NEXT(reconn, next);
ret = vhost_user_connect_nonblock(reconn->fd,
(struct sockaddr *)&reconn->un,
sizeof(reconn->un));
/* -2: the fd flags could not be handled; give up on this entry. */
if (ret == -2) {
close(reconn->fd);
RTE_LOG(ERR, VHOST_CONFIG,
"reconnection for fd %d failed\n",
reconn->fd);
goto remove_fd;
}
/* -1: connect failed; keep the entry and retry on the next pass. */
if (ret == -1)
continue;
RTE_LOG(INFO, VHOST_CONFIG,
"%s: connected\n", reconn->vsocket->path);
/* Connected: the fd now belongs to the new connection. */
vhost_user_add_connection(reconn->fd, reconn->vsocket);
remove_fd:
TAILQ_REMOVE(&reconn_list.head, reconn, next);
free(reconn);
}
pthread_mutex_unlock(&reconn_list.mutex);
sleep(1);
}
/* Not reached: the loop above has no exit. */
return NULL;
}
/*
 * Initialise the reconnect list and start the reconnect thread.
 *
 * Returns 0 on success and -1 when the thread could not be created.
 * pthread_create() reports failure with a positive error number (never a
 * negative value), so the result is compared against 0 and normalised to -1
 * for callers that test for a negative return.
 */
static int
vhost_user_reconnect_init(void)
{
	int ret;

	pthread_mutex_init(&reconn_list.mutex, NULL);
	TAILQ_INIT(&reconn_list.head);

	ret = pthread_create(&reconn_tid, NULL,
			     vhost_user_client_reconnect, NULL);
	if (ret != 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to create reconnect thread: %s\n",
			strerror(ret));
		pthread_mutex_destroy(&reconn_list.mutex);
		return -1;
	}

	return 0;
}
/*
 * Client mode setup: create a socket for vsocket->path and try to connect.
 * On success the connection is added right away. Otherwise, if the socket
 * is configured to reconnect and the failure was a plain connect error, the
 * attempt is queued for the reconnect thread.
 *
 * Returns 0 when connected or queued for reconnection, -1 on failure.
 */
static int
vhost_user_create_client(struct vhost_user_socket *vsocket)
{
	const char *path = vsocket->path;
	struct vhost_user_reconnect *entry;
	struct sockaddr_un un;
	int sockfd;
	int rc;

	sockfd = create_unix_socket(path, &un, vsocket->is_server);
	if (sockfd < 0)
		return -1;

	rc = vhost_user_connect_nonblock(sockfd, (struct sockaddr *)&un,
					 sizeof(un));
	if (rc == 0) {
		vhost_user_add_connection(sockfd, vsocket);
		return 0;
	}

	RTE_LOG(ERR, VHOST_CONFIG,
		"failed to connect to %s: %s\n",
		path, strerror(errno));

	/* -2 is not retryable; otherwise retry only when asked to. */
	if (rc == -2 || !vsocket->reconnect) {
		close(sockfd);
		return -1;
	}

	RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);

	entry = malloc(sizeof(*entry));
	if (entry == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to allocate memory for reconnect\n");
		close(sockfd);
		return -1;
	}
	entry->un = un;
	entry->fd = sockfd;
	entry->vsocket = vsocket;

	pthread_mutex_lock(&reconn_list.mutex);
	TAILQ_INSERT_TAIL(&reconn_list.head, entry, next);
	pthread_mutex_unlock(&reconn_list.mutex);

	return 0;
}
/*
 * Register a new vhost-user socket; here we could act as server
 * (the default case), or client (when the RTE_VHOST_USER_CLIENT flag
 * is set).
 *
 * In client mode the reconnect thread is started on first use unless
 * RTE_VHOST_USER_NO_RECONNECT is given.
 *
 * Returns the result of the client/server setup (0 on success), or -1 when
 * path is NULL, the socket table is full, memory allocation fails or the
 * reconnect thread cannot be started.
 */
int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
	int ret = -1;
	struct vhost_user_socket *vsocket;

	if (!path)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: the number of vhost sockets reaches maximum\n");
		goto out;
	}

	vsocket = malloc(sizeof(struct vhost_user_socket));
	if (!vsocket)
		goto out;
	memset(vsocket, 0, sizeof(struct vhost_user_socket));

	/* The path is dereferenced later on, so a failed copy is fatal. */
	vsocket->path = strdup(path);
	if (vsocket->path == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"error: failed to copy socket path string\n");
		goto out_free;
	}
	vsocket->connfd = -1;
	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
		if (vsocket->reconnect && reconn_tid == 0) {
			/* Any non-zero result means the thread is not running. */
			if (vhost_user_reconnect_init() != 0)
				goto out_free;
		}
		ret = vhost_user_create_client(vsocket);
	} else {
		vsocket->is_server = true;
		ret = vhost_user_create_server(vsocket);
	}
	if (ret < 0)
		goto out_free;

	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

	pthread_mutex_unlock(&vhost_user.mutex);
	return ret;

out_free:
	free(vsocket->path);
	free(vsocket);
out:
	pthread_mutex_unlock(&vhost_user.mutex);

	return ret;
}
/*
 * Drop the pending reconnect entry (if any) that belongs to vsocket,
 * closing its fd and freeing the entry.
 *
 * Returns true when an entry was found and removed.
 */
static bool
vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
{
	struct vhost_user_reconnect *entry;
	bool removed = false;

	pthread_mutex_lock(&reconn_list.mutex);

	/* Walk to the first entry owned by vsocket, or to the list end. */
	entry = TAILQ_FIRST(&reconn_list.head);
	while (entry != NULL && entry->vsocket != vsocket)
		entry = TAILQ_NEXT(entry, next);

	if (entry != NULL) {
		TAILQ_REMOVE(&reconn_list.head, entry, next);
		close(entry->fd);
		free(entry);
		removed = true;
	}

	pthread_mutex_unlock(&reconn_list.mutex);

	return removed;
}
/**
 * Unregister the specified vhost socket.
 *
 * Stops listening (server mode) or cancels a pending reconnect (client mode
 * with reconnect), tears down the current connection and its device if
 * there is one, and removes the socket from the registry.
 *
 * Returns 0 on success, -1 when path is NULL or no socket is registered
 * under that path.
 */
int
rte_vhost_driver_unregister(const char *path)
{
	int i;
	int count;
	struct vhost_user_connection *conn;

	/* Mirror rte_vhost_driver_register(): a NULL path is an error. */
	if (path == NULL)
		return -1;

	pthread_mutex_lock(&vhost_user.mutex);

	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
		struct vhost_user_socket *vsocket = vhost_user.vsockets[i];

		if (strcmp(vsocket->path, path) != 0)
			continue;

		if (vsocket->is_server) {
			fdset_del(&vhost_user.fdset, vsocket->listenfd);
			close(vsocket->listenfd);
			unlink(path);
		} else if (vsocket->reconnect) {
			vhost_user_remove_reconnect(vsocket);
		}

		conn = fdset_del(&vhost_user.fdset, vsocket->connfd);
		if (conn) {
			RTE_LOG(INFO, VHOST_CONFIG,
				"free connfd = %d for device '%s'\n",
				vsocket->connfd, path);
			close(vsocket->connfd);
			vhost_destroy_device(conn->vid);
			free(conn);
		}

		free(vsocket->path);
		free(vsocket);

		/* Keep the table dense: move the last entry into the hole. */
		count = --vhost_user.vsocket_cnt;
		vhost_user.vsockets[i] = vhost_user.vsockets[count];
		vhost_user.vsockets[count] = NULL;

		pthread_mutex_unlock(&vhost_user.mutex);
		return 0;
	}

	pthread_mutex_unlock(&vhost_user.mutex);

	return -1;
}
/*
 * Run the event dispatch loop over the vhost-user fd set. Returns 0 once
 * the dispatch call returns.
 */
int
rte_vhost_driver_session_start(void)
{
	fdset_event_dispatch(&vhost_user.fdset);

	return 0;
}

429
lib/vhost/rte_vhost/vhost.c Normal file
View File

@ -0,0 +1,429 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_virtio_net.h>
#include "vhost.h"
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Features supported by this lib. */
#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
(1ULL << VIRTIO_NET_F_CTRL_RX) | \
(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
(VHOST_SUPPORTS_MQ) | \
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VHOST_F_LOG_ALL) | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
(1ULL << VIRTIO_NET_F_CSUM) | \
(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO6))
/*
 * Currently enabled feature bits; starts with everything the library
 * supports and is adjusted by rte_vhost_feature_enable()/disable().
 */
uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
/* Device table indexed by vid; a NULL entry is a free slot. */
struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/* device ops to add/remove device to/from data core. */
struct virtio_net_device_ops const *notify_ops;
struct virtio_net *
get_device(int vid)
{
struct virtio_net *dev = vhost_devices[vid];
if (unlikely(!dev)) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) device not found.\n", vid);
}
return dev;
}
/*
 * Close the event fds of a virtqueue. The kick fd is closed whenever it is
 * valid; the call fd is closed only when destroy is non-zero.
 */
static void
cleanup_vq(struct vhost_virtqueue *vq, int destroy)
{
	if (destroy != 0 && vq->callfd >= 0)
		close(vq->callfd);

	if (vq->kickfd >= 0)
		close(vq->kickfd);
}
/*
 * Run the backend-specific cleanup for the device and close the event fds
 * of all of its queue pairs (see cleanup_vq() for the role of destroy).
 */
void
cleanup_device(struct virtio_net *dev, int destroy)
{
	uint32_t qp;

	vhost_backend_cleanup(dev);

	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
		uint32_t base = qp * VIRTIO_QNUM;

		cleanup_vq(dev->virtqueue[base + VIRTIO_RXQ], destroy);
		cleanup_vq(dev->virtqueue[base + VIRTIO_TXQ], destroy);
	}
}
/*
 * Free the shadow used rings and virtqueues of every queue pair, then the
 * device structure itself.
 */
static void
free_device(struct virtio_net *dev)
{
	uint32_t qp;

	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
		struct vhost_virtqueue **pair =
			&dev->virtqueue[qp * VIRTIO_QNUM];

		rte_free(pair[VIRTIO_RXQ]->shadow_used_ring);
		rte_free(pair[VIRTIO_TXQ]->shadow_used_ring);

		/* Both queues share one allocation that starts at the RX queue. */
		rte_free(pair[VIRTIO_RXQ]);
	}

	rte_free(dev);
}
/*
 * Put a virtqueue into its initial state: everything zeroed, event fds
 * marked uninitialised, backend inactive. Only the queues of pair 0 start
 * out enabled.
 */
static void
init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
{
	memset(vq, 0, sizeof(*vq));

	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;

	/* Backends are set to -1 indicating an inactive device. */
	vq->backend = -1;

	/* The default queue pair (index 0) is always enabled. */
	vq->enabled = (qp_idx == 0);

	TAILQ_INIT(&vq->zmbuf_list);
}
/* Initialise both the RX and the TX queue of queue pair qp_idx. */
static void
init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
{
	struct vhost_virtqueue **pair = &dev->virtqueue[qp_idx * VIRTIO_QNUM];

	init_vring_queue(pair[VIRTIO_RXQ], qp_idx);
	init_vring_queue(pair[VIRTIO_TXQ], qp_idx);
}
/* Re-initialise a virtqueue while keeping its current call fd. */
static void
reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
{
	const int saved_callfd = vq->callfd;

	init_vring_queue(vq, qp_idx);
	vq->callfd = saved_callfd;
}
/* Reset both the RX and the TX queue of queue pair qp_idx. */
static void
reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
{
	struct vhost_virtqueue **pair = &dev->virtqueue[qp_idx * VIRTIO_QNUM];

	reset_vring_queue(pair[VIRTIO_RXQ], qp_idx);
	reset_vring_queue(pair[VIRTIO_TXQ], qp_idx);
}
/*
 * Allocate and initialise the RX/TX virtqueues of queue pair qp_idx as one
 * contiguous allocation, and bump the device's queue pair count.
 *
 * Returns 0 on success, -1 when the allocation fails.
 */
int
alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
{
	uint32_t base = qp_idx * VIRTIO_QNUM;
	struct vhost_virtqueue *pair;

	pair = rte_malloc(NULL, sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
	if (pair == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
		return -1;
	}

	dev->virtqueue[base + VIRTIO_RXQ] = &pair[VIRTIO_RXQ];
	dev->virtqueue[base + VIRTIO_TXQ] = &pair[VIRTIO_TXQ];

	init_vring_queue_pair(dev, qp_idx);

	dev->virt_qp_nb += 1;

	return 0;
}
/*
 * Clear the negotiated state of a device (features, protocol features and
 * flags) and reset all of its queue pairs. Other fields such as vid, ifname
 * and virt_qp_nb are left as they are: they stay valid until the device is
 * removed.
 */
void
reset_device(struct virtio_net *dev)
{
	uint32_t qp = 0;

	dev->features = 0;
	dev->protocol_features = 0;
	dev->flags = 0;

	while (qp < dev->virt_qp_nb) {
		reset_vring_queue_pair(dev, qp);
		qp++;
	}
}
/*
 * Invoked when there is a new vhost-user connection established (when
 * there is a new virtio device being attached).
 *
 * Allocates a zeroed device structure and stores it in the first free slot
 * of vhost_devices[]; the slot index becomes the device's vid.
 *
 * Returns the vid, or -1 when the allocation fails or the table is full.
 */
int
vhost_new_device(void)
{
	struct virtio_net *dev;
	int i;

	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
	if (dev == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to allocate memory for new dev.\n");
		return -1;
	}

	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
		if (vhost_devices[i] == NULL)
			break;
	}
	if (i == MAX_VHOST_DEVICE) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to find a free slot for new device.\n");
		/* Nobody else references dev yet: release it before bailing out. */
		rte_free(dev);
		return -1;
	}

	vhost_devices[i] = dev;
	dev->vid = i;

	return i;
}
/*
 * Invoked when the vhost-user connection is broken (when the virtio device
 * is being detached).
 *
 * If the device is marked as running, the flag is cleared and the
 * application's destroy_device callback is invoked first; then the device
 * is cleaned up, freed and its slot in the device table released.
 */
void
vhost_destroy_device(int vid)
{
	struct virtio_net *dev = get_device(vid);

	if (dev == NULL)
		return;

	if ((dev->flags & VIRTIO_DEV_RUNNING) != 0) {
		dev->flags &= ~VIRTIO_DEV_RUNNING;
		notify_ops->destroy_device(vid);
	}

	cleanup_device(dev, 1);
	free_device(dev);

	vhost_devices[vid] = NULL;
}
/*
 * Copy at most if_len bytes of if_name into the ifname buffer of device
 * vid, clamped to the buffer size; the buffer's last byte is always set
 * to NUL.
 */
void
vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
{
	struct virtio_net *dev = get_device(vid);
	unsigned int len = if_len;

	if (dev == NULL)
		return;

	if (len > sizeof(dev->ifname))
		len = sizeof(dev->ifname);

	strncpy(dev->ifname, if_name, len);
	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
}
/* Turn on the dequeue zero copy flag of device vid, if the device exists. */
void
vhost_enable_dequeue_zero_copy(int vid)
{
	struct virtio_net *dev = get_device(vid);

	if (dev != NULL)
		dev->dequeue_zero_copy = 1;
}
/*
 * Return the NUMA node of device vid, or -1 on failure.
 *
 * When built without RTE_LIBRTE_VHOST_NUMA this always returns -1.
 */
int
rte_vhost_get_numa_node(int vid)
{
#ifdef RTE_LIBRTE_VHOST_NUMA
struct virtio_net *dev = get_device(vid);
int numa_node;
int ret;
if (dev == NULL)
return -1;
/* Query the node of the memory holding the device structure itself. */
ret = get_mempolicy(&numa_node, NULL, 0, dev,
MPOL_F_NODE | MPOL_F_ADDR);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to query numa node: %d\n", vid, ret);
return -1;
}
return numa_node;
#else
/* NUMA support compiled out: mark vid as used and report failure. */
RTE_SET_USED(vid);
return -1;
#endif
}
/*
 * Return the number of queue pairs allocated for device vid, or 0 when the
 * device does not exist.
 */
uint32_t
rte_vhost_get_queue_num(int vid)
{
	struct virtio_net *dev = get_device(vid);

	return (dev != NULL) ? dev->virt_qp_nb : 0;
}
/*
 * Copy the interface name of device vid into buf (at most len bytes,
 * always NUL-terminated).
 *
 * Returns 0 on success, -1 when the device does not exist, buf is NULL or
 * len is 0 (there would be no room for the terminator).
 */
int
rte_vhost_get_ifname(int vid, char *buf, size_t len)
{
	struct virtio_net *dev = get_device(vid);

	if (dev == NULL)
		return -1;

	/* With len == 0 the terminator write below would land on buf[-1]. */
	if (buf == NULL || len == 0)
		return -1;

	len = RTE_MIN(len, sizeof(dev->ifname));

	strncpy(buf, dev->ifname, len);
	buf[len - 1] = '\0';

	return 0;
}
/*
 * Return the difference between the guest's avail index and the queue's
 * last used index for queue queue_id of device vid.
 *
 * Returns 0 when the device does not exist, queue_id is outside the
 * device's virtqueue table, the queue has not been allocated or is
 * disabled, or its avail ring has not been set yet.
 */
uint16_t
rte_vhost_avail_entries(int vid, uint16_t queue_id)
{
	struct virtio_net *dev;
	struct vhost_virtqueue *vq;
	size_t nr_slots;

	dev = get_device(vid);
	if (!dev)
		return 0;

	/* queue_id comes from the caller: stay inside the virtqueue table. */
	nr_slots = sizeof(dev->virtqueue) / sizeof(dev->virtqueue[0]);
	if (queue_id >= nr_slots)
		return 0;

	vq = dev->virtqueue[queue_id];
	if (vq == NULL || !vq->enabled || vq->avail == NULL)
		return 0;

	/* The volatile access forces a fresh read of the guest-written index. */
	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
}
int
rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
struct virtio_net *dev = get_device(vid);
if (dev == NULL)
return -1;
if (enable) {
RTE_LOG(ERR, VHOST_CONFIG,
"guest notification isn't supported.\n");
return -1;
}
dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
return 0;
}
/* Return the currently enabled feature bits. */
uint64_t
rte_vhost_feature_get(void)
{
	return VHOST_FEATURES;
}
/* Clear every bit of feature_mask in the enabled feature set. Returns 0. */
int
rte_vhost_feature_disable(uint64_t feature_mask)
{
	VHOST_FEATURES &= ~feature_mask;

	return 0;
}
/*
 * Set the bits of feature_mask in the enabled feature set.
 *
 * Returns 0 on success, or -1 (changing nothing) when the mask contains a
 * bit that is not part of VHOST_SUPPORTED_FEATURES.
 */
int
rte_vhost_feature_enable(uint64_t feature_mask)
{
	/* Any requested bit outside the supported set rejects the request. */
	if ((feature_mask & ~VHOST_SUPPORTED_FEATURES) != 0)
		return -1;

	VHOST_FEATURES |= feature_mask;

	return 0;
}
/*
 * Remember the application's device callbacks so that devices can be added
 * to / removed from the data core. The pointer is stored as is (no copy is
 * made). Always returns 0.
 */
int
rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
{
	notify_ops = ops;

	return 0;
}

294
lib/vhost/rte_vhost/vhost.h Normal file
View File

@ -0,0 +1,294 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VHOST_NET_CDEV_H_
#define _VHOST_NET_CDEV_H_
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <unistd.h>
#include <linux/vhost.h>
#include <rte_log.h>
#include "rte_virtio_net.h"
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
/* Backend value set by guest. */
#define VIRTIO_DEV_STOPPED -1
#define BUF_VECTOR_MAX 256
/**
 * Structure contains buffer address, length and descriptor index
 * from vring to do scatter RX.
 */
struct buf_vector {
uint64_t buf_addr; /* buffer address */
uint32_t buf_len; /* buffer length */
uint32_t desc_idx; /* index of the descriptor in the vring */
};
/*
 * A structure to hold some fields needed in zero copy code path,
 * mainly for associating an mbuf with the right desc_idx.
 */
struct zcopy_mbuf {
/* The mbuf being tracked. */
struct rte_mbuf *mbuf;
/* Descriptor index the mbuf is associated with. */
uint32_t desc_idx;
/* NOTE(review): presumably non-zero while the entry is occupied; confirm in the data path. */
uint16_t in_use;
/* Linkage for the per-virtqueue zmbuf_list. */
TAILQ_ENTRY(zcopy_mbuf) next;
};
TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);
/**
 * Structure contains variables relevant to RX/TX virtqueues.
 */
struct vhost_virtqueue {
/* The three parts of the vring: descriptor table, avail ring and used ring. */
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
uint32_t size;
uint16_t last_avail_idx;
uint16_t last_used_idx;
/* Special values for callfd/kickfd; a freshly initialised queue uses UNINITIALIZED. */
#define VIRTIO_INVALID_EVENTFD (-1)
#define VIRTIO_UNINITIALIZED_EVENTFD (-2)
/* Backend value to determine if device should be started/stopped */
int backend;
/* Used to notify the guest (trigger interrupt) */
int callfd;
/* Currently unused as polling mode is enabled */
int kickfd;
/* Non-zero when the queue is enabled; queues of pair 0 start enabled. */
int enabled;
/* Physical address of used ring, for logging */
uint64_t log_guest_addr;
/* Zero copy bookkeeping (see struct zcopy_mbuf). */
uint16_t nr_zmbuf;
uint16_t zmbuf_size;
uint16_t last_zmbuf_idx;
struct zcopy_mbuf *zmbufs;
struct zcopy_mbuf_list zmbuf_list;
/* Released with rte_free() when the device is freed. */
struct vring_used_elem *shadow_used_ring;
uint16_t shadow_used_idx;
} __rte_cache_aligned;
/* Old kernels have no such macro defined */
#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
#define VIRTIO_NET_F_GUEST_ANNOUNCE 21
#endif
/*
* Make an extra wrapper for VIRTIO_NET_F_MQ and
* VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are
* introduced since kernel v3.8. This makes our
* code buildable for older kernel.
*/
#ifdef VIRTIO_NET_F_MQ
#define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
#define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ)
#else
#define VHOST_MAX_QUEUE_PAIRS 1
#define VHOST_SUPPORTS_MQ 0
#endif
/*
* Define virtio 1.0 for older kernels
*/
#ifndef VIRTIO_F_VERSION_1
#define VIRTIO_F_VERSION_1 32
#endif
/*
 * One guest-physical to host-physical mapping: the size bytes starting at
 * guest_phys_addr map to host_phys_addr. Looked up by gpa_to_hpa().
 */
struct guest_page {
uint64_t guest_phys_addr;
uint64_t host_phys_addr;
uint64_t size;
};
/**
 * Device structure contains all configuration information relating
 * to the device.
 */
struct virtio_net {
/* Frontend (QEMU) memory and memory region information */
struct virtio_memory *mem;
uint64_t features;
uint64_t protocol_features;
/* Device id: the index of this device in vhost_devices[]. */
int vid;
/* Device state bits, e.g. VIRTIO_DEV_RUNNING. */
uint32_t flags;
uint16_t vhost_hlen;
/* to tell if we need broadcast rarp packet */
rte_atomic16_t broadcast_rarp;
/* Number of queue pairs allocated so far. */
uint32_t virt_qp_nb;
uint32_t num_queues;
/* Non-zero when dequeue zero copy was enabled for the device. */
int dequeue_zero_copy;
/* Queue pair i occupies slots i * VIRTIO_QNUM + VIRTIO_RXQ / VIRTIO_TXQ. */
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
/* Interface name; set via vhost_set_ifname(). */
char ifname[IF_NAME_SZ];
uint64_t log_size;
uint64_t log_base;
uint64_t log_addr;
struct ether_addr mac;
/* guest_pages[] holds nr_guest_pages entries; searched by gpa_to_hpa(). */
uint32_t nr_guest_pages;
uint32_t max_guest_pages;
struct guest_page *guest_pages;
} __rte_cache_aligned;
/**
 * Information relating to memory regions including offsets to
 * addresses in QEMUs memory file.
 *
 * gpa_to_vva() translates a guest physical address inside
 * [guest_phys_addr, guest_phys_addr + size) by adding its offset within
 * the region to host_user_addr.
 */
struct virtio_memory_region {
uint64_t guest_phys_addr;
uint64_t guest_user_addr;
uint64_t host_user_addr;
uint64_t size;
void *mmap_addr;
uint64_t mmap_size;
int fd;
};
/**
 * Memory structure includes region and mapping information.
 */
struct virtio_memory {
/* Number of entries in regions[]. */
uint32_t nregions;
/* Trailing zero-length array: the regions follow the header in memory. */
struct virtio_memory_region regions[0];
};
/* Macros for printing using RTE_LOG */
/* Both vhost log types are aliases of RTE_LOGTYPE_USER1. */
#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
/*
 * When RTE_LIBRTE_VHOST_DEBUG is defined, LOG_DEBUG forwards to RTE_LOG at
 * DEBUG level and PRINT_PACKET dumps a buffer as hex. Otherwise both expand
 * to an empty statement and their arguments are never evaluated.
 */
#ifdef RTE_LIBRTE_VHOST_DEBUG
#define VHOST_MAX_PRINT_BUFF 6072
#define LOG_LEVEL RTE_LOG_DEBUG
#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
/*
 * PRINT_PACKET(device, addr, size, header)
 *
 * Builds one log line in a local buffer of VHOST_MAX_PRINT_BUFF bytes:
 * a "(vid) Header size N: " or "(vid) Packet size N: " prefix (selected by
 * `header`), followed by `size` bytes read from `addr`, each printed as two
 * hex digits and a space, followed by a newline. The line is then emitted
 * with LOG_DEBUG(VHOST_DATA, ...). Every append is bounded by the space
 * remaining in the buffer, so long dumps are truncated rather than overrun.
 * The macro declares the locals pkt_addr, index and packet inside its own
 * do/while scope.
 */
#define PRINT_PACKET(device, addr, size, header) do { \
char *pkt_addr = (char *)(addr); \
unsigned int index; \
char packet[VHOST_MAX_PRINT_BUFF]; \
\
if ((header)) \
snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \
else \
snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \
for (index = 0; index < (size); index++) { \
snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
"%02hhx ", pkt_addr[index]); \
} \
snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
\
LOG_DEBUG(VHOST_DATA, "%s", packet); \
} while (0)
#else
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif
/* Defined elsewhere in the library; NOTE(review): presumably the feature bitmask offered to guests - confirm. */
extern uint64_t VHOST_FEATURES;
/* Capacity of the vhost_devices[] table below. */
#define MAX_VHOST_DEVICE 1024
/* Table of device pointers; NOTE(review): presumably indexed by vid - confirm against get_device(). */
extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/*
 * Translate a guest physical address into the matching host virtual address.
 *
 * Walks the device's memory regions in order and uses the first one whose
 * [guest_phys_addr, guest_phys_addr + size) range contains gpa.
 * Returns 0 when no region contains gpa.
 */
static inline uint64_t __attribute__((always_inline))
gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
{
	uint32_t idx;

	for (idx = 0; idx < dev->mem->nregions; idx++) {
		struct virtio_memory_region *region = &dev->mem->regions[idx];

		/* Skip regions that start above, or end at/below, the address. */
		if (gpa < region->guest_phys_addr ||
		    gpa >= region->guest_phys_addr + region->size) {
			continue;
		}

		/* Same offset into the region, rebased onto the host mapping. */
		return region->host_user_addr + (gpa - region->guest_phys_addr);
	}

	return 0;
}
/*
 * Translate a guest physical address range into a host physical address.
 *
 * Uses the first guest page for which gpa is at or after the page start and
 * gpa + size is strictly below the page end (a range that ends exactly on
 * the page end is not accepted by this check). Returns 0 when no page
 * satisfies the condition.
 */
static inline phys_addr_t __attribute__((always_inline))
gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
	uint32_t idx;

	for (idx = 0; idx < dev->nr_guest_pages; idx++) {
		struct guest_page *pg = &dev->guest_pages[idx];

		if (gpa < pg->guest_phys_addr ||
		    gpa + size >= pg->guest_phys_addr + pg->size) {
			continue;
		}

		/* Same offset into the page, rebased onto the host physical base. */
		return pg->host_phys_addr + (gpa - pg->guest_phys_addr);
	}

	return 0;
}
/*
 * Device-management entry points implemented elsewhere in the library.
 * Their behaviour is not visible from this header; the notes below are
 * limited to what the signatures show.
 */
/* Pointer to a constant callback table; set elsewhere. */
extern struct virtio_net_device_ops const *notify_ops;
/* Takes a device id (vid) and returns a device pointer. */
struct virtio_net *get_device(int vid);
/* NOTE(review): the int result is presumably the new vid or a negative error - confirm. */
int vhost_new_device(void);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int);
int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx);
/* if_len is passed alongside if_name; NOTE(review): presumably its length in bytes - confirm. */
void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
void vhost_enable_dequeue_zero_copy(int vid);
/*
 * Backend-specific cleanup.
 *
 * TODO: fix it; we have one backend now
 */
void vhost_backend_cleanup(struct virtio_net *dev);
#endif /* _VHOST_NET_CDEV_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _VHOST_NET_USER_H
#define _VHOST_NET_USER_H
#include <stdint.h>
#include <linux/vhost.h>
#include "rte_virtio_net.h"
/* refer to hw/virtio/vhost-user.c */
/* Number of entries in VhostUserMemory.regions[] and VhostUserMsg.fds[]. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Bit positions of the individual vhost-user protocol features. */
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
/* Bitmask with the three protocol feature bits above set. */
#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
(1ULL << VHOST_USER_PROTOCOL_F_RARP))
/*
 * Request codes stored in VhostUserMsg.request. Every code has an explicit
 * numeric value; VHOST_USER_MAX is one past the last defined request.
 */
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
VHOST_USER_SET_FEATURES = 2,
VHOST_USER_SET_OWNER = 3,
VHOST_USER_RESET_OWNER = 4,
VHOST_USER_SET_MEM_TABLE = 5,
VHOST_USER_SET_LOG_BASE = 6,
VHOST_USER_SET_LOG_FD = 7,
VHOST_USER_SET_VRING_NUM = 8,
VHOST_USER_SET_VRING_ADDR = 9,
VHOST_USER_SET_VRING_BASE = 10,
VHOST_USER_GET_VRING_BASE = 11,
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_MAX
} VhostUserRequest;
/*
 * One entry of the VhostUserMemory.regions[] table: four 64-bit fields
 * describing a single memory region.
 * NOTE(review): field meanings follow QEMU's hw/virtio/vhost-user.c - confirm there.
 */
typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
uint64_t mmap_offset;
} VhostUserMemoryRegion;
/*
 * Memory-table payload of VhostUserMsg: a region count followed by a
 * fixed-size array of VHOST_MEMORY_MAX_NREGIONS region descriptors.
 * NOTE(review): nregions presumably says how many entries are valid - confirm.
 */
typedef struct VhostUserMemory {
uint32_t nregions;
/* Explicit padding between the 32-bit count and the 64-bit region fields. */
uint32_t padding;
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
/*
 * Log payload of VhostUserMsg: a size and an offset, both 64-bit.
 * NOTE(review): presumably the size/offset used to mmap() the log area - confirm.
 */
typedef struct VhostUserLog {
uint64_t mmap_size;
uint64_t mmap_offset;
} VhostUserLog;
/*
 * A vhost-user message: a fixed header (request, flags, size), a payload
 * union selected by the request, and an array of file descriptors.
 * The struct is declared packed, so no padding is inserted between members.
 */
typedef struct VhostUserMsg {
VhostUserRequest request;
/* Masks applied to `flags`: low two bits, and bit 2. */
#define VHOST_USER_VERSION_MASK 0x3
#define VHOST_USER_REPLY_MASK (0x1 << 2)
uint32_t flags;
uint32_t size; /* the following payload size */
union {
/* Masks applied to `u64`: low eight bits, and bit 8. */
#define VHOST_USER_VRING_IDX_MASK 0xff
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
uint64_t u64;
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
} payload;
/* NOTE(review): presumably descriptors received as ancillary data with the message - confirm. */
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
/* Byte offset of the payload inside VhostUserMsg, i.e. the size of request + flags + size. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* The version of the protocol we support */
#define VHOST_USER_VERSION 0x1
/* vhost_user.c */
/* Takes a device id and a file descriptor; the result is an int status. */
int vhost_user_msg_handler(int vid, int fd);
/* socket.c */
/*
 * Both helpers take a socket, a byte buffer with its length, and an array of
 * fd_num file descriptors.
 * NOTE(review): return-value conventions are defined in socket.c - confirm there.
 */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
#endif

File diff suppressed because it is too large Load Diff

162
lib/vhost/task.c Normal file
View File

@ -0,0 +1,162 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <assert.h>
#include <rte_config.h>
#include <rte_mempool.h>
#include "spdk_internal/log.h"
#include "spdk_internal/event.h"
#include "spdk/env.h"
#include "spdk/queue.h"
#include "task.h"
/*
 * container_of(ptr, type, member): given a pointer to `member` embedded in a
 * `type`, yield a pointer to the enclosing `type` by subtracting the member's
 * offset. The typed temporary makes the compiler check that `ptr` matches the
 * member's type. Any earlier definition is dropped first. Uses the GNU
 * statement-expression and typeof extensions.
 */
#undef container_of
#define container_of(ptr, type, member) ({ \
typeof(((type *)0)->member) *__mptr = (ptr); \
(type *)((char *)__mptr - offsetof(type, member)); })
/* List head type for queues of spdk_vhost_task linked through iovecs_link. */
typedef TAILQ_HEAD(, spdk_vhost_task) need_iovecs_tailq_t;
/* Pool of struct spdk_vhost_task objects, created in spdk_vhost_subsystem_init(). */
static struct rte_mempool *g_task_pool;
/* Pool of iovec arrays (VHOST_SCSI_IOVS_LEN entries each), created in spdk_vhost_subsystem_init(). */
static struct rte_mempool *g_iov_buffer_pool;
/*
 * One task queue per lcore, indexed by rte_lcore_id() in the enqueue/dequeue
 * helpers. Note: not declared static, so the symbol has external linkage.
 */
need_iovecs_tailq_t g_need_iovecs[RTE_MAX_LCORE];
void
spdk_vhost_task_put(struct spdk_vhost_task *task)
{
assert(&task->scsi.iov == task->scsi.iovs);
assert(task->scsi.iovcnt == 1);
spdk_scsi_task_put(&task->scsi);
}
/*
 * free_fn installed on every SCSI task created by spdk_vhost_task_get():
 * recovers the enclosing spdk_vhost_task and returns it to g_task_pool.
 */
static void
spdk_vhost_task_free_cb(struct spdk_scsi_task *scsi_task)
{
	struct spdk_vhost_task *owner;

	owner = container_of(scsi_task, struct spdk_vhost_task, scsi);
	rte_mempool_put(g_task_pool, owner);
}
/*
 * Take a task from g_task_pool, zero it and construct its embedded SCSI task.
 *
 * owner_task_ctr is passed through to spdk_scsi_task_construct(). The SCSI
 * task's free_fn is set so that the task goes back to the pool when freed.
 *
 * Does not return on allocation failure: logs an error and calls rte_panic().
 */
struct spdk_vhost_task *
spdk_vhost_task_get(uint32_t *owner_task_ctr)
{
	struct spdk_vhost_task *new_task;

	if (rte_mempool_get(g_task_pool, (void **)&new_task) < 0 || new_task == NULL) {
		SPDK_ERRLOG("Unable to get task\n");
		rte_panic("no memory\n");
	}

	memset(new_task, 0, sizeof(struct spdk_vhost_task));

	spdk_scsi_task_construct(&new_task->scsi, owner_task_ctr, NULL);
	new_task->scsi.free_fn = spdk_vhost_task_free_cb;

	return new_task;
}
/*
 * Append a task to the tail of the calling lcore's g_need_iovecs queue.
 */
void
spdk_vhost_enqueue_task(struct spdk_vhost_task *task)
{
	need_iovecs_tailq_t *pending = g_need_iovecs + rte_lcore_id();

	TAILQ_INSERT_TAIL(pending, task, iovecs_link);
}
/*
 * Remove and return the task at the head of the calling lcore's
 * g_need_iovecs queue, or return NULL when that queue is empty.
 */
struct spdk_vhost_task *
spdk_vhost_dequeue_task(void)
{
	need_iovecs_tailq_t *pending = g_need_iovecs + rte_lcore_id();
	struct spdk_vhost_task *head = TAILQ_FIRST(pending);

	/* TAILQ_FIRST yields NULL for an empty queue; nothing to unlink then. */
	if (head != NULL) {
		TAILQ_REMOVE(pending, head, iovecs_link);
	}

	return head;
}
/*
 * Take one iovec array from g_iov_buffer_pool.
 *
 * The result of rte_mempool_get() is not inspected; the pointer starts out
 * as NULL and is returned as-is, so callers must check for NULL.
 */
struct iovec *
spdk_vhost_iovec_alloc(void)
{
	void *buf = NULL;

	(void)rte_mempool_get(g_iov_buffer_pool, &buf);

	return buf;
}
/*
 * Return an iovec array to g_iov_buffer_pool. `iov` is expected to be a
 * pointer previously obtained from spdk_vhost_iovec_alloc().
 */
void
spdk_vhost_iovec_free(struct iovec *iov)
{
rte_mempool_put(g_iov_buffer_pool, iov);
}
/*
 * Subsystem init hook: create the task mempool and the iovec-buffer mempool,
 * then initialise every per-lcore g_need_iovecs queue.
 *
 * Returns 0 on success and -1 if either mempool cannot be created. On
 * failure no mempool is left allocated.
 */
static int
spdk_vhost_subsystem_init(void)
{
	unsigned int lcore;

	g_task_pool = rte_mempool_create("vhost task pool", 16384, sizeof(struct spdk_vhost_task),
					 128, 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
	if (!g_task_pool) {
		SPDK_ERRLOG("create task pool failed\n");
		return -1;
	}

	/* Each element holds one array of VHOST_SCSI_IOVS_LEN iovecs. */
	g_iov_buffer_pool = rte_mempool_create("vhost iov buffer pool", 2048,
					       VHOST_SCSI_IOVS_LEN * sizeof(struct iovec),
					       128, 0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
	if (!g_iov_buffer_pool) {
		SPDK_ERRLOG("create iov buffer pool failed\n");
		/* Do not leak the task pool created above. */
		rte_mempool_free(g_task_pool);
		g_task_pool = NULL;
		return -1;
	}

	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
		TAILQ_INIT(&g_need_iovecs[lcore]);
	}

	return 0;
}
/*
 * Subsystem shutdown hook. Currently a no-op that reports success.
 * NOTE(review): the mempools created by spdk_vhost_subsystem_init() are not
 * released here.
 */
static int
spdk_vhost_subsystem_fini(void)
{
return 0;
}
/* Register the "vhost" subsystem with its init/fini hooks (last argument NULL). */
SPDK_SUBSYSTEM_REGISTER(vhost, spdk_vhost_subsystem_init, spdk_vhost_subsystem_fini, NULL)
/* Declare that the vhost subsystem depends on the scsi subsystem. */
SPDK_SUBSYSTEM_DEPEND(vhost, scsi)

69
lib/vhost/task.h Normal file
View File

@ -0,0 +1,69 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SPDK_VHOST_TASK_H
#define SPDK_VHOST_TASK_H
#include "spdk/scsi.h"
/*
 * Number of struct iovec entries in each buffer of the iovec mempool
 * (task.c sizes the pool elements as VHOST_SCSI_IOVS_LEN * sizeof(struct iovec)).
 */
#define VHOST_SCSI_IOVS_LEN 128
/*
 * Per-request state of the vhost-scsi target. Instances come from a mempool
 * (spdk_vhost_task_get) and are zeroed before use.
 */
struct spdk_vhost_task {
/* Embedded SCSI task; container_of() on this member recovers the vhost task. */
struct spdk_scsi_task scsi;
/* Response pointer; which member applies depends on the request kind (anonymous union). */
union {
struct virtio_scsi_cmd_resp *resp;
struct virtio_scsi_ctrl_tmf_resp *tmf_resp;
};
/* NOTE(review): presumably the controller and SCSI device the request targets - confirm in vhost.c. */
struct spdk_vhost_scsi_ctrlr *vdev;
struct spdk_scsi_dev *scsi_dev;
/* NOTE(review): presumably the descriptor index of the request in `vq` - confirm in vhost.c. */
int req_idx;
struct vhost_virtqueue *vq;
/* Linkage for the per-lcore queues used by spdk_vhost_enqueue_task()/spdk_vhost_dequeue_task(). */
TAILQ_ENTRY(spdk_vhost_task) iovecs_link;
};
/* Append `task` to the calling lcore's queue / pop the head of that queue (NULL when empty). */
void spdk_vhost_enqueue_task(struct spdk_vhost_task *task);
struct spdk_vhost_task *spdk_vhost_dequeue_task(void);
/* Release a task obtained from spdk_vhost_task_get(). */
void spdk_vhost_task_put(struct spdk_vhost_task *task);
/* Get a zeroed task with its SCSI task constructed; panics instead of returning NULL. */
struct spdk_vhost_task *spdk_vhost_task_get(uint32_t *owner_task_ctr);
/* Return / obtain an array of VHOST_SCSI_IOVS_LEN iovecs; alloc may return NULL. */
void spdk_vhost_iovec_free(struct iovec *iov);
struct iovec *spdk_vhost_iovec_alloc(void);
#endif /* SPDK_VHOST_TASK_H */

1161
lib/vhost/vhost.c Normal file

File diff suppressed because it is too large Load Diff

215
lib/vhost/vhost_rpc.c Normal file
View File

@ -0,0 +1,215 @@
/*-
* BSD LICENSE
*
* Copyright(c) Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <stdlib.h>
#include "spdk_internal/log.h"
#include "spdk/rpc.h"
#include "spdk/vhost.h"
#include "task.h"
/*
 * Write the members describing one SCSI device into the JSON object that the
 * caller has already opened: "id", "device_name" and "luns", the latter being
 * an array with one {"id", "name"} object per populated LUN slot.
 */
static void
json_scsi_dev_write(struct spdk_json_write_ctx *ctx, struct spdk_scsi_dev *dev)
{
	int idx;

	spdk_json_write_name(ctx, "id");
	spdk_json_write_int32(ctx, (int32_t)dev->id);

	spdk_json_write_name(ctx, "device_name");
	spdk_json_write_string(ctx, dev->name);

	spdk_json_write_name(ctx, "luns");
	spdk_json_write_array_begin(ctx);
	for (idx = 0; idx < dev->maxlun; idx++) {
		/* Only populated LUN slots are reported. */
		if (dev->lun[idx] != NULL) {
			spdk_json_write_object_begin(ctx);

			spdk_json_write_name(ctx, "id");
			spdk_json_write_int32(ctx, (int32_t)dev->lun[idx]->id);

			spdk_json_write_name(ctx, "name");
			spdk_json_write_string(ctx, dev->lun[idx]->name);

			spdk_json_write_object_end(ctx);
		}
	}
	spdk_json_write_array_end(ctx);
}
/*
 * RPC handler for "get_vhost_scsi_controllers".
 *
 * Accepts no parameters; any params value is rejected with an
 * invalid-params error. The result is an array with one object per
 * controller: "ctrlr" (name), "cpu_mask" (hex string) and "scsi_devs", an
 * array with one object per populated device slot holding "scsi_dev_num"
 * plus the members written by json_scsi_dev_write().
 */
static void
spdk_rpc_get_vhost_scsi_controllers(struct spdk_jsonrpc_server_conn *conn,
				    const struct spdk_json_val *params,
				    const struct spdk_json_val *id)
{
	struct spdk_json_write_ctx *writer;
	struct spdk_vhost_scsi_ctrlr *ctrlr;
	struct spdk_scsi_dev *scsi_dev;
	char mask_str[32];
	uint32_t slot;

	if (params != NULL) {
		spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
						 "get_vhost_scsi_controllers requires no parameters");
		return;
	}

	writer = spdk_jsonrpc_begin_result(conn, id);
	spdk_json_write_array_begin(writer);

	/* Passing NULL starts the iteration; a NULL result ends it. */
	for (ctrlr = spdk_vhost_scsi_ctrlr_next(NULL); ctrlr != NULL;
	     ctrlr = spdk_vhost_scsi_ctrlr_next(ctrlr)) {
		spdk_json_write_object_begin(writer);

		spdk_json_write_name(writer, "ctrlr");
		spdk_json_write_string(writer, spdk_vhost_scsi_ctrlr_get_name(ctrlr));

		spdk_json_write_name(writer, "cpu_mask");
		snprintf(mask_str, sizeof(mask_str), "%#" PRIx64,
			 spdk_vhost_scsi_ctrlr_get_cpumask(ctrlr));
		spdk_json_write_string(writer, mask_str);

		spdk_json_write_name(writer, "scsi_devs");
		spdk_json_write_array_begin(writer);
		for (slot = 0; slot < SPDK_VHOST_SCSI_CTRLR_MAX_DEVS; slot++) {
			scsi_dev = spdk_vhost_scsi_ctrlr_get_dev(ctrlr, slot);
			if (scsi_dev != NULL) {
				spdk_json_write_object_begin(writer);
				spdk_json_write_name(writer, "scsi_dev_num");
				spdk_json_write_uint32(writer, slot);
				json_scsi_dev_write(writer, scsi_dev);
				spdk_json_write_object_end(writer);
			}
		}
		spdk_json_write_array_end(writer); /* scsi_devs */

		spdk_json_write_object_end(writer); /* controller */
	}

	spdk_json_write_array_end(writer);
	spdk_jsonrpc_end_result(conn, writer);
}
SPDK_RPC_REGISTER("get_vhost_scsi_controllers", spdk_rpc_get_vhost_scsi_controllers)
/* Decoded parameters of the "construct_vhost_scsi_controller" RPC. */
struct rpc_vhost_scsi_ctrlr {
/* Controller name, from the "ctrlr" key. */
char *ctrlr;
/* Core mask string, from the "cpumask" key; stays NULL when the key is absent. */
char *cpumask;
};
/*
 * JSON key -> struct member mapping for the request above.
 * NOTE(review): the trailing `true` on "cpumask" presumably marks the key as
 * optional - confirm against struct spdk_json_object_decoder.
 */
static const struct spdk_json_object_decoder rpc_construct_vhost_ctrlr[] = {
{"ctrlr", offsetof(struct rpc_vhost_scsi_ctrlr, ctrlr), spdk_json_decode_string },
{"cpumask", offsetof(struct rpc_vhost_scsi_ctrlr, cpumask), spdk_json_decode_string, true},
};
/*
 * RPC handler for "construct_vhost_scsi_controller".
 *
 * Decodes {"ctrlr", "cpumask"} from params. The controller's core mask
 * defaults to the application core mask and is replaced by the parsed
 * "cpumask" string when one is supplied. On success the result is the JSON
 * value true; on any failure an invalid-params error carrying
 * strerror(-rc) is sent.
 *
 * The strings decoded into `req` are heap-allocated by the JSON decoder and
 * are released on every path before returning.
 * NOTE(review): this relies on spdk_vhost_scsi_ctrlr_construct() copying the
 * controller name rather than keeping the caller's pointer - confirm in vhost.c.
 */
static void
spdk_rpc_construct_vhost_scsi_controller(struct spdk_jsonrpc_server_conn *conn,
		const struct spdk_json_val *params,
		const struct spdk_json_val *id)
{
	struct rpc_vhost_scsi_ctrlr req = {0};
	struct spdk_json_write_ctx *w;
	int rc;
	uint64_t cpumask;

	if (spdk_json_decode_object(params, rpc_construct_vhost_ctrlr,
				    sizeof(rpc_construct_vhost_ctrlr) / sizeof(*rpc_construct_vhost_ctrlr),
				    &req)) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n");
		rc = -EINVAL;
		goto invalid;
	}

	cpumask = spdk_app_get_core_mask();
	if (req.cpumask != NULL && spdk_vhost_parse_core_mask(req.cpumask, &cpumask)) {
		rc = -EINVAL;
		goto invalid;
	}

	rc = spdk_vhost_scsi_ctrlr_construct(req.ctrlr, cpumask);
	if (rc < 0) {
		goto invalid;
	}

	/* The request strings are no longer needed once the controller exists. */
	free(req.ctrlr);
	free(req.cpumask);

	w = spdk_jsonrpc_begin_result(conn, id);
	spdk_json_write_bool(w, true);
	spdk_jsonrpc_end_result(conn, w);
	return;

invalid:
	/* Members that were never decoded are still NULL, which free() accepts. */
	free(req.ctrlr);
	free(req.cpumask);
	spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, strerror(-rc));
}
SPDK_RPC_REGISTER("construct_vhost_scsi_controller", spdk_rpc_construct_vhost_scsi_controller)
/* Decoded parameters of the "add_vhost_scsi_lun" RPC. */
struct rpc_add_vhost_scsi_ctrlr_lun {
/* Controller name, from the "ctrlr" key. */
char *ctrlr;
/* Device slot on the controller, from the "scsi_dev_num" key. */
uint32_t scsi_dev_num;
/* LUN name, from the "lun_name" key. */
char *lun_name;
};
/* JSON key -> struct member mapping for the request above; no entry carries an optional flag. */
static const struct spdk_json_object_decoder rpc_vhost_add_lun[] = {
{"ctrlr", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, ctrlr), spdk_json_decode_string },
{"scsi_dev_num", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, scsi_dev_num), spdk_json_decode_uint32},
{"lun_name", offsetof(struct rpc_add_vhost_scsi_ctrlr_lun, lun_name), spdk_json_decode_string },
};
/*
 * RPC handler for "add_vhost_scsi_lun".
 *
 * Decodes {"ctrlr", "scsi_dev_num", "lun_name"} from params and forwards
 * them to spdk_vhost_scsi_ctrlr_add_dev(). On success the result is the JSON
 * value true; on any failure an invalid-params error carrying
 * strerror(-rc) is sent.
 *
 * The strings decoded into `req` are heap-allocated by the JSON decoder and
 * are released on every path before returning.
 * NOTE(review): this relies on spdk_vhost_scsi_ctrlr_add_dev() not retaining
 * the caller's string pointers - confirm in vhost.c.
 */
static void
spdk_rpc_add_vhost_scsi_lun(struct spdk_jsonrpc_server_conn *conn,
			    const struct spdk_json_val *params,
			    const struct spdk_json_val *id)
{
	struct rpc_add_vhost_scsi_ctrlr_lun req = {0};
	struct spdk_json_write_ctx *w;
	int rc;

	if (spdk_json_decode_object(params, rpc_vhost_add_lun,
				    sizeof(rpc_vhost_add_lun) / sizeof(*rpc_vhost_add_lun),
				    &req)) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_json_decode_object failed\n");
		rc = -EINVAL;
		goto invalid;
	}

	rc = spdk_vhost_scsi_ctrlr_add_dev(req.ctrlr, req.scsi_dev_num, req.lun_name);
	if (rc < 0) {
		goto invalid;
	}

	/* The request strings are no longer needed once the device is attached. */
	free(req.ctrlr);
	free(req.lun_name);

	w = spdk_jsonrpc_begin_result(conn, id);
	spdk_json_write_bool(w, true);
	spdk_jsonrpc_end_result(conn, w);
	return;

invalid:
	/* Members that were never decoded are still NULL, which free() accepts. */
	free(req.ctrlr);
	free(req.lun_name);
	spdk_jsonrpc_send_error_response(conn, id, SPDK_JSONRPC_ERROR_INVALID_PARAMS, strerror(-rc));
}
SPDK_RPC_REGISTER("add_vhost_scsi_lun", spdk_rpc_add_vhost_scsi_lun)

View File

@ -35,12 +35,13 @@
# separately and wrapped in whole-archive linker args
SPDK_RPC_LIB_LIST = $(filter %_rpc,$(SPDK_LIB_LIST))
# Currently the iscsi, net, and scsi libraries contain their respective RPC methods
# Currently some libraries contain their respective RPC methods
# rather than breaking them out into separate libraries. So we must also include
# these directories in the RPC library list.
SPDK_RPC_LIB_LIST += $(filter iscsi,$(SPDK_LIB_LIST))
SPDK_RPC_LIB_LIST += $(filter net,$(SPDK_LIB_LIST))
SPDK_RPC_LIB_LIST += $(filter scsi,$(SPDK_LIB_LIST))
SPDK_RPC_LIB_LIST += $(filter vhost,$(SPDK_LIB_LIST))
SPDK_REMAINING_LIB_LIST = $(filter-out $(SPDK_RPC_LIB_LIST),$(SPDK_LIB_LIST))

View File

@ -12,9 +12,13 @@ if hash astyle; then
echo -n "Checking coding style..."
rm -f astyle.log
touch astyle.log
astyle --options=.astylerc "*.c" >> astyle.log
# Exclude rte_vhost code imported from DPDK - we want to keep the original code
# as-is to enable ongoing work to synch with a generic upstream DPDK vhost library,
# rather than making diffs more complicated by a lot of changes to follow SPDK
# coding standards.
astyle --options=.astylerc "*.c" --exclude="rte_vhost" >> astyle.log
astyle --options=.astylerc --exclude=test/cpp_headers "*.cpp" >> astyle.log
astyle --options=.astylerc "*.h" >> astyle.log
astyle --options=.astylerc "*.h" --exclude="rte_vhost" >> astyle.log
if grep -q "^Formatted" astyle.log; then
echo " errors detected"
git diff

View File

@ -417,15 +417,16 @@ p = subparsers.add_parser('get_vhost_scsi_controllers', help='List vhost control
p.set_defaults(func=get_vhost_scsi_controllers)
def construct_vhost_scsi_controller(args):
params = {
'ctrlr': args.ctrlr,
'cpumask': args.cpu_mask
}
params = {'ctrlr': args.ctrlr}
if args.cpumask:
params['cpumask'] = args.cpumask
jsonrpc_call('construct_vhost_scsi_controller', params)
p = subparsers.add_parser('construct_vhost_scsi_controller', help='Add new vhost controller')
p.add_argument('ctrlr', help='conntroller name')
p.add_argument('cpumask', help='cpu mask for this controller')
p.add_argument('ctrlr', help='controller name')
p.add_argument('--cpumask', help='cpu mask for this controller')
p.set_defaults(func=construct_vhost_scsi_controller)
def add_vhost_scsi_lun(args):

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Runs an ext4 build workload on every matching SCSI disk visible on this
# machine. Locate this script's directory and the repository root, then pull
# in the shared test helpers.
testdir=$(readlink -f $(dirname $0))
rootdir=$testdir/../../..
source $rootdir/scripts/autotest_common.sh
# Disk discovery snippet, executed in a separate bash below. It prints the
# names of all /sys/block/sd* devices whose vendor string starts with
# Intel, RAWSCSI or LIO-ORG, separated by spaces. nullglob makes the loop
# run zero times when no sd* device exists.
script='shopt -s nullglob; \
for entry in /sys/block/sd*; do \
disk_type="$(cat $entry/device/vendor)"; \
if [[ $disk_type == Intel* ]] || [[ $disk_type == RAWSCSI* ]] || [[ $disk_type == LIO-ORG* ]]; then \
fname=$(basename $entry); \
echo -n "$fname "; \
fi; \
done'
# Space-separated list of device names (e.g. "sdb sdc") to test.
devs="$(echo "$script" | bash -s)"
timing_enter ext4test
# Any signal or exit from here on makes the script exit with status 1 until
# the trap is cleared at the end of the script.
trap "exit 1" SIGINT SIGTERM EXIT
# For each device: create an ext4 filesystem (destroys existing data), mount
# it synchronously, copy the source tree onto it and build it there, so the
# disk sees a mixed read/write load.
for dev in $devs; do
mkfs.ext4 -F /dev/$dev
mkdir -p /mnt/${dev}dir
mount -o sync /dev/$dev /mnt/${dev}dir
rsync -qav --exclude=".git" $rootdir/ /mnt/${dev}dir/spdk
sleep 2
make -C /mnt/${dev}dir/spdk -j8 clean
make -C /mnt/${dev}dir/spdk -j8
# Print out space consumed on target device to help decide
# if/when we need to increase the size of the malloc LUN
df -h /dev/$dev
rm -rf /mnt/${dev}dir/spdk
done
# Second pass: unmount each device, remove its mount point and print the
# counters read from /sys/block/<dev>/stat under the labels shown below.
for dev in $devs; do
umount /mnt/${dev}dir
rm -rf /mnt/${dev}dir
# Split the single line of the stat file into an array of fields.
stats=( $(cat /sys/block/$dev/stat) )
echo ""
echo "$dev stats"
printf "READ IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \
${stats[0]} ${stats[1]} ${stats[2]} ${stats[3]}
printf "WRITE IO cnt: % 8u merges: % 8u sectors: % 8u ticks: % 8u\n" \
${stats[4]} ${stats[5]} ${stats[6]} ${stats[7]}
printf "in flight: % 8u io ticks: % 8u time in queue: % 8u\n" \
${stats[8]} ${stats[9]} ${stats[10]}
echo ""
done
# Normal completion: clear the failure trap before leaving.
trap - SIGINT SIGTERM EXIT
timing_exit ext4test

View File

@ -0,0 +1,97 @@
#!/usr/bin/env bash
testdir=$(readlink -f $(dirname $0))
rootdir=$testdir/../../..
source $rootdir/scripts/autotest_common.sh
# Both VM_IMG (guest disk image) and VM_QEMU (qemu binary) must be provided
# through the environment; the test refuses to run without them.
if [ -z "$VM_IMG" ]; then
echo "VM_IMG: path to qcow2 image not provided - not running"
exit 1
fi
if [ -z "$VM_QEMU" ]; then
echo "VM_QEMU: path to qemu binary not provided - not running"
exit 1
fi
# Fixed test settings: addresses on the test network, guest credentials,
# libvirt object names, the guest MAC, the temporary overlay image and the
# number of SSH connection attempts made while waiting for the guest.
HOST_IP=192.168.122.1
VM_IP=192.168.122.254
VM_UNAME="root"
VM_PASS="root"
VM_NAME="ext4test_vm"
VM_NET_NAME="test_net"
VM_MAC="02:de:ad:de:ad:01"
VM_BAK_IMG="/tmp/ext4test_backing.img"
TIMEO=60
# ssh/scp wrappers that supply the guest password non-interactively.
SSHCMD="sshpass -p $VM_PASS ssh"
SCPCMD="sshpass -p $VM_PASS scp"
# Tear down what the test created through libvirt: stop the guest, stop the
# test network and delete the temporary overlay image.
function cleanup_virsh() {
virsh destroy $VM_NAME
virsh net-destroy $VM_NET_NAME
rm $VM_BAK_IMG
}
timing_enter ext4test
# Create a throw-away overlay on top of the provided image so the original
# image is never modified.
qemu-img create -f qcow2 -o backing_file=$VM_IMG $VM_BAK_IMG
# Instantiate the libvirt templates with this run's names and paths.
cp $testdir/spdk_vm_base.xml $testdir/spdk_vm.xml
cp $testdir/spdk_vnet_base.xml $testdir/spdk_vnet.xml
sed -i "s@<name></name>@<name>$VM_NAME</name>@g" $testdir/spdk_vm.xml
sed -i "s@source file=''@source file='$VM_BAK_IMG'@g" $testdir/spdk_vm.xml
sed -i "s@<emulator></emulator>@<emulator>$VM_QEMU</emulator>@g" $testdir/spdk_vm.xml
sed -i "s@<name></name>@<name>$VM_NET_NAME</name>@g" $testdir/spdk_vnet.xml
# The vhost target is started further down, so its pid is not known yet.
# Single quotes defer the expansion of $pid until the trap actually fires;
# with double quotes the (still empty) value would be baked into the handler
# and a failure after the target has started would leave it running.
pid=""
trap 'cleanup_virsh; if [ -n "$pid" ]; then killprocess $pid; fi; exit 1' SIGINT SIGTERM EXIT
virsh net-create $testdir/spdk_vnet.xml
# Change directory and ownership because virsh has issues with
# paths that are in /root tree
cd /tmp
# Start the vhost target in the background and remember its pid for cleanup.
$rootdir/app/vhost/vhost -c $testdir/vhost.conf &
pid=$!
echo "Process pid: $pid"
# Give the target time to come up and create its socket, then open up the
# socket's permissions so that qemu can connect to it.
sleep 10
chmod 777 /tmp/naa.123
# Pack the source tree for upload into the guest.
tar --exclude '.git' --exclude 'spdk.tgz' --exclude '*.d' --exclude '*.o' -zcf /tmp/spdk_host.tgz $rootdir
virsh create $testdir/spdk_vm.xml
# Pin the guest's MAC address to a known IP on the test network.
virsh net-update $VM_NET_NAME add ip-dhcp-host "<host mac='$VM_MAC' name='$VM_NAME' ip='$VM_IP'/>"
# Wait for VM to boot, disable trap temporarily
# so that we don't exit on first fail
echo "Trying to connect to virtual machine..."
trap - SIGINT SIGTERM EXIT
set +xe
# Retry the SSH probe until it succeeds or TIMEO attempts have been used.
# Inside [[ ]] the bare word rc is evaluated arithmetically by -ne.
rc=-1
while [[ $TIMEO -gt 0 && rc -ne 0 ]]; do
$SSHCMD root@$VM_IP -q -oStrictHostKeyChecking=no 'echo Hello'
rc=$?
((TIMEO-=1))
done
set -xe
# Re-arm the cleanup trap; $pid is already set here, so expanding it at
# definition time yields the right process id.
trap "cleanup_virsh; killprocess $pid; exit 1" SIGINT SIGTERM EXIT
if [[ $TIMEO -eq 0 || rc -ne 0 ]]; then
echo "VM did not boot properly, exiting"
exit 1
fi
# Upload and unpack the source tree in the guest, then run the in-guest part
# of the test.
$SSHCMD root@$VM_IP 'mkdir -p /tmp/spdk'
$SCPCMD -r /tmp/spdk_host.tgz root@$VM_IP:/tmp/spdk
$SSHCMD root@$VM_IP 'cd /tmp/spdk; tar xf spdk_host.tgz'
$SSHCMD root@$VM_IP '/tmp/spdk/test/vhost/ext4test/ext4connect.sh'
#read -p "Hit enter to exit..."
# Normal completion: disarm the trap and clean up explicitly.
trap - SIGINT SIGTERM EXIT
cleanup_virsh
rm $testdir/spdk_vm.xml
rm $testdir/spdk_vnet.xml
killprocess $pid
timing_exit ext4test

View File

@ -0,0 +1,69 @@
<?xml version="1.0"?>
<domain xmlns:qemu="http://libvirt.org/schemas/domain/qemu/1.0" type="kvm">
<name/>
<memory unit="GiB">2</memory>
<currentMemory unit="GiB">2</currentMemory>
<vcpu placement="static">4</vcpu>
<os>
<type arch="x86_64" machine="pc-i440fx-1.6">hvm</type>
<boot dev="hd"/>
</os>
<features>
<acpi/>
<apic/>
<pae/>
</features>
<cpu mode="host-model">
<model fallback="allow"/>
</cpu>
<clock offset="utc"/>
<on_poweroff>destroy</on_poweroff>
<on_reboot>restart</on_reboot>
<on_crash>destroy</on_crash>
<devices>
<emulator/>
<disk type="file" device="disk">
<driver name="qemu" type="qcow2"/>
<source file=""/>
<backingStore/>
<target dev="hda" bus="ide"/>
<address type="drive" domain="0" bus="0" slot="0" function="0"/>
</disk>
<controller type="usb" index="0">
<address type="pci" domain="0x0000" bus="0x00" slot="0x01" function="0x2"/>
</controller>
<controller type="pci" index="0" model="pci-root"/>
<interface type="network">
<mac address="02:de:ad:de:ad:01"/>
<source network="test_net"/>
<model type="virtio"/>
<address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x0"/>
</interface>
<serial type="pty">
<target port="0"/>
</serial>
<console type="pty">
<target type="serial" port="0"/>
</console>
<input type="mouse" bus="ps2"/>
<input type="keyboard" bus="ps2"/>
<graphics type="vnc" port="-1" autoport="yes"/>
<video>
<model type="cirrus" vram="16384" heads="1"/>
<address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x0"/>
</video>
<memballoon model="virtio">
<address type="pci" domain="0x0000" bus="0x00" slot="0x05" function="0x0"/>
</memballoon>
</devices>
<qemu:commandline>
<qemu:arg value="-object"/>
<qemu:arg value="memory-backend-file,id=mem,size=2048M,mem-path=/mnt/huge,share=on"/>
<qemu:arg value="-numa"/>
<qemu:arg value="node,memdev=mem"/>
<qemu:arg value="-chardev"/>
<qemu:arg value="socket,id=char0,path=/tmp/naa.123"/>
<qemu:arg value="-device"/>
<qemu:arg value="vhost-scsi-pci,id=scsi0,wwpn=naa.123,user=true,chardev=char0"/>
</qemu:commandline>
</domain>

View File

@ -0,0 +1,11 @@
<?xml version="1.0"?>
<network>
<name/>
<bridge name="virbr123"/>
<forward/>
<ip address="192.168.122.1" netmask="255.255.255.0">
<dhcp>
<range end="192.168.122.254" start="192.168.122.2"/>
</dhcp>
</ip>
</network>

View File

@ -0,0 +1,47 @@
# spdk configuration file
#
# Please write all parameters using ASCII.
# The parameter must be quoted if it includes whitespace.
#
# Configuration syntax:
# Spaces at head of line are deleted, other spaces are as separator
# Lines starting with '#' are comments and not evaluated.
# Lines ending with '\' are concatenated with the next line.
# Bracketed keys are section keys grouping the following value keys.
# Number of section key is used as a tag number.
# Ex. [TargetNode1] = TargetNode section key with tag number 1
[Global]
# Users can restrict work items to only run on certain cores by
# specifying a WorkerMask. Default is to allow work items to run
# on all cores.
#WorkerMask 0xFFFF
# Event mask for ids history buffers
# Default: 0x0 (all events disabled)
# Set to 0xFFFFFFFFFFFFFFFF to enable all events.
#EventMask 0x0
# syslog facility
LogFacility "local7"
[Rpc]
# Defines whether ids will enable configuration via RPC.
# Default is disabled. Note that the RPC interface is not
# authenticated, so users should be careful about enabling
# RPC in non-trusted environments.
Enable Yes
[Ioat]
Disable Yes
[Malloc]
NumberOfLuns 1
LunSizeInMb 512
[Nvme]
UnbindFromKernel Yes
[VhostScsi0]
Name naa.123
Dev0 Nvme0n1
Dev1 Malloc0

85
test/vhost/fiotest/README Normal file
View File

@ -0,0 +1,85 @@
Overview
---
Utility scripts for automated FIO tests of virtual machines.
Virtualization is done using QEMU software.
Requirements
---
- 'fio' and 'perf' packages must be installed in order for tests to run
- Installed fio version must be the same as fio installed on qemu guest
systems. Another solution is to provide the path to a FIO binary
in arguments for testing scripts. If fio versions are different, tests will not run.
- All dependency packages for building QEMU.
- QEMU source package. By default it is expected to be in the "qemu" directory in the
root dir of the main spdk directory.
- a qemu-compatible VM image.
- RSA key for VM SSH access in $HOME/.ssh/spdk_vhost_id_rsa or in a different
directory specified by $SPDK_VHOST_SSH_KEY_FILE global variable.
Files:
---
common.sh
Header file to be included in other files.
autotest.sh
Script to perform automated fio test with given number of virtual machines
and given scenario type (virtio / kernel vhost / spdk vhost).
Can run an end-to-end test or with "--dry-run" option can just enable
virtual machines and leave them for user's manual tests.
run_vhost.sh
Run single instance of vhost application. Useful during development.
See 'run_vhost.sh --help'
run_fio.py
Script used to run fio utility on group of virtual machines
using default configuration or with parameters specified for
autotest.sh execution.
Script can also be executed with manually input parameters, resulting
in launching multiple fio jobs which are then combinations of all
parameters.
See 'python run_fio.py --help'
vm_setup.sh
Utility script used to create a virtual machine
with specified disk/block device and cache type for tests.
Useful during development.
See 'vm_setup.sh --help'
vm_run.sh
Utility script used to enable selected virtual machines.
Can enable all or specific virtual machines from directory.
Before running this script make sure that there was at least 1
virtual machine created using vm_setup.sh script.
Useful during development.
See 'vm_run.sh --help'
vm_shutdown.sh
Utility script used to shut down all or specific virtual machines
if any remain active after test run.
Useful during development.
See 'vm_shutdown.sh --help'
vm_ssh.sh
Utility script used to connect to specific virtual machine via ssh.
Useful during development.
See 'vm_ssh.sh --help'
Examples:
---
--- Example 1, simple run:
In spdk directory execute:
./test/vhost/fiotest/autotest.sh --vm=0,<path to VM image>,<device> --fio-bin=<path to fio bin>
<device> - backend used for testing, e.g. Malloc0, Nvme0n1...
<path to fio bin> - path to FIO binary
This runs tests for 1 VM using spdk vhost.
By default all jobs defined in test/vhost/fiotest/fio_jobs/ are executed sequentially.
--- Example 2, multiple VMs:
./test/vhost/fiotest/autotest.sh --vm=0,<path to VM image>,<device> --vm=1,<path to VM image 2>,<device 2>
Same configuration as Example 1 but fio runs in parallel on 2 VMs

View File

@ -0,0 +1,5 @@
vhost_reactor_mask=0x1
vhost_master_core=0
qemu_mask=0x2
qemu_numa_node=0

257
test/vhost/fiotest/autotest.sh Executable file
View File

@ -0,0 +1,257 @@
#!/usr/bin/env bash
set -e
BASE_DIR=$(readlink -f $(dirname $0))
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd $BASE_DIR/../../../../ && pwd)"
# Default settings; most of them can be overridden by the options parsed below.
dry_run=false
no_shutdown=false
# Extra option forwarded verbatim to vm_start_fio_server and run_fio.py.
# It must be empty or a complete "--fio-bin=PATH" option (see the fio-bin
# case in the option parser): both consumers parse it as an option, so a
# bare word such as "fio" would be taken as a positional argument instead.
# When empty, the consumers fall back to their own default fio binary.
fio_bin=""
fio_jobs="$BASE_DIR/fio_jobs/"
test_type=spdk_vhost
reuse_vms=false
force_build=false
# Raw --vm=NUM[,OS][,DISKS] values, one entry per VM.
vms=()
# Space separated list of VM numbers that were set up.
used_vms=""
disk_split=""
# Holds "-x" when script debugging is requested; passed on to helper scripts.
x=""
max_sectors_kb=4
function usage()
{
[[ ! -z $2 ]] && ( echo "$2"; echo ""; )
echo "Shortcut script for doing automated test"
echo "Usage: $(basename $1) [OPTIONS]"
echo
echo "-h, --help print help and exit"
echo " --test-type=TYPE Perform specified test:"
echo " virtio - test host virtio-scsi-pci using file as disk image"
echo " kernel_vhost - use kernel driver vhost-scsi"
echo " spdk_vhost - use spdk vhost"
echo "-x set -x for script debug"
echo " --fio-bin=FIO Use specific fio binary (will be uploaded to VM)"
echo " --qemu-src=QEMU_DIR Location of the QEMU sources"
echo " --dpdk-src=DPDK_DIR Location of the DPDK sources"
echo " --fio-jobs= Fio configs to use for tests. Can point to a directory or"
echo " can point to a directory with regex mask, example: ./dir/*.job"
echo " All VMs will run the same fio job when FIO executes."
echo " (no unique jobs for specific VMs)"
echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]"
echo " --dry-run Don't perform any tests, run only and wait for enter to terminate"
echo " --no-shutdown Don't shutdown at the end but leave envirionment working"
echo " --force-build Force SPDK rebuild with the specified DPDK path."
echo " --vm=NUM[,OS][,DISKS] VM configuration. This parameter might be used more than once:"
echo " NUM - VM number (mandatory)"
echo " OS - VM os disk path (optional)"
echo " DISKS - VM os test disks/devices path (virtio - optional, kernel_vhost - mandatory)"
echo " --disk-split By default all test types execute fio jobs on all disks which are available on guest"
echo " system. Use this option if only some of the disks should be used for testing."
echo " Example: --disk-split=4,1-3 will result in VM 1 using it's first disk (ex. /dev/sda)"
echo " and VM 2 using it's disks 1-3 (ex. /dev/sdb, /dev/sdc, /dev/sdd)"
echo " --max-sectors=NUM Set max_sectors_kb for test disk to NUM (default: $max_sectors_kb)"
exit 0
}
#default raw file is NVMe drive
while getopts 'xh-:' optchar; do
case "$optchar" in
-)
case "$OPTARG" in
help) usage $0 ;;
work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
fio-bin=*) fio_bin="--fio-bin=${OPTARG#*=}" ;;
qemu-src=*) QEMU_SRC_DIR="${OPTARG#*=}" ;;
dpdk-src=*) DPDK_SRC_DIR="${OPTARG#*=}" ;;
fio-jobs=*) fio_jobs="${OPTARG#*=}" ;;
dry-run) dry_run=true ;;
no-shutdown) no_shutdown=true ;;
test-type=*) test_type="${OPTARG#*=}" ;;
force-build) force_build=true ;;
vm=*) vms+=("${OPTARG#*=}") ;;
disk-split=*) disk_split="${OPTARG#*=}" ;;
max-sectors=*) max_sectors_kb="${OPTARG#*=}" ;;
*) usage $0 "Invalid argument '$OPTARG'" ;;
esac
;;
h) usage $0 ;;
x) set -x
x="-x" ;;
*) usage $0 "Invalid argument '$OPTARG'"
esac
done
shift $(( OPTIND - 1 ))
if [[ -d "$fio_jobs" ]]; then
fio_jobs="$fio_jobs/*.job"
fi
. $BASE_DIR/common.sh
trap 'error_exit "${FUNCNAME}" "${LINENO}"' ERR
echo "==============="
echo "INFO: checking qemu"
if [[ ! -x $INSTALL_DIR/bin/qemu-system-x86_64 ]]; then
echo "INFO: can't find $INSTALL_DIR/bin/qemu-system-x86_64 - building and installing"
if [[ ! -d $QEMU_SRC_DIR ]]; then
echo "ERROR: Cannot find qemu source in $QEMU_SRC_DIR"
exit 1
else
echo "INFO: qemu source exists $QEMU_SRC_DIR - building"
qemu_build_and_install
fi
fi
echo "==============="
echo ""
echo "INFO: checking spdk"
echo ""
if [[ ! -x $SPDK_BUILD_DIR/app/vhost/vhost ]] || $force_build ; then
echo "INFO: $SPDK_BUILD_DIR/app/vhost/vhost - building and installing"
spdk_build_and_install
fi
vm_kill_all
if [[ $test_type == "spdk_vhost" ]]; then
echo "==============="
echo ""
echo "INFO: running SPDK"
echo ""
$BASE_DIR/run_vhost.sh $x --work-dir=$TEST_DIR
echo
fi
echo "==============="
echo ""
echo "Setting up VM"
echo ""
rpc_py="python $SPDK_BUILD_DIR/scripts/rpc.py "
rpc_py+="-s 127.0.0.1 "
for vm_conf in ${vms[@]}; do
IFS=',' read -ra conf <<< "$vm_conf"
setup_cmd="$BASE_DIR/vm_setup.sh $x --work-dir=$TEST_DIR --test-type=$test_type"
if [[ x"${conf[0]}" == x"" ]] || ! assert_number ${conf[0]}; then
echo "ERROR: invalid VM configuration syntax $vm_conf"
exit 1;
fi
# Sanity check if VM is not defined twice
for vm_num in $used_vms; do
if [[ $vm_num -eq ${conf[0]} ]]; then
echo "ERROR: VM$vm_num defined more than twice ( $(printf "'%s' " "${vms[@]}"))!"
exit 1
fi
done
setup_cmd+=" -f ${conf[0]}"
used_vms+=" ${conf[0]}"
[[ x"${conf[1]}" != x"" ]] && setup_cmd+=" --os=${conf[1]}"
[[ x"${conf[2]}" != x"" ]] && setup_cmd+=" --disk=${conf[2]}"
if [[ $test_type == "spdk_vhost" ]]; then
echo "INFO: Adding device via RPC ..."
echo ""
eval $(grep "^vhost_reactor_mask=" $BASE_DIR/autotest.config)
while IFS=':' read -ra disks; do
for disk in "${disks[@]}"; do
$rpc_py construct_vhost_scsi_controller naa.$disk.${conf[0]} \
--cpumask $vhost_reactor_mask
$rpc_py add_vhost_scsi_lun naa.$disk.${conf[0]} 0 $disk
done
done <<< "${conf[2]}"
unset IFS;
$rpc_py get_vhost_scsi_controllers
fi
$setup_cmd
done
# Run everything
$BASE_DIR/vm_run.sh $x --work-dir=$TEST_DIR $used_vms
vm_wait_for_boot 600 $used_vms
echo "==============="
echo ""
echo "INFO: Testing..."
echo "INFO: Running fio jobs ..."
run_fio="python $BASE_DIR/run_fio.py "
run_fio+="$fio_bin "
run_fio+="--job-file="
for job in $fio_jobs; do
run_fio+="$job,"
done
run_fio="${run_fio::-1}"
run_fio+=" "
run_fio+="--out=$TEST_DIR "
if [[ ! $disk_split == '' ]]; then
run_fio+="--split-disks=$disk_split "
fi
# Check if all VMs have their disks in the same location
DISK=""
for vm_num in $used_vms; do
vm_dir=$VM_BASE_DIR/$vm_num
host_name="VM-$vm_num-$(cat $BASE_DIR/autotest.config|grep qemu_mask|awk -F'=' '{print $2}'|sed "$(($vm_num+1))q;d")"
echo "INFO: Setting up hostname: $host_name"
vm_ssh $vm_num "hostname $host_name"
vm_start_fio_server $fio_bin $readonly $vm_num
vm_check_scsi_location $vm_num
SCSI_DISK="${SCSI_DISK::-1}"
for DISK in $SCSI_DISK; do
echo "INFO: VM$vm_num Setting max_sectors_kb=$max_sectors_kb on disk $DISK"
echo ""
vm_ssh $vm_num "echo $max_sectors_kb > /sys/block/$DISK/queue/max_sectors_kb"
done
vm_reset_scsi_devices $vm_num $SCSI_DISK
run_fio+="127.0.0.1:$(cat $vm_dir/fio_socket):"
for disk in $SCSI_DISK; do
run_fio+="$disk:"
done
run_fio="${run_fio::-1}"
run_fio+=","
done
run_fio="${run_fio%,}"
run_fio+=" "
run_fio="${run_fio::-1}"
echo -e "$run_fio"
if $dry_run; then
read -p "Enter to kill evething" xx
sleep 3
at_app_exit
exit 0
fi
$run_fio
for vm_num in $used_vms; do
vm_reset_scsi_devices $vm_num $SCSI_DISK
done
if ! $no_shutdown; then
echo "==============="
echo "INFO: Testing done -> shutting down"
at_app_exit
echo "==============="
else
echo "==============="
echo
echo "INFO: Leaving environment working!"
echo ""
echo "==============="
fi

View File

@ -0,0 +1,756 @@
set -e
BASE_DIR=$(readlink -f $(dirname $0))
MAKE="make -j$(( $(nproc) * 2 ))"
# Default running dir -> spdk/..
[[ -z "$TEST_DIR" ]] && TEST_DIR=$BASE_DIR/../../../../
TEST_DIR="$(mkdir -p $TEST_DIR && cd $TEST_DIR && echo $PWD)"
SPDK_SRC_DIR=$TEST_DIR/spdk
SPDK_BUILD_DIR=$BASE_DIR/../../../
SPDK_VHOST_SCSI_TEST_DIR=$TEST_DIR/vhost
# QEMU source and build folders
[[ -z "$QEMU_SRC_DIR" ]] && QEMU_SRC_DIR="$TEST_DIR/qemu"
QEMU_BUILD_DIR="$QEMU_SRC_DIR/build"
# DPDK source and build folders
[[ -z "$DPDK_SRC_DIR" ]] && DPDK_SRC_DIR="$TEST_DIR/dpdk"
# SSH key file
[[ -z "$SPDK_VHOST_SSH_KEY_FILE" ]] && SPDK_VHOST_SSH_KEY_FILE="$HOME/.ssh/spdk_vhost_id_rsa"
if [[ ! -e "$SPDK_VHOST_SSH_KEY_FILE" ]]; then
echo "Could not find SSH key file $SPDK_VHOST_SSH_KEY_FILE"
exit 1
fi
echo "Using SSH key file $SPDK_VHOST_SSH_KEY_FILE"
VM_CNT=0
VM_BASE_DIR="$TEST_DIR/vms"
INSTALL_DIR="$TEST_DIR/root"
mkdir -p $TEST_DIR
###
# Building functions
###
function error()
{
	# Print the given message framed by separator lines and report failure,
	# so callers can propagate the non-zero status.
	local frame="==========="

	echo "$frame"
	echo -e "ERROR: $@"
	echo "$frame"
	return 1
}
# Build QEMU from $QEMU_SRC_DIR directory in $QEMU_BUILD_DIR and install in $INSTALL_DIR
#
# NOTE: It will use CCACHE if detected.
# FIXME: quiet configuration and build
#
function qemu_build_and_install()
{
mkdir -p $QEMU_BUILD_DIR
cd $QEMU_BUILD_DIR
echo "INFO: Configuring QEMU from source in $QEMU_SRC_DIR"
if type ccache > /dev/null 2>&1; then
echo "INFO: CCACHE detected"
export CC="ccache cc"
export CXX="ccache c++"
export CPP="ccache cpp"
else
echo "INFO: CCACHE NOT detected - consider installing."
fi
$QEMU_SRC_DIR/configure --prefix=$INSTALL_DIR \
--target-list="x86_64-softmmu" \
--enable-kvm --enable-linux-aio --enable-numa
echo "INFO: Compiling and installing QEMU in $INSTALL_DIR"
$MAKE install
echo "INFO: DONE"
}
# Build SPDK in $SPDK_BUILD_DIR, passing $DPDK_SRC_DIR to make as DPDK_DIR.
function spdk_build_and_install()
{
echo "INFO: Building SPDK"
echo "checking dependencies..."
case `uname` in
FreeBSD)
local dpdk_target=x86_64-native-bsdapp-clang
;;
Linux)
local dpdk_target=x86_64-native-linuxapp-gcc
;;
*)
echo "Unknown OS in $0"
exit 1
;;
esac
if [[ ! -x $DPDK_SRC_DIR/$dpdk_target ]]; then
echo "ERROR: can't find $DPDK_SRC_DIR/$dpdk_target"
exit 1
fi
cd $SPDK_BUILD_DIR
$MAKE clean
$MAKE DPDK_DIR=$DPDK_SRC_DIR
echo "INFO: DONE"
}
function spdk_vhost_run()
{
local vhost_app="$SPDK_BUILD_DIR/app/vhost/vhost"
local vhost_log_file="$SPDK_VHOST_SCSI_TEST_DIR/vhost.log"
local vhost_pid_file="$SPDK_VHOST_SCSI_TEST_DIR/vhost.pid"
local vhost_socket="$SPDK_VHOST_SCSI_TEST_DIR/usvhost"
local vhost_conf_file="$BASE_DIR/vhost.conf"
echo "INFO: starting vhost app in background"
[[ -r "$vhost_pid_file" ]] && spdk_vhost_kill
[[ -d $SPDK_VHOST_SCSI_TEST_DIR ]] && rm -f $SPDK_VHOST_SCSI_TEST_DIR/*
mkdir -p $SPDK_VHOST_SCSI_TEST_DIR
if [[ ! -x $vhost_app ]]; then
error "application not found: $vhost_app"
return 1
fi
local cmd="$vhost_app -m $(cat $BASE_DIR/autotest.config|grep vhost_reactor_mask|awk -F'=' '{print $2}') \
-p $(cat $BASE_DIR/autotest.config|grep vhost_master_core|awk -F'=' '{print $2}') \
-c $vhost_conf_file"
echo "INFO: Loging to: $vhost_log_file"
echo "INFO: Config file: $vhost_conf_file"
echo "INFO: Socket: $vhost_socket"
echo "INFO: Command: $cmd"
( cd $SPDK_VHOST_SCSI_TEST_DIR; $cmd & echo $! >&3) 3>$vhost_pid_file 2>&1 | tee -a $vhost_log_file &
echo "INFO: waiting 25s to allow app to run..."
sleep 25
kill -0 $(cat $vhost_pid_file)
echo "INFO: vhost started - pid=$(cat $vhost_pid_file)"
}
function spdk_vhost_kill()
{
	# Stop the vhost app recorded in $SPDK_VHOST_SCSI_TEST_DIR/vhost.pid:
	# send SIGINT, wait until the process is gone, then remove the pid file.
	# Returns 0 when there is no pid file or the app was not running,
	# 1 when the signal could not be delivered to a live process.
	local pid_file="$SPDK_VHOST_SCSI_TEST_DIR/vhost.pid"

	if [[ ! -r $pid_file ]]; then
		echo "WARN: no vhost pid file found"
		return 0
	fi

	local pid="$(cat $pid_file)"
	echo "INFO: killing vhost (PID $pid) app"

	if /bin/kill -INT $pid >/dev/null; then
		echo "INFO: vhost app killed - waiting to exit"
		# Poll once per second until the process no longer exists.
		until ! /bin/kill -0 $pid; do
			echo "."
			sleep 1
		done
	elif /bin/kill -0 $pid; then
		# Signal delivery failed although the process is alive.
		error "vhost NOT killed - you need to kill it manually"
		return 1
	else
		echo "INFO: vhost was no running"
	fi

	rm $pid_file
}
###
# Mgmt functions
###
# Succeed only if $1 is a non-empty string made up entirely of decimal digits.
# Otherwise print a diagnostic naming the caller to stderr and return 1.
function assert_number()
{
	# The pattern is anchored on both ends: without ^...$ any argument that
	# merely contains a digit (e.g. "abc1" or "1,foo") would be accepted.
	[[ "$1" =~ ^[0-9]+$ ]] && return 0

	echo "${FUNCNAME[1]}() - ${BASH_LINENO[1]}: ERROR Invalid or missing paramter: need number but got '$1'" >&2
	return 1;
}
# Helper to validate VM number
# param $1 VM number
#
function vm_num_is_valid()
{
	# NOTE: the pattern is not anchored, so any argument that contains at
	# least one digit is accepted, not only pure numbers.
	if [[ "$1" =~ [0-9]+ ]]; then
		return 0
	fi

	# Report the problem on stderr, naming the calling function and line.
	echo "${FUNCNAME[1]}() - ${BASH_LINENO[1]}: ERROR Invalid or missing paramter: vm number '$1'" > /dev/stderr
	return 1;
}
# Print network socket for given VM number
# param $1 virtual machine number
#
function vm_ssh_socket()
{
	# Print the contents of the ssh_socket file stored in the directory of
	# VM number $1. Returns 1 if $1 is rejected by vm_num_is_valid.
	if ! vm_num_is_valid $1; then
		return 1
	fi

	cat $VM_BASE_DIR/$1/ssh_socket
}
function vm_fio_socket()
{
	# Print the contents of the fio_socket file stored in the directory of
	# VM number $1. Returns 1 if $1 is rejected by vm_num_is_valid.
	if ! vm_num_is_valid $1; then
		return 1
	fi

	cat $VM_BASE_DIR/$1/fio_socket
}
# Execute ssh command on given VM
# param $1 virtual machine number
#
function vm_ssh()
{
vm_num_is_valid $1 || return 1
local ssh_config="$VM_BASE_DIR/ssh_config"
if [[ ! -f $ssh_config ]]; then
(
echo "Host *"
echo " ControlPersist=10m"
echo " ConnectTimeout=2"
echo " Compression=no"
echo " ControlMaster=auto"
echo " UserKnownHostsFile=/dev/null"
echo " StrictHostKeyChecking=no"
echo " User root"
echo " ControlPath=$VM_BASE_DIR/%r@%h:%p.ssh"
echo ""
) > $ssh_config
fi
local ssh_cmd="ssh -i $SPDK_VHOST_SSH_KEY_FILE -F $ssh_config \
-p $(vm_ssh_socket $1) 127.0.0.1"
shift
$ssh_cmd "$@"
}
# check if specified VM is running
# param $1 VM num
function vm_is_running()
{
	# Returns 0 if the QEMU process recorded in the VM's qemu.pid file can be
	# signalled, 1 otherwise. A stale pid file is removed.
	vm_num_is_valid $1 || return 1

	local pid_file="$VM_BASE_DIR/$1/qemu.pid"

	# No readable pid file - the VM is treated as not running.
	[[ -r $pid_file ]] || return 1

	local qemu_pid="$(cat $pid_file)"
	if ! /bin/kill -0 $qemu_pid; then
		# A non-root caller cannot tell a dead process from a permission
		# error, so the VM is assumed to be alive.
		if [[ $EUID -ne 0 ]]; then
			echo "WARNING: not root - assuming we running since can't be checked"
			return 0
		fi

		# not running - remove pid file
		rm $pid_file
		return 1
	fi

	return 0
}
# check if the OS on the specified VM has booted (i.e. it accepts ssh commands)
# param $1 VM num
function vm_os_booted()
{
	# Returns 0 once a trivial command can be executed on VM $1 over ssh,
	# 1 if the VM has no readable pid file or ssh does not succeed.
	vm_num_is_valid $1 || return 1

	if [[ ! -r $VM_BASE_DIR/$1/qemu.pid ]]; then
		error "VM $1 is not running"
		return 1
	fi

	# ssh errors are silenced; only the exit status matters here.
	vm_ssh $1 "true" 2>/dev/null || return 1

	return 0
}
# Shutdown given VM
# param $1 virtual machine number
# return non-zero in case of error.
function vm_shutdown()
{
	# Ask the guest OS of VM $1 to power off via ssh. This only issues the
	# request; it does not wait for the VM to actually stop.
	vm_num_is_valid $1 || return 1

	local target_dir="$VM_BASE_DIR/$1"

	if [[ ! -d "$target_dir" ]]; then
		error "VM$1 ($target_dir) not exist - setup it first"
		return 1
	fi

	# Nothing to do for a VM that is already down.
	if ! vm_is_running $1; then
		echo "INFO: VM$1 ($target_dir) is not running"
		return 0
	fi

	echo "Shutting down virtual machine $target_dir"
	if ! vm_ssh $1 "nohup sh -c 'shutdown -h -P now'; exit 0"; then
		error "VM$1 shutting FAILED"
		return 1
	fi

	echo "INFO: VM$1 is shutting down - wait a while to complete"
	return 0
}
# Kill given VM
# param $1 virtual machine number
#
function vm_kill()
{
	# Terminate the QEMU process of VM $1 using the pid stored in its
	# qemu.pid file.
	# Returns 0 if there is no pid file, the process was signalled, or it
	# turned out not to be running; returns 1 if it is still running.
	vm_num_is_valid $1 || return 1
	local vm_dir="$VM_BASE_DIR/$1"

	if [[ ! -r $vm_dir/qemu.pid ]]; then
		#echo "WARN: VM$1 pid not found - not killing"
		return 0
	fi

	local vm_pid="$(cat $vm_dir/qemu.pid)"

	echo "Killing virtual machine $vm_dir (pid=$vm_pid)"
	# If the signal cannot be delivered, the process is either already gone
	# (fine - vm_is_running reports that) or is still alive, which is an error.
	if /bin/kill $vm_pid; then
		echo "INFO: process $vm_pid killed"
		rm $vm_dir/qemu.pid
	elif vm_is_running $1; then
		error "Process $vm_pid NOT killed"
		return 1
	fi
}
# Kills all VM in $VM_BASE_DIR
#
function vm_kill_all()
{
	# Call vm_kill for every entry in $VM_BASE_DIR whose name starts with a digit.
	local vm
	for vm in $VM_BASE_DIR/[0-9]*; do
		# When nothing matches (and nullglob is off) the pattern itself is
		# iterated as a literal string; skip it instead of passing a bogus
		# VM number to vm_kill.
		[[ -e $vm ]] || continue
		vm_kill $(basename $vm)
	done
}
# Shutdown all VM in $VM_BASE_DIR
#
function vm_shutdown_all()
{
	# Call vm_shutdown for every entry in $VM_BASE_DIR whose name starts with a digit.
	local vm
	for vm in $VM_BASE_DIR/[0-9]*; do
		# When nothing matches (and nullglob is off) the pattern itself is
		# iterated as a literal string; skip it so that "no VMs defined" is
		# not reported as a shutdown failure.
		[[ -e $vm ]] || continue
		vm_shutdown $(basename $vm)
	done
}
function vm_setup()
{
local OPTIND optchar a
local os=""
local qemu_args=""
local disk_type=NOT_DEFINED
local disks=""
local raw_cache=""
local force_vm=""
while getopts ':-:' optchar; do
case "$optchar" in
-)
case "$OPTARG" in
os=*) local os="${OPTARG#*=}" ;;
os-mode=*) local os_mode="${OPTARG#*=}" ;;
qemu-args=*) local qemu_args="${qemu_args} ${OPTARG#*=}" ;;
disk-type=*) local disk_type="${OPTARG#*=}" ;;
disks=*) local disks="${OPTARG#*=}" ;;
raw-cache=*) local raw_cache=",cache${OPTARG#*=}" ;;
force=*) local force_vm=${OPTARG#*=} ;;
*)
error "unknown argument $OPTARG"
return 1
esac
;;
*)
error "vm_create Unknown param $OPTARG"
return 1
;;
esac
done
# Find next directory we can use
if [[ ! -z $force_vm ]]; then
vm_num=$force_vm
vm_num_is_valid $vm_num || return 1
local vm_dir="$VM_BASE_DIR/$vm_num"
[[ -d $vm_dir ]] && echo "WARNING: removing existing VM in '$vm_dir'"
echo "rm -rf $vm_dir"
else
local vm_dir=""
for (( i=0; i<=256; i++)); do
local vm_dir="$VM_BASE_DIR/$i"
[[ ! -d $vm_dir ]] && break
done
vm_num=$i
fi
if [[ $i -eq 256 ]]; then
error "no free VM found. do some cleanup (256 VMs created, are you insane?)"
return 1
fi
echo "INFO: Creating new VM in $vm_dir"
mkdir -p $vm_dir
if [[ ! -r $os ]]; then
error "file not found: $os"
return 1
fi
# WARNING:
# each cmd+= must contain ' ${eol}' at the end
#
local eol="\\\\\n "
local task_mask=$(cat $BASE_DIR/autotest.config|grep qemu_mask|awk -F'=' '{print $2}'|sed "$(($vm_num+1))q;d")
echo "INFO: TASK MASK: $task_mask"
local cmd="taskset -a $task_mask $INSTALL_DIR/bin/qemu-system-x86_64 ${eol}"
local vm_socket_offset=$(( 10000 + 100 * vm_num ))
local ssh_socket=$(( vm_socket_offset + 0 ))
local fio_socket=$(( vm_socket_offset + 1 ))
local http_socket=$(( vm_socket_offset + 2 ))
local https_socket=$(( vm_socket_offset + 3 ))
local gdbserver_socket=$(( vm_socket_offset + 4 ))
local vnc_socket=$(( 100 + vm_num ))
local qemu_pid_file="$vm_dir/qemu.pid"
local cpu_num=0
for ((cpu=0; cpu<$(nproc --all); cpu++))
do
(($task_mask&1<<$cpu)) && ((cpu_num++)) || :
done
#-cpu host
local node_num=$(cat $BASE_DIR/autotest.config|grep qemu_numa_node|awk -F'=' '{print $2}'|sed "$(($vm_num+1))q;d")
echo "INFO: NUMA NODE: $node_num"
cmd+="-m 1024 --enable-kvm -smp $cpu_num -vga std -vnc :$vnc_socket -daemonize -snapshot ${eol}"
cmd+="-object memory-backend-file,id=mem,size=1G,mem-path=/dev/hugepages,share=on,prealloc=yes,host-nodes=$node_num,policy=bind ${eol}"
cmd+="-numa node,memdev=mem ${eol}"
cmd+="-pidfile $qemu_pid_file ${eol}"
cmd+="-serial file:$vm_dir/serial.log ${eol}"
cmd+="-D $vm_dir/qemu.log ${eol}"
cmd+="-net user,hostfwd=tcp::$ssh_socket-:22,hostfwd=tcp::$fio_socket-:8765,hostfwd=tcp::$https_socket-:443,hostfwd=tcp::$http_socket-:80 ${eol}"
cmd+="-net nic ${eol}"
cmd+="-hda $os ${eol}"
IFS=':'
if ( [[ $disks == '' ]] && [[ $disk_type == virtio* ]] ); then
disks=1
fi
for disk in $disks; do
case $disk_type in
virtio)
local raw_name="RAWSCSI"
local raw_disk=$vm_dir/test.img
if [[ ! -z $disk ]]; then
[[ ! -b $disk ]] && touch $disk
local raw_disk=$(readlink -f $disk)
fi
# Create disk file if it not exist or it is smaller than 10G
if ( [[ -f $raw_disk ]] && [[ $(stat --printf="%s" $raw_disk) -lt $((1024 * 1024 * 1024 * 10)) ]] ) || \
[[ ! -e $raw_disk ]]; then
if [[ $raw_disk =~ /dev/.* ]]; then
error \
"ERROR: Virtio disk point to missing device ($raw_disk) - \n" \
" this is probably not what you want."
return 1
fi
echo "INFO: Creating Virtio disc $raw_disk"
dd if=/dev/zero of=$raw_disk bs=1024k count=10240
else
echo "INFO: Using existing image $raw_disk"
fi
cmd+="-device virtio-scsi-pci ${eol}"
cmd+="-device scsi-hd,drive=hd$i,vendor=$raw_name ${eol}"
cmd+="-drive if=none,id=hd$i,file=$raw_disk,format=raw$raw_cache ${eol}"
;;
spdk_vhost)
echo "INFO: using socket $SPDK_VHOST_SCSI_TEST_DIR/naa.$disk.$vm_num"
cmd+="-chardev socket,id=char_$disk,path=$SPDK_VHOST_SCSI_TEST_DIR/naa.$disk.$vm_num ${eol}"
cmd+="-device vhost-scsi-pci,id=scsi_$disk,wwpn=unused,num_queues=$cpu_num,user=true,chardev=char_$disk ${eol}"
;;
kernel_vhost)
if [[ -z $disk ]]; then
error "need WWN for $disk_type"
return 1
elif [[ ! $disk =~ ^[[:alpha:]]{3}[.][[:xdigit:]]+$ ]]; then
error "$disk_type - disk(wnn)=$disk does not look like WNN number"
return 1
fi
echo "Using kernel vhost disk wwn=$disk"
cmd+=" -device vhost-scsi-pci,wwpn=$disk ${eol}"
;;
*)
error "unknown mode '$disk_type', use: virtio, spdk_vhost or kernel_vhost"
return 1
esac
done
[[ ! -z $qemu_args ]] && cmd+=" $qemu_args ${eol}"
# remove last $eol
cmd="${cmd%\\\\\\n }"
echo "Saving to $vm_dir/run.sh:"
(
echo '#!/bin/bash'
echo 'if [[ $EUID -ne 0 ]]; then '
echo ' echo "Go away user come back as root"'
echo ' exit 1'
echo 'fi';
echo
echo -e "qemu_cmd=\"$cmd\"";
echo
echo "echo 'Running VM in $vm_dir'"
echo "rm -f $qemu_pid_file"
echo '$qemu_cmd'
echo "echo 'Waiting for QEMU pid file'"
echo "[[ ! -f $qemu_pid_file ]] && sleep 1"
echo "[[ ! -f $qemu_pid_file ]] && echo 'ERROR: no qemu pid file found' && exit 1"
echo
echo "chmod +r $vm_dir/*"
echo
echo '# EOF'
) > $vm_dir/run.sh
chmod +x $vm_dir/run.sh
# Save generated sockets redirection
echo $ssh_socket > $vm_dir/ssh_socket
echo $fio_socket > $vm_dir/fio_socket
echo $http_socket > $vm_dir/http_socket
echo $https_socket > $vm_dir/https_socket
echo $gdbserver_socket > $vm_dir/gdbserver_socket
echo $vnc_socket >> $vm_dir/vnc_socket
}
# Start virtual machines by executing their generated run.sh scripts.
# usage: vm_run -a            start every VM found in $VM_BASE_DIR
#        vm_run NUM [NUM...]  start only the listed VM numbers
# VMs that are already running are skipped with a warning.
# Returns 1 on an unknown option, an invalid or undefined VM, or when a
# run.sh script fails.
function vm_run()
{
	local OPTIND optchar a
	local run_all=false
	while getopts 'a-:' optchar; do
		case "$optchar" in
			a) run_all=true ;;
			*)
				echo "vm_run Unknown param $OPTARG"
				return 1
			;;
		esac
	done

	local vms_to_run=""

	if $run_all; then
		# The pattern is expanded later, in the unquoted loop below; with
		# nullglob an empty VM directory yields an empty list.
		shopt -s nullglob
		vms_to_run=$VM_BASE_DIR/[0-9]*
	else
		shift $((OPTIND-1))
		for vm in $@; do
			# Validate the VM currently being processed (not just the
			# first positional argument).
			vm_num_is_valid $vm || return 1
			if [[ ! -x $VM_BASE_DIR/$vm/run.sh ]]; then
				error "VM$vm not defined - setup it first"
				return 1
			fi
			vms_to_run+=" $VM_BASE_DIR/$vm"
		done
	fi

	for vm in $vms_to_run; do
		if vm_is_running $(basename $vm); then
			echo "WARNING: VM$(basename $vm) ($vm) already running"
			continue
		fi

		echo "INFO: running $vm/run.sh"
		if ! $vm/run.sh; then
			error "FAILED to run vm $vm"
			return 1
		fi
	done
}
# Wait for all created VMs to boot.
# param $1 max wait time
function vm_wait_for_boot()
{
	# Poll the given VMs (or every VM in $VM_BASE_DIR when none are listed)
	# once per second until each one answers over ssh.
	# param $1      max wait time in seconds (values below 10 are raised to 10)
	# param $2..$n  optional VM numbers to wait for
	# Returns 1 if a VM stops running (its logs are dumped) or on timeout.
	assert_number $1

	local all_booted=false
	local timeout_time=$1
	[[ $timeout_time -lt 10 ]] && timeout_time=10
	# Convert the relative timeout into an absolute deadline (epoch seconds).
	local timeout_time=$(date -d "+$timeout_time seconds" +%s)

	echo "Waiting for VMs to boot"
	shift
	if [[ "$@" == "" ]]; then
		local vms_to_check="$VM_BASE_DIR/[0-9]*"
	else
		local vms_to_check=""
		for vm in $@; do
			vms_to_check+=" $VM_BASE_DIR/$vm"
		done
	fi

	for vm in $vms_to_check; do
		local vm_num=$(basename $vm)
		# Number of progress dots printed on the current output line.
		local i=0
		echo "INFO: waiting for VM$vm_num ($vm)"
		while ! vm_os_booted $vm_num; do
			if ! vm_is_running $vm_num; then
				echo
				echo "ERROR: VM $vm_num is not running"
				echo "================"
				echo "QEMU LOG:"
				if [[ -r $vm/qemu.log ]]; then
					cat $vm/qemu.log
				else
					echo "LOG not found"
				fi

				echo "VM LOG:"
				if [[ -r $vm/serial.log ]]; then
					cat $vm/serial.log
				else
					echo "LOG not found"
				fi
				echo "================"
				return 1
			fi

			if [[ $(date +%s) -gt $timeout_time ]]; then
				error "timeout waiting for machines to boot"
				return 1
			fi

			# Start a new line of dots once the current one is full.
			if (( i > 30 )); then
				i=0
				echo
			fi
			echo -n "."
			# The counter was previously never advanced, so the wrap above
			# could not trigger. Arithmetic expansion is used rather than
			# (( i++ )), whose status is non-zero when i is 0.
			i=$(( i + 1 ))
			sleep 1
		done
		echo ""
		echo "INFO: VM$vm_num ready"
	done

	echo "INFO: all VMs ready"
	return 0
}
function vm_start_fio_server()
{
local OPTIND optchar
local readonly=''
while getopts ':-:' optchar; do
case "$optchar" in
-)
case "$OPTARG" in
fio-bin=*) local fio_bin="${OPTARG#*=}" ;;
readonly) local readonly="--readonly" ;;
*) echo "Invalid argument '$OPTARG'" && return 1;;
esac
;;
*) echo "Invalid argument '$OPTARG'" && return 1;;
esac
done
shift $(( OPTIND - 1 ))
for vm_num in $@; do
echo "INFO: Starting fio server on VM$vm_num"
if [[ $fio_bin != "" ]]; then
cat $fio_bin | vm_ssh $vm_num 'cat > /root/fio; chmod +x /root/fio'
vm_ssh $vm_num /root/fio $readonly --eta=never --server --daemonize=/root/fio.pid
else
vm_ssh $vm_num fio $readonly --eta=never --server --daemonize=/root/fio.pid
fi
done
}
function vm_check_scsi_location()
{
# Script to find wanted disc
local script='shopt -s nullglob; \
for entry in /sys/block/sd*; do \
disk_type="$(cat $entry/device/vendor)"; \
if [[ $disk_type == INTEL* ]] || [[ $disk_type == RAWSCSI* ]] || [[ $disk_type == LIO-ORG* ]]; then \
fname=$(basename $entry); \
echo -n "$fname "; \
fi; \
done'
SCSI_DISK="$(echo "$script" | vm_ssh $1 bash -s)"
if [[ -z "$SCSI_DISK" ]]; then
error "no test disk found!"
return 1
fi
}
# Script to perform scsi device reset on all disks in VM
# param $1 VM num
# param $2..$n Disks to perform reset on
function vm_reset_scsi_devices()
{
	# Run sg_reset over ssh for every disk name given after the VM number,
	# pausing two seconds after each reset.
	local target_vm=$1

	for disk in "${@:2}"; do
		echo "INFO: VM$target_vm Performing device reset on disk $disk"
		vm_ssh $target_vm sg_reset /dev/$disk -vNd
		sleep 2
	done
}
# Shutdown or kill any running VM and SPDK APP.
#
function at_app_exit()
{
echo "INFO: APP EXITING"
echo "INFO: killing all VMs"
vm_kill_all
# Kill vhost application
echo "INFO: killing vhost app"
spdk_vhost_kill
echo "INFO: EXIT DONE"
}
function error_exit()
{
trap - ERR
set +e
echo "Error on $1 $2"
at_app_exit
exit 1
}

View File

@ -0,0 +1,18 @@
[global]
blocksize=4k
iodepth=512
iodepth_batch=128
iodepth_low=256
ioengine=libaio
size=1G
io_size=4G
filename=
group_reporting
thread
numjobs=1
direct=1
rw=randwrite
do_verify=1
verify=meta
verify_backlog=1024
[nvme-host]

View File

@ -0,0 +1,15 @@
[global]
blocksize=4k
iodepth=512
iodepth_batch=128
iodepth_low=256
ioengine=libaio
size=10G
filename=
ramp_time=10
group_reporting
thread
numjobs=1
direct=1
rw=randread
[nvme-host]

312
test/vhost/fiotest/run_fio.py Executable file
View File

@ -0,0 +1,312 @@
#!/usr/bin/env python
import os
import sys
import getopt
import subprocess
import itertools
import datetime
import signal
import re
fio_bin = "fio"
perf_vmex = False
fio_template = """
[global]
ioengine=%(ioengine)s
size=%(size)s
filename=%(filename)s
numjobs=%(numjobs)s
bs=%(blocksize)s
iodepth=%(iodepth)s
direct=%(direct)s
rw=%(testtype)s
group_reporting
thread
%(verify)s
[nvme-host]
"""
def show_help(fio_args_dict):
print("""Usage: python run_fio.py [options] [args]
Args:
[VMs] (ex. vm1_IP:vm1_port,vm2_IP:vm2_port,etc...)
[fio filename arg], ex. /dev/sda)
Options:
-h, --help Show this message.
-j, --job-files Paths to files with custom FIO jobs configuration.
-F, --fio-bin Location of FIO binary (Default "fio")
-s, --size Size of IO for job. Will be distributed among
number of numjobs (Default: %(size)s)
-t, --testtype Type of FIO test (Default: %(testtype)s)
-b, --blocksize Blocksize for FIO test (Default: %(blocksize)s)
-i, --iodepth IO depth for FIO test (Default: %(iodepth)s)
-I, --ioengine Type of FIO ioengine to use (Default: %(ioengine)s)
-n, --numjobs Number of threads for job (Default: %(numjobs)s)
-D, --direct Use non-buffered IO? (Default: %(direct)s)
-v, --verify Verify after writing to file (Default: %(verify)s)
-o, --out Directory used to save generated job files and
files with test results (Default: same dir where
this script is located)
-p, --perf-vmex Enable aggregating statistic for VMEXITS
""" % fio_args_dict)
def exec_cmd(cmd, blocking):
    """Run a command given as a single string.

    The string is split on single spaces (no shell-style quote handling) and
    started with stdout and stderr merged into one pipe.

    When ``blocking`` is True, wait for completion and return
    ``(returncode, output)``; otherwise return the Popen object immediately.
    """
    # Print result to STDOUT for now, we don't have json support yet.
    # NOTE: the plain split is kept on purpose - callers in this file pass
    # quoted remote commands whose words are re-joined by the ssh wrapper.
    argv = cmd.split(" ")
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
    if blocking is not True:
        return proc
    output, _ = proc.communicate()
    return proc.returncode, output
def save_file(path, mode, contents):
    """Write ``contents`` to ``path``, opening the file with ``mode``."""
    # The context manager closes the file on exit.
    with open(path, mode) as out_file:
        out_file.write(contents)
def prep_fio_cfg_file(out_dir, fio_cfg, vm_nb):
    """Store a fio job description for one VM and return the file path.

    The job text ``fio_cfg`` is written to ``<out_dir>/fio_job_vm<vm_nb>``.
    """
    job_path = os.path.join(out_dir, "fio_job_vm{0}".format(vm_nb))
    print("file {0} written".format(job_path))
    save_file(job_path, "w", fio_cfg)
    return job_path
def calc_size(size, numjobs):
    """Divide a fio size string evenly among ``numjobs`` jobs.

    ``size`` is a number with an optional alphabetic unit suffix (e.g. "10G");
    the numeric part is integer-divided by ``numjobs`` and the suffix is
    appended again, so calc_size("10G", "2") returns "5G".

    Raises ValueError if ``size`` contains no digits or ``numjobs`` is not an
    integer, and ZeroDivisionError if ``numjobs`` is zero.
    """
    # Built with join() rather than filter() so the result is a string on
    # every Python version (filter() only returns str on Python 2).
    digits = "".join(ch for ch in size if ch.isdigit())
    unit = "".join(ch for ch in size if ch.isalpha())
    # Floor division keeps the result integral regardless of interpreter.
    return str(int(digits) // int(numjobs)) + unit
def cfg_product(fio_args_dict):
    """Yield every combination of the parameter values in ``fio_args_dict``.

    ``fio_args_dict`` maps a parameter name to a list of candidate values.
    Each yielded item is a dict mapping every name to one chosen value.
    """
    # Fix the key order once so names and values are guaranteed to line up;
    # this also avoids the Python 2 only dict.itervalues().
    names = list(fio_args_dict)
    value_lists = [fio_args_dict[name] for name in names]
    return (dict(zip(names, combo))
            for combo in itertools.product(*value_lists))
def run_fio(vms, fio_cfg_file, out_path):
global perf_vmex
# Prepare command template for FIO
fio_cmd = fio_bin
fio_cmd = " ".join([fio_cmd, "--eta=never"])
print fio_cfg_file
fio_cfg_name = (os.path.basename(fio_cfg_file)).split(".")[0]
for i, vm in enumerate(vms):
print("Starting thread {0} for VM: {1}".format(i, vm))
# vm[0] = IP address, vm[1] = Port number
fio_cmd = " ".join([fio_cmd,
"--client={0},{1}".format(vm[0], vm[1])])
fio_cmd = " ".join([fio_cmd,
"--remote-config /root/fio.job{0}".format(i)])
print fio_cmd
if perf_vmex:
# Start gathering perf statistics for host and VM guests
perf_rec_file = os.path.join(out_path, "perf.data.kvm")
perf_run_cmd = "perf kvm --host --guest " + \
"-o {0} stat record -a".format(perf_rec_file)
print perf_run_cmd
perf_p = exec_cmd(perf_run_cmd, blocking=False)
# Run FIO test on VMs
rc, out = exec_cmd(fio_cmd, blocking=True)
# if for some reason output contains lines with "eta" - remove them
out = re.sub(r'.+\[eta\s+\d{2}m\:\d{2}s\]', '', out)
if rc != 0:
print(rc, out)
return rc
else:
print out
save_file(os.path.join(out_path, "".join([fio_cfg_name, ".log"])), "w", out)
# out = out[out.find("Disk"):]
# out = out[out.find(":")+2:]
# JSON format not supported on Debian IMG for now, not parsing
# data = json.loads(out)
# pprint(data)
pass
if perf_vmex:
# Stop gathering perf statistics and prepare some result files
perf_p.send_signal(signal.SIGINT)
perf_p.wait()
perf_stat_cmd = "perf kvm --host " + \
"-i {0} stat report".format(perf_rec_file)
print(" ".join([perf_stat_cmd, "--event vmexit"]))
rc, out = exec_cmd(" ".join([perf_stat_cmd, "--event vmexit"]),
blocking=True)
print("VMexit host stats:")
print("{0}".format(out))
save_file(os.path.join(out_path, "vmexit_stats"),
"w", "{0}".format(out))
def main():
    """Parse the command line, upload fio job files to the VMs and run fio.

    The first positional argument is a comma separated list of
    ``IP:port[:disk[:disk...]]`` entries, one per VM. With ``--job-file`` the
    given job files are uploaded (their ``filename`` lines replaced with the
    VM's disks) and run one after another; otherwise one job is generated for
    every combination of the fio parameters given on the command line.

    Returns the value returned by the last ``run_fio`` call (0 if none ran).
    Exits with status 1 on bad options, ``--help`` or a missing VM list.
    """
    abspath = os.path.abspath(__file__)
    dname = os.path.dirname(abspath)
    # Relative helper paths below assume the repository root as cwd.
    os.chdir(os.path.join(dname, "../../.."))

    global fio_bin
    global perf_vmex
    job_file_opt = False
    vms = []
    split_disks = []
    filenames = ""
    out_dir = os.path.join(os.getcwd(), "fio_results")
    fio_cfg_files = []
    rc = 0
    vm_ssh = "./test/vhost/fiotest/vm_ssh.sh "

    fio_args_def = {
        'size': ["10G"],
        'numjobs': ["1"],
        'testtype': ["randread"],
        'blocksize': ["4k"],
        'iodepth': ["128"],
        'ioengine': ["libaio"],
        'direct': ["1"],
        'verify': [""]
    }
    fio_args = fio_args_def.copy()

    try:
        # "perf-vmex" is the documented spelling; "perf" is kept so command
        # lines that were accepted before keep working.
        opts, args = getopt.getopt(sys.argv[1:], "hj:t:b:i:D:n:F:I:s:v:o:S:p",
                                   ["help", "job-file=", "testtype=",
                                    "blocksize=", "iodepth=", "direct=",
                                    "numjobs=", "fio-bin=", "ioengine=",
                                    "size=", "verify=", "out=",
                                    "split-disks=", "perf", "perf-vmex"])
    except getopt.GetoptError:
        show_help(fio_args_def)
        sys.exit(1)

    for o, a in opts:
        print("{0} {1}".format(o, a))
        if o in ("-j", "--job-file"):
            fio_cfg_files = a.split(",")
            job_file_opt = True
            fio_args = fio_args_def.copy()
        elif o in ("-h", "--help"):
            show_help(fio_args_def)
            sys.exit(1)
        elif o in ("-p", "--perf", "--perf-vmex"):
            perf_vmex = True
        elif o in ("-o", "--out"):
            out_dir = os.path.join(a, "fio_results")
        elif o in ("-F", "--fio-bin"):
            fio_bin = a
        elif o in ("-S", "--split-disks"):
            # "4,1-3" -> [[3], [0, 2]]: zero-based disk index (or index range)
            # per VM.
            split_disks = [x.split("-") for x in a.split(",")]
            split_disks = [[int(x) - 1 for x in y] for y in split_disks]
            print(split_disks)
        elif o in ("-s", "--size"):
            fio_args["size"] = a.split(",")
        elif o in ("-t", "--testtype"):
            fio_args["testtype"] = a.split(",")
        elif o in ("-b", "--blocksize"):
            fio_args["blocksize"] = a.split(",")
        elif o in ("-i", "--iodepth"):
            fio_args["iodepth"] = a.split(",")
        elif o in ("-D", "--direct"):
            fio_args["direct"] = a.split(",")
        elif o in ("-n", "--numjobs"):
            fio_args["numjobs"] = a.split(",")
        elif o in ("-I", "--ioengine"):
            fio_args["ioengine"] = a.split(",")
        elif o in ("-v", "--verify"):
            fio_args["verify"] = a.split(",")
            fio_args["verify"] = ["" if x in "0" else
                                  "verify=crc32" for x in fio_args["verify"]]

    if len(args) < 1:
        show_help(fio_args_def)
        sys.exit(1)
    else:
        # Get IP, Port tuples from args and filename for fio config
        vms = [tuple(x.split(":")) for x in args[0].split(",")]
        filenames = [["/dev/" + y for y in x[2:]] for x in vms]
        vms = [x[0:2] for x in vms]

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    def select_filename(vm_index, fnames):
        # Build the fio "filename=" value for one VM: all of its disks, or
        # only the single disk / disk range requested with --split-disks.
        if not split_disks:
            return ":".join(fnames)
        bounds = split_disks[vm_index]
        first = bounds[0]
        last = bounds[0] if len(bounds) < 2 else bounds[1]
        return ":".join(fnames[first:last + 1])

    if job_file_opt is True:
        for fio_cfg in fio_cfg_files:
            print("Running job file: {0}".format(fio_cfg))

            for i, vm in enumerate(zip(vms, filenames)):
                filename = select_filename(i, vm[1])

                # Start from an empty remote job file, then append the job
                # line by line over ssh.
                exec_cmd(vm_ssh +
                         "{0} sh -c 'rm fio.job{1}'"
                         .format(i, i), blocking=True)

                for cfg in fio_cfg.split("\n"):
                    with open(cfg, "r") as fh:
                        lines = fh.readlines()
                    for line in lines:
                        if "filename" in line:
                            line = "filename=" + filename
                        exec_cmd(vm_ssh +
                                 "{0} sh -c 'echo {1} >> fio.job{2}'"
                                 .format(i, line.strip(), i), blocking=True)

            rc = run_fio(vms, fio_cfg, out_dir)
    else:
        for cfg in cfg_product(fio_args):
            # Update fio "size" parameter so that total work done by
            # all numjobs is equal to assigned size and not size*numjobs
            cfg["size"] = calc_size(cfg["size"], cfg["numjobs"])

            # Prepare this test run FIO job file
            for i, vm in enumerate(zip(vms, filenames)):
                filename = select_filename(i, vm[1])

                cfg.update({"filename": filename})
                fio_cfg = fio_template % cfg
                fio_cfg_files.append(prep_fio_cfg_file(out_dir,
                                                       fio_cfg, i))

                exec_cmd(vm_ssh +
                         "{0} sh -c 'rm fio.job{1}'"
                         .format(i, i), blocking=True)

                for line in fio_cfg.split("\n"):
                    exec_cmd(vm_ssh +
                             "{0} sh -c 'echo {1} >> fio.job{2}'"
                             .format(i, line.strip(), i), blocking=True)

            # run_fio() derives the log name from a job file *path*; the
            # parameter dict used to be passed here, which is not a path.
            # Use the job file that was generated last for this combination.
            rc = run_fio(vms, fio_cfg_files[-1], out_dir)

    return rc
if __name__ == "__main__":
sys.exit(main())

49
test/vhost/fiotest/run_vhost.sh Executable file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash

# Absolute, symlink-resolved directory that holds this script.
# Quoted so that a checkout path containing whitespace still works.
BASE_DIR=$(readlink -f "$(dirname "$0")")
# Honour a TEST_DIR supplied by the caller; otherwise default to the
# directory four levels above this script.
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd "$BASE_DIR/../../../../" && pwd)"
# Print an optional error message followed by the help text, then exit.
#   $1 - script path; its basename is shown in the synopsis
#   $2 - optional error message. When given, the script exits with status 1,
#        otherwise with status 0.
function usage()
{
	[[ -n $2 ]] && { echo "$2"; echo ""; }
	echo "Shortcut script for running vhost app."
	# The synopsis lists only the options the parser in this script accepts.
	echo "Usage: $(basename "$1") [-x] [-h|--help] [--gdb|--gdbserver] [--work-dir=PATH]"
	echo "-h, --help print help and exit"
	echo "-x Set -x for script debug"
	echo " --gdb Run app under gdb"
	echo " --gdbserver Run app under gdb-server"
	echo " --work-dir=PATH Where to find source/project. [default=$TEST_DIR]"
	# Being handed an error message means the invocation was invalid.
	[[ -n $2 ]] && exit 1
	exit 0
}
# Not read anywhere in this script; kept in case the sourced common.sh
# consults it - TODO confirm and drop if unused.
run_in_background=false

# The leading ':' selects getopts' silent mode, so an unknown option letter
# is delivered in OPTARG and can be echoed back in the error message.
# The '-:' entry lets long options ("--name[=value]") arrive through OPTARG.
while getopts ':xh-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage "$0" ;;
			# VHOST_GDB is only set here; presumably spdk_vhost_run
			# (from common.sh) prefixes the app command with it.
			gdb) VHOST_GDB="gdb --args" ;;
			gdbserver) VHOST_GDB="gdbserver 127.0.0.1:12345" ;;
			work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
			# The message must be the second argument of usage.
			*) usage "$0" "Invalid argument '$OPTARG'" ;;
		esac
		;;
		h) usage "$0" ;;
		x) set -x ;;
		*) usage "$0" "Invalid argument '$OPTARG'" ;;
	esac
done

if [[ $EUID -ne 0 ]]; then
	echo "Go away user come back as root"
	exit 1
fi

echo "INFO: $0"
echo

. "$BASE_DIR/common.sh"
spdk_vhost_run

View File

@ -0,0 +1,41 @@
# vhost configuration file
#
# Please write all parameters using ASCII.
# The parameter must be quoted if it includes whitespace.
#
# Configuration syntax:
# Leading spaces on a line are ignored; any other spaces act as separators.
# Lines starting with '#' are comments and are not evaluated.
# Lines ending with '\' are concatenated with the next line.
# Bracketed keys are section keys that group the value keys following them.
# The number at the end of a section key is used as its tag number.
# Ex. [TargetNode1] = TargetNode section key with tag number 1
[Global]
# Users can restrict work items to only run on certain cores by
# specifying a WorkerMask. Default is to allow work items to run
# on all cores.
#WorkerMask 0xFFFF
# Event mask for ids history buffers
# Default: 0x0 (all events disabled)
# Set to 0xFFFFFFFFFFFFFFFF to enable all events.
#EventMask 0x0
# syslog facility
LogFacility "local7"
[Rpc]
# Defines whether vhost will enable configuration via RPC.
# Default is disabled. Note that the RPC interface is not
# authenticated, so users should be careful about enabling
# RPC in non-trusted environments.
Enable Yes
[Ioat]
Disable Yes
[Nvme]
ClaimAllDevices
[Split]
Split Nvme0n1 4

48
test/vhost/fiotest/vm_run.sh Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash

# Directory of this script as an absolute path with symlinks resolved.
# The expansions are quoted so paths containing whitespace are handled.
BASE_DIR=$(readlink -f "$(dirname "$0")")
# TEST_DIR may be preset by the caller; the fallback is the directory
# four levels above this script.
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd "$BASE_DIR/../../../../" && pwd)"
# Print an optional error message followed by the help text, then exit.
#   $1 - script path; its basename is shown in the synopsis
#   $2 - optional error message. When given, the script exits with status 1,
#        otherwise with status 0.
function usage()
{
	[[ -n $2 ]] && { echo "$2"; echo ""; }
	echo "Shortcut script for enabling VMs"
	echo "Usage: $(basename "$1") [OPTIONS] VM..."
	echo
	echo "-h, --help print help and exit"
	# Show the value actually in effect rather than a fixed relative path.
	echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]"
	echo "-a Run all VMs in WORK_DIR"
	echo "-x set -x for script debug"
	# Being handed an error message means the invocation was invalid.
	[[ -n $2 ]] && exit 1
	exit 0
}
run_all=false

# The leading ':' selects getopts' silent mode: for an unknown option the
# offending letter is placed in OPTARG (in the default mode OPTARG is unset,
# which made the error message below always print an empty name).
# The '-:' entry lets long options ("--name[=value]") arrive through OPTARG.
while getopts ':xah-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage "$0" ;;
			work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
			*) usage "$0" "Invalid argument '$OPTARG'" ;;
		esac
		;;
		h) usage "$0" ;;
		a) run_all=true ;;
		x) set -x ;;
		*) usage "$0" "Invalid argument '$OPTARG'" ;;
	esac
done

# Provides vm_run (not visible in this file).
. "$BASE_DIR/common.sh"

if [[ $EUID -ne 0 ]]; then
	echo "Go away user come back as root"
	exit 1
fi

if $run_all; then
	vm_run -a
else
	# Whatever is left after the options is the list of VMs to start.
	shift $((OPTIND-1))
	echo "INFO: running VMs: $*"
	vm_run "$@"
fi

78
test/vhost/fiotest/vm_setup.sh Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env bash

# Location of this script (absolute, symlinks resolved); quoted to be safe
# against whitespace in the path.
BASE_DIR=$(readlink -f "$(dirname "$0")")
# Keep a TEST_DIR exported by the caller, else fall back to the directory
# four levels above this script.
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd "$BASE_DIR/../../../../" && pwd)"
# Print an optional error message followed by the help text, then exit.
#   $1 - script path; its basename is shown in the synopsis
#   $2 - optional error message. When given, the script exits with status 1,
#        otherwise with status 0.
function usage()
{
	[[ -n $2 ]] && { echo "$2"; echo ""; }
	echo "Shortcut script for setting up VMs for tests"
	echo "Usage: $(basename "$1") [OPTIONS] VM_NUM"
	echo
	echo "-h, --help print help and exit"
	echo "-f VM_NUM Force VM_NUM reconfiguration if already exist"
	echo " --work-dir=WORK_DIR Where to find build file. Must exist. (default: $TEST_DIR)"
	echo " --test-type=TYPE Perform specified test:"
	echo " virtio - test host virtio-scsi-pci using file as disk image"
	echo " kernel_vhost - use kernel driver vhost-scsi"
	echo " spdk_vhost - use spdk vhost"
	# The option parser of this script accepts --raw-cache=..., so document that name.
	echo " --raw-cache=CACHE Use CACHE for virtio test: "
	echo " writethrough, writeback, none, unsafe or directsync"
	echo " Default is writethrough"
	echo " --disk=PATH Disk to use in test. test specific meaning:"
	echo " virtio - disk path (file or block device ex: /dev/nvme0n1)"
	echo " kernel_vhost - the WWN number to be used"
	echo " spdk_vhost - the socket path. Default is WORK_DIR/vhost/usvhost"
	echo " --os=OS_QCOW2 Custom OS qcow2 image file"
	echo " --os-mode=MODE MODE how to use provided image: default: backing"
	echo " backing - create new image but use provided backing file"
	echo " copy - copy provided image and use a copy"
	# NOTE(review): "orginal" is kept verbatim because the accepted value is
	# defined outside this file - confirm the spelling against vm_setup.
	echo " orginal - use file directly. Will modify the provided file"
	echo "-x Turn on script debug (set -x)"
	# Being handed an error message means the invocation was invalid.
	[[ -n $2 ]] && exit 1
	exit 0
}
disk=""
raw_cache=""
img_mode=""
os=""
# The variable filled in by --os-mode and forwarded below is os_mode;
# start it empty like the other forwarded options.
os_mode=""

# The leading ':' selects getopts' silent mode: OPTARG then carries the
# offending option letter for unknown options and for a missing argument.
# The '-:' entry lets long options ("--name[=value]") arrive through OPTARG.
while getopts ':xf:h-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage "$0" ;;
			work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
			raw-cache=*) raw_cache="--raw-cache=${OPTARG#*=}" ;;
			test-type=*) test_type="${OPTARG#*=}" ;;
			disk=*) disk="${OPTARG#*=}" ;;
			os=*) os="${OPTARG#*=}"
				if [[ ! -r "$os" ]]; then
					# An unreadable image is a failure, so do not
					# leave through the help path with status 0.
					echo "ERROR: can't read '$os'"
					exit 1
				fi
				os="$(readlink -f "$os")"
				;;
			os-mode=*) os_mode="--os-mode=${OPTARG#*=}" ;;
			*) usage "$0" "Invalid argument '$OPTARG'" ;;
		esac
		;;
		h) usage "$0" ;;
		x) set -x ;;
		f) force_vm_num="--force=${OPTARG#*=}" ;;
		:) usage "$0" "Option '-$OPTARG' requires an argument" ;;
		*) usage "$0" "Invalid argument '$OPTARG'" ;;
	esac
done

# Provides vm_setup; presumably also SPDK_VHOST_SCSI_TEST_DIR, which is not
# assigned anywhere in this script.
. "$BASE_DIR/common.sh"

[[ -z "$os" ]] && os="$TEST_DIR/debian.qcow2"
[[ $test_type == "spdk_vhost" ]] && [[ -z "$disk" ]] && disk="$SPDK_VHOST_SCSI_TEST_DIR/usvhost"
if [[ $test_type == "kernel_vhost" ]] && [[ -z "$disk" ]]; then
	echo "ERROR: for $test_type '--disk=WWN' is mandatory"
	exit 1
fi

# The last line is intentionally unquoted: options that were not given are
# empty and must vanish instead of being passed as empty arguments.
# NOTE(review): wwn is never assigned in this script - confirm whether it is
# still needed.
vm_setup \
	"--os=$os" \
	"--disk-type=$test_type" \
	"--disks=$disk" \
	$wwn $raw_cache $force_vm_num $os_mode

View File

@ -0,0 +1,65 @@
#!/usr/bin/env bash

# Absolute path of the directory containing this script, symlinks resolved.
# Quoting protects against whitespace in the path.
BASE_DIR=$(readlink -f "$(dirname "$0")")
# Use the caller's TEST_DIR when set; by default it is the directory four
# levels above this script.
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd "$BASE_DIR/../../../../" && pwd)"
# Print an optional error message followed by the help text, then exit.
#   $1 - script path; its basename is shown in the synopsis
#   $2 - optional error message. When given, the script exits with status 1,
#        otherwise with status 0.
function usage()
{
	[[ -n $2 ]] && { echo "$2"; echo ""; }
	echo "Shortcut script for shutting down VMs"
	echo "Usage: $(basename "$1") [OPTIONS] [VMs]"
	echo
	echo "-h, --help print help and exit"
	# Show the value actually in effect rather than a fixed relative path.
	echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]"
	echo "-a kill/shutdown all running VMs"
	echo "-k kill instead of shutdown"
	# Being handed an error message means the invocation was invalid.
	[[ -n $2 ]] && exit 1
	exit 0
}
# The leading ':' selects getopts' silent mode, so an unknown option letter
# is delivered in OPTARG and can be echoed back in the error message.
# The '-:' entry lets long options ("--name[=value]") arrive through OPTARG.
optspec=':akh-:'
do_kill=false
all=false

while getopts "$optspec" optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage "$0" ;;
			work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
			*) usage "$0" "Invalid argument '$OPTARG'" ;;
		esac
		;;
		h) usage "$0" ;;
		k) do_kill=true ;;
		a) all=true ;;
		*) usage "$0" "Invalid argument '$OPTARG'" ;;
	esac
done

# Provides vm_kill, vm_kill_all and vm_shutdown_all (not visible in this file).
. "$BASE_DIR/common.sh"

# Only the kill path insists on root.
if $do_kill && [[ $EUID -ne 0 ]]; then
	echo "Go away user come back as root"
	exit 1
fi

# do_kill holds the word "true" or "false"; it has to be expanded with '$'
# to be run as the shell builtin of that name.
if $all; then
	if $do_kill; then
		echo 'INFO: killing all VMs'
		vm_kill_all
	else
		echo 'INFO: shutting down all VMs'
		vm_shutdown_all
	fi
else
	shift $((OPTIND-1))
	if $do_kill; then
		echo "INFO: killing VMs: $*"
		for vm in "$@"; do
			vm_kill "$vm"
		done
	else
		# NOTE(review): without -a and -k the VM list on the command line
		# is ignored and every VM is shut down - confirm whether a per-VM
		# shutdown was intended here.
		echo 'INFO: shutting down all VMs'
		vm_shutdown_all
	fi
fi

58
test/vhost/fiotest/vm_ssh.sh Executable file
View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash

# Resolve the directory of this script to an absolute, symlink-free path.
# Expansions are quoted so whitespace in the path cannot split them.
BASE_DIR=$(readlink -f "$(dirname "$0")")
# A TEST_DIR provided by the caller wins; the default is the directory four
# levels above this script.
[[ -z "$TEST_DIR" ]] && TEST_DIR="$(cd "$BASE_DIR/../../../../" && pwd)"
# Print an optional error message followed by the help text, then exit.
#   $1 - script path; its basename is shown in the synopsis
#   $2 - optional error message. When given, the script exits with status 1
#        (so error reports no longer end the script with a success status),
#        otherwise with status 0.
function usage()
{
	[[ -n $2 ]] && { echo "$2"; echo ""; }
	echo "Shortcut script for connecting to or executing command on selected VM"
	echo "Usage: $(basename "$1") [OPTIONS] VM_NUMBER"
	echo
	echo "-h, --help print help and exit"
	echo " --work-dir=WORK_DIR Where to find build file. Must exist. [default: $TEST_DIR]"
	echo "-w Don't wait for vm to boot"
	echo "-x set -x for script debug"
	# Being handed an error message means the invocation was invalid.
	[[ -n $2 ]] && exit 1
	exit 0
}
boot_wait=true

# The leading ':' selects getopts' silent mode: for an unknown option the
# offending letter is placed in OPTARG (in the default mode OPTARG is unset,
# which made the error message below always print an empty name).
# The '-:' entry lets long options ("--name[=value]") arrive through OPTARG.
while getopts ':xwh-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage "$0" ;;
			work-dir=*) TEST_DIR="${OPTARG#*=}" ;;
			*) usage "$0" "Invalid argument '$OPTARG'" ;;
		esac ;;
		h) usage "$0" ;;
		w) boot_wait=false ;;
		x) set -x ;;
		*) usage "$0" "Invalid argument '$OPTARG'" ;;
	esac
done

# Provides vm_num_is_valid, vm_os_booted, vm_is_running and vm_ssh
# (not visible in this file).
. "$BASE_DIR/common.sh"

# First positional argument is the VM number; everything after it is
# forwarded to vm_ssh.
shift $((OPTIND-1))
vm_num="$1"
shift

if ! vm_num_is_valid "$vm_num"; then
	# usage terminates the script itself; the exit below is only reached
	# if that ever changes.
	usage "$0" "Invalid VM num $vm_num"
	exit 1
fi

if $boot_wait; then
	# Poll once per second until the guest reports as booted, giving up
	# as soon as the VM is no longer running.
	while ! vm_os_booted "$vm_num"; do
		if ! vm_is_running "$vm_num"; then
			echo "ERROR: VM$vm_num is not running"
			exit 1
		fi
		echo "INFO: waiting for VM$vm_num to boot"
		sleep 1
	done
fi

vm_ssh "$vm_num" "$@"

40
test/vhost/spdk_vhost.sh Executable file
View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash

# Runs the SPDK vhost fio autotest (Linux only).
#   $1 - test selector: -p|--performance, -i|--integrity or -h|--help

# Resolve the script directory to an absolute path: WORKDIR is used again
# (for --fio-jobs) after the cd below, where a relative value such as
# "./test/vhost" would point at the wrong place.
WORKDIR=$(cd "$(dirname "$0")" && pwd)
cd "$WORKDIR" || exit 1

param="$1"

if [ "$(uname -s)" = Linux ]; then
	NRHUGE=4096 ./../../scripts/setup.sh
	echo Running SPDK vhost fio autotest...

	case "$param" in
		-p|--performance)
		echo Running performance suite...
		./fiotest/autotest.sh --fio-bin=/home/sys_sgsw/fio_ubuntu \
		--vm=0,/home/sys_sgsw/vhost_scsi_vm_image.qcow2,Nvme0n1p0 \
		--test-type=spdk_vhost \
		--fio-jobs="$WORKDIR/fiotest/fio_jobs/default_performance.job" \
		--qemu-src=/home/sys_sgsw/vhost_scsi/qemu
		;;
		-i|--integrity)
		echo Running integrity suite...
		./fiotest/autotest.sh --fio-bin=/home/sys_sgsw/fio_ubuntu \
		--vm=0,/home/sys_sgsw/vhost_scsi_vm_image.qcow2,Nvme0n1p0:Nvme0n1p1:Nvme0n1p2:Nvme0n1p3 \
		--test-type=spdk_vhost \
		--fio-jobs="$WORKDIR/fiotest/fio_jobs/default_integrity.job" \
		--qemu-src=/home/sys_sgsw/vhost_scsi/qemu
		;;
		-h|--help)
		echo "-i|--integrity for running an integrity test"
		echo "-p|--performance for running a performance test"
		echo "-h|--help prints this message"
		;;
		*)
		echo "unknown test type"
		;;
	esac
fi