diff --git a/CHANGELOG.md b/CHANGELOG.md index b1af3e940e..ea86b20374 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## v20.10: (Upcoming Release) +### vhost + +SPDK switched to DPDK's rte_vhost library in the 19.07 release. The internal rte_vhost +library, which was used for DPDK older than 19.05, has been removed, together with +the experimental vhost nvme target, which depended on the internal rte_vhost library. + ### bdev A new `spdk_bdev_part_base_construct_ext` function has been added and the diff --git a/CONFIG b/CONFIG index 569e53bf74..f4a426d6d2 100644 --- a/CONFIG +++ b/CONFIG @@ -113,7 +113,6 @@ CONFIG_RBD=n # Build vhost library. CONFIG_VHOST=y -CONFIG_VHOST_INTERNAL_LIB=n # Build vhost initiator (Virtio) driver. CONFIG_VIRTIO=y diff --git a/app/spdk_tgt/Makefile b/app/spdk_tgt/Makefile index c69a1688d5..ea08539d4a 100644 --- a/app/spdk_tgt/Makefile +++ b/app/spdk_tgt/Makefile @@ -44,9 +44,6 @@ SPDK_LIB_LIST = $(ALL_MODULES_LIST) ifeq ($(OS),Linux) ifeq ($(CONFIG_VHOST),y) SPDK_LIB_LIST += vhost event_vhost -ifeq ($(CONFIG_VHOST_INTERNAL_LIB),y) -SPDK_LIB_LIST += rte_vhost -endif endif endif diff --git a/app/vhost/Makefile b/app/vhost/Makefile index 0954e223c8..c4228562c5 100644 --- a/app/vhost/Makefile +++ b/app/vhost/Makefile @@ -42,10 +42,6 @@ C_SRCS := vhost.c SPDK_LIB_LIST = $(ALL_MODULES_LIST) SPDK_LIB_LIST += vhost event_vhost -ifeq ($(CONFIG_VHOST_INTERNAL_LIB),y) -SPDK_LIB_LIST += rte_vhost -endif - SPDK_LIB_LIST += $(EVENT_BDEV_SUBSYSTEM) event_net event_scsi event SPDK_LIB_LIST += jsonrpc json rpc bdev scsi accel trace conf SPDK_LIB_LIST += thread util log diff --git a/configure b/configure index 136a2c1d3e..55e4cb163c 100755 --- a/configure +++ b/configure @@ -59,9 +59,6 @@ function usage() echo " default: /usr/src/fio" echo " vhost Build vhost target. Enabled by default." echo " No path required." - echo " internal-vhost-lib Use the internal copy of rte_vhost. By default, the upstream" - echo " rte_vhost from DPDK will be used." 
- echo " No path required." echo " virtio Build vhost initiator and virtio-pci bdev modules." echo " No path required." echo " pmdk Build persistent memory bdev." @@ -155,7 +152,6 @@ fi if [[ $sys_name == "FreeBSD" ]]; then # Vhost, rte_vhost library and virtio are only supported on Linux. CONFIG[VHOST]="n" - CONFIG[VHOST_INTERNAL_LIB]="n" CONFIG[VIRTIO]="n" echo "Notice: Vhost, rte_vhost library and virtio are only supported on Linux. Turning off default feature." fi @@ -331,12 +327,6 @@ for i in "$@"; do --without-vhost) CONFIG[VHOST]=n ;; - --with-internal-vhost-lib) - CONFIG[VHOST_INTERNAL_LIB]=y - ;; - --without-internal-vhost-lib) - CONFIG[VHOST_INTERNAL_LIB]=n - ;; --with-virtio) CONFIG[VIRTIO]=y ;; @@ -500,18 +490,17 @@ if [ -z "${CONFIG[ENV]}" ]; then echo "Using default DPDK in ${CONFIG[DPDK_DIR]}" fi - if [[ "${CONFIG[VHOST]}" = "y" ]] && [[ "${CONFIG[VHOST_INTERNAL_LIB]}" = "n" ]]; then + if [ "${CONFIG[VHOST]}" = "y" ]; then # We lookup "common_linux" file to check if DPDK version is >= 19.05. # "common_linux" is available since exactly DPDK 19.05 - it was renamed # from "common_linuxapp". if [ ! -f "$rootdir"/dpdk/config/common_linux ]; then - echo "Notice: Using internal, legacy rte_vhost library due to DPDK" \ - "version < 19.05" - CONFIG[VHOST_INTERNAL_LIB]=y + echo "ERROR: Vhost only supports DPDK >= 19.05. Please use newer DPDK or disable vhost library --without-vhost." + exit 1 fi fi else - if [[ "${CONFIG[VHOST]}" = "y" ]] && [[ "${CONFIG[VHOST_INTERNAL_LIB]}" = "n" ]]; then + if [ "${CONFIG[VHOST]}" = "y" ]; then # DPDK must be already built, so we can simply try to use the new rte_vhost. 
# It has a number of internal dependencies though, so don't try to link the # program, just compile it @@ -519,9 +508,8 @@ if [ -z "${CONFIG[ENV]}" ]; then 'int main(void) { return rte_vhost_extern_callback_register(0, NULL, NULL); }\n' \ | ${BUILD_CMD[@]} -c -Wno-deprecated-declarations -Werror \ -I"${CONFIG[DPDK_DIR]}/include" - &>/dev/null; then - echo "Notice: DPDK's rte_vhost not found or version < 19.05, using internal," \ - "legacy rte_vhost library." - CONFIG[VHOST_INTERNAL_LIB]=y + echo "ERROR: Vhost only supports DPDK >= 19.05. Please use newer DPDK or disable vhost library --without-vhost." + exit 1 fi fi fi @@ -568,10 +556,6 @@ if [[ $sys_name == "FreeBSD" ]]; then echo "Vhost is only supported on Linux." exit 1 fi - if [[ "${CONFIG[VHOST_INTERNAL_LIB]}" == "y" ]]; then - echo "Internal rte_vhost library is only supported on Linux." - exit 1 - fi if [[ "${CONFIG[VIRTIO]}" == "y" ]]; then echo "Virtio is only supported on Linux." exit 1 diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index debc5799e2..6ea5ae53e4 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -5732,83 +5732,6 @@ Example response: } ~~~ -## vhost_create_nvme_controller {#rpc_vhost_create_nvme_controller} - -Construct empty vhost NVMe controller. 
- -### Parameters - -Name | Optional | Type | Description ------------------------ | -------- | ----------- | ----------- -ctrlr | Required | string | Controller name -io_queues | Required | number | Number between 1 and 31 of IO queues for the controller -cpumask | Optional | string | @ref cpu_mask for this controller - -### Example - -Example request: - -~~~ -{ - "params": { - "cpumask": "0x2", - "io_queues": 4, - "ctrlr": "VhostNvme0" - }, - "jsonrpc": "2.0", - "method": "vhost_create_nvme_controller", - "id": 1 -} -~~~ - -Example response: - -~~~ -{ - "jsonrpc": "2.0", - "id": 1, - "result": true -} -~~~ - -## vhost_nvme_controller_add_ns {#rpc_vhost_nvme_controller_add_ns} - -Add namespace backed by `bdev_name` - -### Parameters - -Name | Optional | Type | Description ------------------------ | -------- | ----------- | ----------- -ctrlr | Required | string | Controller name -bdev_name | Required | string | Name of bdev to expose as a namespace -cpumask | Optional | string | @ref cpu_mask for this controller - -### Example - -Example request: - -~~~ -{ - "params": { - "bdev_name": "Malloc0", - "ctrlr": "VhostNvme0" - }, - "jsonrpc": "2.0", - "method": "vhost_nvme_controller_add_ns", - "id": 1 -} -~~~ - -Example response: - -~~~ -{ - "jsonrpc": "2.0", - "id": 1, - "result": true -} -~~~ - ## vhost_create_blk_controller {#rpc_vhost_create_blk_controller} Create vhost block controller diff --git a/doc/vhost.md b/doc/vhost.md index 71710441d2..6d834b546d 100644 --- a/doc/vhost.md +++ b/doc/vhost.md @@ -171,26 +171,6 @@ extra `-r` or `--readonly` parameter. scripts/rpc.py vhost_create_blk_controller --cpumask 0x1 -r vhost.1 Malloc0 ~~~ -### Vhost-NVMe (experimental) - -The following RPC will attach the Malloc0 bdev to the vhost.0 vhost-nvme -controller. Malloc0 will appear as Namespace 1 of vhost.0 controller. Users -can use `--cpumask` parameter to specify which cores should be used for this -controller. 
Users must specify the maximum I/O queues supported for the -controller, at least 1 Namespace is required for each controller. - -~~~{.sh} -$rpc_py vhost_create_nvme_controller --cpumask 0x1 vhost.2 16 -$rpc_py vhost_nvme_controller_add_ns vhost.2 Malloc0 -~~~ - -Users can use the following command to remove the controller, all the block -devices attached to controller's Namespace will be removed automatically. - -~~~{.sh} -$rpc_py vhost_delete_controller vhost.2 -~~~ - ## QEMU {#vhost_qemu_config} Now the virtual machine can be started with QEMU. The following command-line @@ -229,13 +209,6 @@ Finally, specify the SPDK vhost devices: -device vhost-user-blk-pci,id=blk0,chardev=char1 ~~~ -### Vhost-NVMe (experimental) - -~~~{.sh} --chardev socket,id=char2,path=/var/tmp/vhost.2 --device vhost-user-nvme,id=nvme0,chardev=char2,num_io_queues=4 -~~~ - ## Example output {#vhost_example} This example uses an NVMe bdev alongside Mallocs. SPDK vhost application is started diff --git a/lib/Makefile b/lib/Makefile index 9efad8fbb3..57f5e49e33 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -47,7 +47,6 @@ DIRS-$(CONFIG_IDXD) += idxd DIRS-$(CONFIG_VHOST) += vhost DIRS-$(CONFIG_VIRTIO) += virtio DIRS-$(CONFIG_REDUCE) += reduce -DIRS-$(CONFIG_VHOST_INTERNAL_LIB) += rte_vhost DIRS-$(CONFIG_RDMA) += rdma # If CONFIG_ENV is pointing at a directory in lib, build it. 
diff --git a/lib/env_dpdk/env.mk b/lib/env_dpdk/env.mk index 547fcf9bdf..f5e048a7e2 100644 --- a/lib/env_dpdk/env.mk +++ b/lib/env_dpdk/env.mk @@ -80,14 +80,12 @@ endif LINK_HASH=n ifeq ($(CONFIG_VHOST),y) -ifneq ($(CONFIG_VHOST_INTERNAL_LIB),y) DPDK_LIB_LIST += rte_vhost rte_net LINK_HASH=y ifneq ($(DPDK_FRAMEWORK),y) DPDK_LIB_LIST += rte_cryptodev endif endif -endif ifeq ($(CONFIG_RAID5),y) LINK_HASH=y diff --git a/lib/rte_vhost/Makefile b/lib/rte_vhost/Makefile deleted file mode 100644 index aa073c6caf..0000000000 --- a/lib/rte_vhost/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# -# BSD LICENSE -# -# Copyright (c) Intel Corporation. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) -include $(SPDK_ROOT_DIR)/mk/spdk.common.mk - -SO_VER := 2 -SO_MINOR := 0 - -CFLAGS += -I. -CFLAGS += $(ENV_CFLAGS) -CFLAGS += -include rte_config.h -CFLAGS += -Wno-address-of-packed-member - -# These are the DPDK vhost files copied (for now) into SPDK -C_SRCS += fd_man.c socket.c vhost_user.c vhost.c - -LIBNAME = rte_vhost - -include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/rte_vhost/fd_man.c b/lib/rte_vhost/fd_man.c deleted file mode 100644 index 2ceacc9abd..0000000000 --- a/lib/rte_vhost/fd_man.c +++ /dev/null @@ -1,300 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "fd_man.h" - -#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) - -static int -get_last_valid_idx(struct fdset *pfdset, int last_valid_idx) -{ - int i; - - for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--) - ; - - return i; -} - -static void -fdset_move(struct fdset *pfdset, int dst, int src) -{ - pfdset->fd[dst] = pfdset->fd[src]; - pfdset->rwfds[dst] = pfdset->rwfds[src]; -} - -static void -fdset_shrink_nolock(struct fdset *pfdset) -{ - int i; - int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1); - - for (i = 0; i < last_valid_idx; i++) { - if (pfdset->fd[i].fd != -1) - continue; - - fdset_move(pfdset, i, last_valid_idx); - last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1); - } - pfdset->num = last_valid_idx + 1; -} - -/* - * Find deleted fd entries and remove them - */ -static void -fdset_shrink(struct fdset *pfdset) -{ - pthread_mutex_lock(&pfdset->fd_mutex); - fdset_shrink_nolock(pfdset); - pthread_mutex_unlock(&pfdset->fd_mutex); -} - -/** - * Returns the index in the fdset for a given fd. 
- * @return - * index for the fd, or -1 if fd isn't in the fdset. - */ -static int -fdset_find_fd(struct fdset *pfdset, int fd) -{ - int i; - - for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++) - ; - - return i == pfdset->num ? -1 : i; -} - -static void -fdset_add_fd(struct fdset *pfdset, int idx, int fd, - fd_cb rcb, fd_cb wcb, void *dat) -{ - struct fdentry *pfdentry = &pfdset->fd[idx]; - struct pollfd *pfd = &pfdset->rwfds[idx]; - - pfdentry->fd = fd; - pfdentry->rcb = rcb; - pfdentry->wcb = wcb; - pfdentry->dat = dat; - - pfd->fd = fd; - pfd->events = rcb ? POLLIN : 0; - pfd->events |= wcb ? POLLOUT : 0; - pfd->revents = 0; -} - -void -fdset_init(struct fdset *pfdset) -{ - int i; - - if (pfdset == NULL) - return; - - for (i = 0; i < MAX_FDS; i++) { - pfdset->fd[i].fd = -1; - pfdset->fd[i].dat = NULL; - } - pfdset->num = 0; -} - -/** - * Register the fd in the fdset with read/write handler and context. - */ -int -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) -{ - int i; - - if (pfdset == NULL || fd == -1) - return -1; - - pthread_mutex_lock(&pfdset->fd_mutex); - i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; - if (i == -1) { - fdset_shrink_nolock(pfdset); - i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; - if (i == -1) { - pthread_mutex_unlock(&pfdset->fd_mutex); - return -2; - } - } - - fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); - pthread_mutex_unlock(&pfdset->fd_mutex); - - return 0; -} - -/** - * Unregister the fd from the fdset. - * Returns context of a given fd or NULL. - */ -void * -fdset_del(struct fdset *pfdset, int fd) -{ - int i; - void *dat = NULL; - - if (pfdset == NULL || fd == -1) - return NULL; - - do { - pthread_mutex_lock(&pfdset->fd_mutex); - - i = fdset_find_fd(pfdset, fd); - if (i != -1 && pfdset->fd[i].busy == 0) { - /* busy indicates r/wcb is executing! 
*/ - dat = pfdset->fd[i].dat; - pfdset->fd[i].fd = -1; - pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; - pfdset->fd[i].dat = NULL; - i = -1; - } - pthread_mutex_unlock(&pfdset->fd_mutex); - } while (i != -1); - - return dat; -} - - -/** - * This functions runs in infinite blocking loop until there is no fd in - * pfdset. It calls corresponding r/w handler if there is event on the fd. - * - * Before the callback is called, we set the flag to busy status; If other - * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it - * will wait until the flag is reset to zero(which indicates the callback is - * finished), then it could free the context after fdset_del. - */ -void * -fdset_event_dispatch(void *arg) -{ - int i; - struct pollfd *pfd; - struct fdentry *pfdentry; - fd_cb rcb, wcb; - void *dat; - int fd, numfds; - int remove1, remove2; - int need_shrink; - struct fdset *pfdset = arg; - - if (pfdset == NULL) - return NULL; - - while (1) { - - /* - * When poll is blocked, other threads might unregister - * listenfds from and register new listenfds into fdset. - * When poll returns, the entries for listenfds in the fdset - * might have been updated. It is ok if there is unwanted call - * for new listenfds. 
- */ - pthread_mutex_lock(&pfdset->fd_mutex); - numfds = pfdset->num; - pthread_mutex_unlock(&pfdset->fd_mutex); - - poll(pfdset->rwfds, numfds, 1000 /* millisecs */); - - need_shrink = 0; - for (i = 0; i < numfds; i++) { - pthread_mutex_lock(&pfdset->fd_mutex); - - pfdentry = &pfdset->fd[i]; - fd = pfdentry->fd; - pfd = &pfdset->rwfds[i]; - - if (fd < 0) { - need_shrink = 1; - pthread_mutex_unlock(&pfdset->fd_mutex); - continue; - } - - if (!pfd->revents) { - pthread_mutex_unlock(&pfdset->fd_mutex); - continue; - } - - remove1 = remove2 = 0; - - rcb = pfdentry->rcb; - wcb = pfdentry->wcb; - dat = pfdentry->dat; - pfdentry->busy = 1; - - pthread_mutex_unlock(&pfdset->fd_mutex); - - if (rcb && pfd->revents & (POLLIN | FDPOLLERR)) - rcb(fd, dat, &remove1); - if (wcb && pfd->revents & (POLLOUT | FDPOLLERR)) - wcb(fd, dat, &remove2); - pfdentry->busy = 0; - /* - * fdset_del needs to check busy flag. - * We don't allow fdset_del to be called in callback - * directly. - */ - /* - * When we are to clean up the fd from fdset, - * because the fd is closed in the cb, - * the old fd val could be reused by when creates new - * listen fd in another thread, we couldn't call - * fd_set_del. - */ - if (remove1 || remove2) { - pfdentry->fd = -1; - need_shrink = 1; - } - } - - if (need_shrink) - fdset_shrink(pfdset); - } - - return NULL; -} diff --git a/lib/rte_vhost/fd_man.h b/lib/rte_vhost/fd_man.h deleted file mode 100644 index 3a9d269b31..0000000000 --- a/lib/rte_vhost/fd_man.h +++ /dev/null @@ -1,69 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _FD_MAN_H_ -#define _FD_MAN_H_ -#include -#include -#include - -#define MAX_FDS 1024 - -typedef void (*fd_cb)(int fd, void *dat, int *remove); - -struct fdentry { - int fd; /* -1 indicates this entry is empty */ - fd_cb rcb; /* callback when this fd is readable. */ - fd_cb wcb; /* callback when this fd is writeable. */ - void *dat; /* fd context */ - int busy; /* whether this entry is being used in cb. 
*/ -}; - -struct fdset { - struct pollfd rwfds[MAX_FDS]; - struct fdentry fd[MAX_FDS]; - pthread_mutex_t fd_mutex; - int num; /* current fd number of this fdset */ -}; - - -void fdset_init(struct fdset *pfdset); - -int fdset_add(struct fdset *pfdset, int fd, - fd_cb rcb, fd_cb wcb, void *dat); - -void *fdset_del(struct fdset *pfdset, int fd); - -void *fdset_event_dispatch(void *arg); - -#endif diff --git a/lib/rte_vhost/rte_vhost.h b/lib/rte_vhost/rte_vhost.h deleted file mode 100644 index b1b7f2cd80..0000000000 --- a/lib/rte_vhost/rte_vhost.h +++ /dev/null @@ -1,635 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_VHOST_H_ -#define _RTE_VHOST_H_ - -/** - * @file - * Interface to vhost-user - */ - -#include -#include -#include -#include - -#include -#include -#include - -#define RTE_VHOST_USER_CLIENT (1ULL << 0) -#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) -#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) - -/** - * Information relating to memory regions including offsets to - * addresses in QEMUs memory file. - */ -struct rte_vhost_mem_region { - uint64_t guest_phys_addr; - uint64_t guest_user_addr; - uint64_t host_user_addr; - uint64_t size; - void *mmap_addr; - uint64_t mmap_size; - int fd; -}; - -/** - * Memory structure includes region and mapping information. 
- */ -struct rte_vhost_memory { - uint32_t nregions; - struct rte_vhost_mem_region regions[0]; -}; - -struct rte_vhost_inflight_desc_split { - uint8_t inflight; - uint8_t padding[5]; - uint16_t next; - uint64_t counter; -}; - -struct rte_vhost_inflight_info_split { - uint64_t features; - uint16_t version; - uint16_t desc_num; - uint16_t last_inflight_io; - uint16_t used_idx; - struct rte_vhost_inflight_desc_split desc[0]; -}; - -struct rte_vhost_resubmit_desc { - uint16_t index; - uint64_t counter; -}; - -struct rte_vhost_resubmit_info { - struct rte_vhost_resubmit_desc *resubmit_list; - uint16_t resubmit_num; -}; - -struct rte_vhost_ring_inflight { - struct rte_vhost_inflight_info_split *inflight_split; - struct rte_vhost_resubmit_info *resubmit_inflight; -}; - -struct rte_vhost_vring { - union { - struct vring_desc *desc; - struct vring_packed_desc *desc_packed; - }; - union { - struct vring_avail *avail; - struct vring_packed_desc_event *driver_event; - }; - union { - struct vring_used *used; - struct vring_packed_desc_event *device_event; - }; - uint64_t log_guest_addr; - - int callfd; - int kickfd; - uint16_t size; -}; - -/** - * Device and vring operations. - */ -struct vhost_device_ops { - int (*new_device)(int vid); /**< Add device. */ - void (*destroy_device)(int vid); /**< Remove device. */ - - int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ - - /** - * Features could be changed after the feature negotiation. - * For example, VHOST_F_LOG_ALL will be set/cleared at the - * start/end of live migration, respectively. This callback - * is used to inform the application on such change. 
- */ - int (*features_changed)(int vid, uint64_t features); - int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf); - int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd); - int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size); - int (*vhost_nvme_get_cap)(int vid, uint64_t *cap); - - int (*new_connection)(int vid); - void (*destroy_connection)(int vid); - - int (*get_config)(int vid, uint8_t *config, uint32_t config_len); - int (*set_config)(int vid, uint8_t *config, uint32_t offset, - uint32_t len, uint32_t flags); - - void *reserved[2]; /**< Reserved for future extension */ -}; - -/** - * Convert guest physical address to host virtual address - * - * @param mem - * the guest memory regions - * @param gpa - * the guest physical address for querying - * @return - * the host virtual address on success, 0 on failure - */ -static inline uint64_t __attribute__((always_inline)) -rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) -{ - struct rte_vhost_mem_region *reg; - uint32_t i; - - for (i = 0; i < mem->nregions; i++) { - reg = &mem->regions[i]; - if (gpa >= reg->guest_phys_addr && - gpa < reg->guest_phys_addr + reg->size) { - return gpa - reg->guest_phys_addr + - reg->host_user_addr; - } - } - - return 0; -} - -/** - * Convert guest physical address to host virtual address safely - * - * This variant of rte_vhost_gpa_to_vva() takes care all the - * requested length is mapped and contiguous in process address - * space. 
- * - * @param mem - * the guest memory regions - * @param gpa - * the guest physical address for querying - * @param len - * the size of the requested area to map, - * updated with actual size mapped - * @return - * the host virtual address on success, 0 on failure */ -static inline uint64_t -rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, - uint64_t gpa, uint64_t *len) -{ - struct rte_vhost_mem_region *r; - uint32_t i; - - for (i = 0; i < mem->nregions; i++) { - r = &mem->regions[i]; - if (gpa >= r->guest_phys_addr && - gpa < r->guest_phys_addr + r->size) { - - if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) - *len = r->guest_phys_addr + r->size - gpa; - - return gpa - r->guest_phys_addr + - r->host_user_addr; - } - } - *len = 0; - - return 0; -} - -#define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) - -/** - * Log the memory write start with given address. - * - * This function only need be invoked when the live migration starts. - * Therefore, we won't need call it at all in the most of time. For - * making the performance impact be minimum, it's suggested to do a - * check before calling it: - * - * if (unlikely(RTE_VHOST_NEED_LOG(features))) - * rte_vhost_log_write(vid, addr, len); - * - * @param vid - * vhost device ID - * @param addr - * the starting address for write - * @param len - * the length to write - */ -void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len); - -/** - * Log the used ring update start at given offset. 
- * - * Same as rte_vhost_log_write, it's suggested to do a check before - * calling it: - * - * if (unlikely(RTE_VHOST_NEED_LOG(features))) - * rte_vhost_log_used_vring(vid, vring_idx, offset, len); - * - * @param vid - * vhost device ID - * @param vring_idx - * the vring index - * @param offset - * the offset inside the used ring - * @param len - * the length to write - */ -void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, - uint64_t offset, uint64_t len); - -int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); - -/** - * Register vhost driver. path could be different for multiple - * instance support. - */ -int rte_vhost_driver_register(const char *path, uint64_t flags); - -/* Unregister vhost driver. This is only meaningful to vhost user. */ -int rte_vhost_driver_unregister(const char *path); - -/** - * Set the feature bits the vhost-user driver supports. - * - * @param path - * The vhost-user socket file path - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_driver_set_features(const char *path, uint64_t features); - -/** - * Enable vhost-user driver features. - * - * Note that - * - the param @features should be a subset of the feature bits provided - * by rte_vhost_driver_set_features(). - * - it must be invoked before vhost-user negotiation starts. - * - * @param path - * The vhost-user socket file path - * @param features - * Features to enable - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_driver_enable_features(const char *path, uint64_t features); - -/** - * Disable vhost-user driver features. - * - * The two notes at rte_vhost_driver_enable_features() also apply here. - * - * @param path - * The vhost-user socket file path - * @param features - * Features to disable - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_driver_disable_features(const char *path, uint64_t features); - -/** - * Get the feature bits before feature negotiation. 
- * - * @param path - * The vhost-user socket file path - * @param features - * A pointer to store the queried feature bits - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_driver_get_features(const char *path, uint64_t *features); - -/** - * Get the feature bits after negotiation - * - * @param vid - * Vhost device ID - * @param features - * A pointer to store the queried feature bits - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_get_negotiated_features(int vid, uint64_t *features); - -/* Register callbacks. */ -int rte_vhost_driver_callback_register(const char *path, - struct vhost_device_ops const * const ops); - -/** - * - * Start the vhost-user driver. - * - * This function triggers the vhost-user negotiation. - * - * @param path - * The vhost-user socket file path - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_driver_start(const char *path); - -/** - * Get the MTU value of the device if set in QEMU. - * - * @param vid - * virtio-net device ID - * @param mtu - * The variable to store the MTU value - * - * @return - * 0: success - * -EAGAIN: device not yet started - * -ENOTSUP: device does not support MTU feature - */ -int rte_vhost_get_mtu(int vid, uint16_t *mtu); - -/** - * Get the numa node from which the virtio net device's memory - * is allocated. - * - * @param vid - * vhost device ID - * - * @return - * The numa node, -1 on failure - */ -int rte_vhost_get_numa_node(int vid); - -/** - * Get the virtio net device's ifname, which is the vhost-user socket - * file path. 
- * - * @param vid - * vhost device ID - * @param buf - * The buffer to stored the queried ifname - * @param len - * The length of buf - * - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_get_ifname(int vid, char *buf, size_t len); - -/** - * Get how many avail entries are left in the queue - * - * @param vid - * vhost device ID - * @param queue_id - * virtio queue index - * - * @return - * num of avail entires left - */ -uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); - -struct rte_mbuf; -struct rte_mempool; -/** - * This function adds buffers to the virtio devices RX virtqueue. Buffers can - * be received from the physical port or from another virtual device. A packet - * count is returned to indicate the number of packets that were succesfully - * added to the RX queue. - * @param vid - * vhost device ID - * @param queue_id - * virtio queue index in mq case - * @param pkts - * array to contain packets to be enqueued - * @param count - * packets num to be enqueued - * @return - * num of packets enqueued - */ -uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, - struct rte_mbuf **pkts, uint16_t count); - -/** - * This function gets guest buffers from the virtio device TX virtqueue, - * construct host mbufs, copies guest buffer content to host mbufs and - * store them in pkts to be processed. - * @param vid - * vhost device ID - * @param queue_id - * virtio queue index in mq case - * @param mbuf_pool - * mbuf_pool where host mbuf is allocated. - * @param pkts - * array to contain packets to be dequeued - * @param count - * packets num to be dequeued - * @return - * num of packets dequeued - */ -uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, - struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); - -/** - * Get guest mem table: a list of memory regions. - * - * An rte_vhost_vhost_memory object will be allocated internaly, to hold the - * guest memory regions. 
Application should free it at destroy_device() - * callback. - * - * @param vid - * vhost device ID - * @param mem - * To store the returned mem regions - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem); - -/** - * Get guest vring info, including the vring address, vring size, etc. - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param vring - * the structure to hold the requested vring info - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, - struct rte_vhost_vring *vring); - -/** - * Set id of the last descriptors in avail and used guest vrings. - * - * In case user application operates directly on buffers, it should use this - * function on device destruction to retrieve the same values later on in device - * creation via rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *) - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param last_avail_idx - * id of the last descriptor in avail ring to be set - * @param last_used_idx - * id of the last descriptor in used ring to be set - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_set_vring_base(int vid, uint16_t queue_id, - uint16_t last_avail_idx, uint16_t last_used_idx); - -int rte_vhost_get_vring_base(int vid, uint16_t queue_id, - uint16_t *last_avail_idx, uint16_t *last_used_idx); - -/** - * Notify the guest that used descriptors have been added to the vring. - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @return - * 0 on success, -1 on failure - */ -int rte_vhost_vring_call(int vid, uint16_t vring_idx); - -/** - * Get guest inflight vring info, including inflight ring and resubmit list. 
- * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param vring - * the structure to hold the requested inflight vring info - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx, - struct rte_vhost_ring_inflight *vring); - -/** - * Set split inflight descriptor. - * - * This function save descriptors that has been comsumed in available - * ring - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param idx - * inflight entry index - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, - uint16_t idx); - -/** - * Save the head of list that the last batch of used descriptors. - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param idx - * descriptor entry index - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_set_last_inflight_io_split(int vid, - uint16_t vring_idx, uint16_t idx); - -/** - * Clear the split inflight status. - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param last_used_idx - * last used idx of used ring - * @param idx - * inflight entry index - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, - uint16_t last_used_idx, uint16_t idx); - -/** - * Save the head of list that the last batch of used descriptors. - * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param idx - * descriptor entry index - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_set_last_inflight_io_split(int vid, - uint16_t vring_idx, uint16_t idx); - -/** - * Clear the split inflight status. 
- * - * @param vid - * vhost device ID - * @param vring_idx - * vring index - * @param last_used_idx - * last used idx of used ring - * @param idx - * inflight entry index - * @return - * 0 on success, -1 on failure - */ -__rte_experimental -int -rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, - uint16_t last_used_idx, uint16_t idx); -#endif /* _RTE_VHOST_H_ */ diff --git a/lib/rte_vhost/socket.c b/lib/rte_vhost/socket.c deleted file mode 100644 index ec923518be..0000000000 --- a/lib/rte_vhost/socket.c +++ /dev/null @@ -1,841 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "fd_man.h" -#include "vhost.h" -#include "vhost_user.h" - - -TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); - -/* - * Every time rte_vhost_driver_register() is invoked, an associated - * vhost_user_socket struct will be created. - */ -struct vhost_user_socket { - struct vhost_user_connection_list conn_list; - pthread_mutex_t conn_mutex; - char *path; - int socket_fd; - struct sockaddr_un un; - bool is_server; - bool reconnect; - bool dequeue_zero_copy; - - /* - * The "supported_features" indicates the feature bits the - * vhost driver supports. The "features" indicates the feature - * bits after the rte_vhost_driver_features_disable/enable(). - * It is also the final feature bits used for vhost-user - * features negotiation. 
- */ - uint64_t supported_features; - uint64_t features; - - struct vhost_device_ops const *notify_ops; -}; - -struct vhost_user_connection { - struct vhost_user_socket *vsocket; - int connfd; - int vid; - - TAILQ_ENTRY(vhost_user_connection) next; -}; - -#define MAX_VHOST_SOCKET 1024 -struct vhost_user { - struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; - struct fdset fdset; - int vsocket_cnt; - pthread_mutex_t mutex; -}; - -#define MAX_VIRTIO_BACKLOG 128 - -static void vhost_user_server_new_connection(int fd, void *data, int *remove); -static void vhost_user_read_cb(int fd, void *dat, int *remove); -static int create_unix_socket(struct vhost_user_socket *vsocket); -static int vhost_user_start_client(struct vhost_user_socket *vsocket); - -static struct vhost_user vhost_user = { - .fdset = { - .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, - .fd_mutex = PTHREAD_MUTEX_INITIALIZER, - .num = 0 - }, - .vsocket_cnt = 0, - .mutex = PTHREAD_MUTEX_INITIALIZER, -}; - -/* return bytes# of read on success or negative val on failure. 
*/ -int -read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) -{ - struct iovec iov; - struct msghdr msgh; - size_t fdsize = fd_num * sizeof(int); - char control[CMSG_SPACE(fdsize)]; - struct cmsghdr *cmsg; - int ret; - - memset(&msgh, 0, sizeof(msgh)); - iov.iov_base = buf; - iov.iov_len = buflen; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - ret = recvmsg(sockfd, &msgh, 0); - if (ret <= 0) { - if (ret) - RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed, %s\n", strerror(errno)); - else - RTE_LOG(INFO, VHOST_CONFIG, "peer closed\n"); - return ret; - } - - if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { - RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); - return -1; - } - - for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msgh, cmsg)) { - if ((cmsg->cmsg_level == SOL_SOCKET) && - (cmsg->cmsg_type == SCM_RIGHTS)) { - memcpy(fds, CMSG_DATA(cmsg), fdsize); - break; - } - } - - return ret; -} - -int -send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) -{ - - struct iovec iov; - struct msghdr msgh; - size_t fdsize = fd_num * sizeof(int); - char control[CMSG_SPACE(fdsize)]; - struct cmsghdr *cmsg; - int ret; - - memset(&msgh, 0, sizeof(msgh)); - iov.iov_base = buf; - iov.iov_len = buflen; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - - if (fds && fd_num > 0) { - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - cmsg = CMSG_FIRSTHDR(&msgh); - if (cmsg == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); - errno = EINVAL; - return -1; - } - cmsg->cmsg_len = CMSG_LEN(fdsize); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } else { - msgh.msg_control = NULL; - msgh.msg_controllen = 0; - } - - do { - ret = sendmsg(sockfd, &msgh, 0); - } while (ret < 0 && errno == EINTR); - - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); - return ret; - } - - 
return ret; -} - -static void -vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) -{ - int vid; - size_t size; - struct vhost_user_connection *conn; - int ret; - - conn = malloc(sizeof(*conn)); - if (conn == NULL) { - close(fd); - return; - } - - vid = vhost_new_device(vsocket->features, vsocket->notify_ops); - if (vid == -1) { - goto err; - } - - size = strnlen(vsocket->path, PATH_MAX); - vhost_set_ifname(vid, vsocket->path, size); - - if (vsocket->dequeue_zero_copy) - vhost_enable_dequeue_zero_copy(vid); - - RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); - - if (vsocket->notify_ops->new_connection) { - ret = vsocket->notify_ops->new_connection(vid); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to add vhost user connection with fd %d\n", - fd); - goto err; - } - } - - conn->connfd = fd; - conn->vsocket = vsocket; - conn->vid = vid; - ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, - NULL, conn); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to add fd %d into vhost server fdset\n", - fd); - - if (vsocket->notify_ops->destroy_connection) - vsocket->notify_ops->destroy_connection(conn->vid); - - goto err; - } - - pthread_mutex_lock(&vsocket->conn_mutex); - TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); - pthread_mutex_unlock(&vsocket->conn_mutex); - return; - -err: - free(conn); - close(fd); -} - -/* call back when there is new vhost-user connection from client */ -static void -vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) -{ - struct vhost_user_socket *vsocket = dat; - - fd = accept(fd, NULL, NULL); - if (fd < 0) - return; - - RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); - vhost_user_add_connection(fd, vsocket); -} - -static void -vhost_user_read_cb(int connfd, void *dat, int *remove) -{ - struct vhost_user_connection *conn = dat; - struct vhost_user_socket *vsocket = conn->vsocket; - int ret; - - ret = vhost_user_msg_handler(conn->vid, 
connfd); - if (ret < 0) { - *remove = 1; - vhost_destroy_device(conn->vid); - - if (vsocket->notify_ops->destroy_connection) - vsocket->notify_ops->destroy_connection(conn->vid); - - pthread_mutex_lock(&vsocket->conn_mutex); - TAILQ_REMOVE(&vsocket->conn_list, conn, next); - if (conn->connfd != -1) { - close(conn->connfd); - conn->connfd = -1; - } - pthread_mutex_unlock(&vsocket->conn_mutex); - - free(conn); - - if (vsocket->reconnect) { - create_unix_socket(vsocket); - vhost_user_start_client(vsocket); - } - } -} - -static int -create_unix_socket(struct vhost_user_socket *vsocket) -{ - int fd; - struct sockaddr_un *un = &vsocket->un; - - fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) - return -1; - RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", - vsocket->is_server ? "server" : "client", fd); - - if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { - RTE_LOG(ERR, VHOST_CONFIG, - "vhost-user: can't set nonblocking mode for socket, fd: " - "%d (%s)\n", fd, strerror(errno)); - close(fd); - return -1; - } - - memset(un, 0, sizeof(*un)); - un->sun_family = AF_UNIX; - strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path)); - un->sun_path[sizeof(un->sun_path) - 1] = '\0'; - - vsocket->socket_fd = fd; - return 0; -} - -static int -vhost_user_start_server(struct vhost_user_socket *vsocket) -{ - int ret; - int fd = vsocket->socket_fd; - const char *path = vsocket->path; - - ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to bind to %s: %s; remove it and try again\n", - path, strerror(errno)); - goto err; - } - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); - - ret = listen(fd, MAX_VIRTIO_BACKLOG); - if (ret < 0) - goto err; - - ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, - NULL, vsocket); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to add listen fd %d to vhost server fdset\n", - fd); - goto err; - } - - return 0; 
- -err: - close(fd); - return -1; -} - -struct vhost_user_reconnect { - struct sockaddr_un un; - int fd; - struct vhost_user_socket *vsocket; - - TAILQ_ENTRY(vhost_user_reconnect) next; -}; - -TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); -struct vhost_user_reconnect_list { - struct vhost_user_reconnect_tailq_list head; - pthread_mutex_t mutex; -}; - -static struct vhost_user_reconnect_list reconn_list; -static pthread_t reconn_tid; - -static int -vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) -{ - int ret, flags; - - ret = connect(fd, un, sz); - if (ret < 0 && errno != EISCONN) - return -1; - - flags = fcntl(fd, F_GETFL, 0); - if (flags < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "can't get flags for connfd %d\n", fd); - return -2; - } - if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { - RTE_LOG(ERR, VHOST_CONFIG, - "can't disable nonblocking on fd %d\n", fd); - return -2; - } - return 0; -} - -static void * -vhost_user_client_reconnect(void *arg __rte_unused) -{ - int ret; - struct vhost_user_reconnect *reconn, *next; - - while (1) { - pthread_mutex_lock(&reconn_list.mutex); - - /* - * An equal implementation of TAILQ_FOREACH_SAFE, - * which does not exist on all platforms. 
- */ - for (reconn = TAILQ_FIRST(&reconn_list.head); - reconn != NULL; reconn = next) { - next = TAILQ_NEXT(reconn, next); - - ret = vhost_user_connect_nonblock(reconn->fd, - (struct sockaddr *)&reconn->un, - sizeof(reconn->un)); - if (ret == -2) { - close(reconn->fd); - RTE_LOG(ERR, VHOST_CONFIG, - "reconnection for fd %d failed\n", - reconn->fd); - goto remove_fd; - } - if (ret == -1) - continue; - - RTE_LOG(INFO, VHOST_CONFIG, - "%s: connected\n", reconn->vsocket->path); - vhost_user_add_connection(reconn->fd, reconn->vsocket); -remove_fd: - TAILQ_REMOVE(&reconn_list.head, reconn, next); - free(reconn); - } - - pthread_mutex_unlock(&reconn_list.mutex); - sleep(1); - } - - return NULL; -} - -static int -vhost_user_reconnect_init(void) -{ - int ret; - - pthread_mutex_init(&reconn_list.mutex, NULL); - TAILQ_INIT(&reconn_list.head); - - ret = pthread_create(&reconn_tid, NULL, - vhost_user_client_reconnect, NULL); - if (ret < 0) - RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); - - return ret; -} - -static int -vhost_user_start_client(struct vhost_user_socket *vsocket) -{ - int ret; - int fd = vsocket->socket_fd; - const char *path = vsocket->path; - struct vhost_user_reconnect *reconn; - - ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un, - sizeof(vsocket->un)); - if (ret == 0) { - vhost_user_add_connection(fd, vsocket); - return 0; - } - - RTE_LOG(WARNING, VHOST_CONFIG, - "failed to connect to %s: %s\n", - path, strerror(errno)); - - if (ret == -2 || !vsocket->reconnect) { - close(fd); - return -1; - } - - RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); - reconn = malloc(sizeof(*reconn)); - if (reconn == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to allocate memory for reconnect\n"); - close(fd); - return -1; - } - reconn->un = vsocket->un; - reconn->fd = fd; - reconn->vsocket = vsocket; - pthread_mutex_lock(&reconn_list.mutex); - TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); - 
pthread_mutex_unlock(&reconn_list.mutex); - - return 0; -} - -static struct vhost_user_socket * -find_vhost_user_socket(const char *path) -{ - int i; - - for (i = 0; i < vhost_user.vsocket_cnt; i++) { - struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; - - if (!strcmp(vsocket->path, path)) - return vsocket; - } - - return NULL; -} - -int -rte_vhost_driver_disable_features(const char *path, uint64_t features) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - if (vsocket) - vsocket->features &= ~features; - pthread_mutex_unlock(&vhost_user.mutex); - - return vsocket ? 0 : -1; -} - -int -rte_vhost_driver_enable_features(const char *path, uint64_t features) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - if (vsocket) { - if ((vsocket->supported_features & features) != features) { - /* - * trying to enable features the driver doesn't - * support. - */ - pthread_mutex_unlock(&vhost_user.mutex); - return -1; - } - vsocket->features |= features; - } - pthread_mutex_unlock(&vhost_user.mutex); - - return vsocket ? 0 : -1; -} - -int -rte_vhost_driver_set_features(const char *path, uint64_t features) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - if (vsocket) { - vsocket->supported_features = features; - vsocket->features = features; - } - pthread_mutex_unlock(&vhost_user.mutex); - - return vsocket ? 
0 : -1; -} - -int -rte_vhost_driver_get_features(const char *path, uint64_t *features) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - if (vsocket) - *features = vsocket->features; - pthread_mutex_unlock(&vhost_user.mutex); - - if (!vsocket) { - RTE_LOG(ERR, VHOST_CONFIG, - "socket file %s is not registered yet.\n", path); - return -1; - } else { - return 0; - } -} - -/* - * Register a new vhost-user socket; here we could act as server - * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag - * is set. - */ -int -rte_vhost_driver_register(const char *path, uint64_t flags) -{ - int ret = -1; - struct vhost_user_socket *vsocket; - - if (!path) - return -1; - - pthread_mutex_lock(&vhost_user.mutex); - - if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { - RTE_LOG(ERR, VHOST_CONFIG, - "error: the number of vhost sockets reaches maximum\n"); - goto out; - } - - vsocket = malloc(sizeof(struct vhost_user_socket)); - if (!vsocket) - goto out; - memset(vsocket, 0, sizeof(struct vhost_user_socket)); - vsocket->path = strdup(path); - if (!vsocket->path) { - free(vsocket); - goto out; - } - TAILQ_INIT(&vsocket->conn_list); - vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - - /* - * Set the supported features correctly for the builtin vhost-user - * net driver. - * - * Applications know nothing about features the builtin virtio net - * driver (virtio_net.c) supports, thus it's not possible for them - * to invoke rte_vhost_driver_set_features(). To workaround it, here - * we set it unconditionally. If the application want to implement - * another vhost-user driver (say SCSI), it should call the - * rte_vhost_driver_set_features(), which will overwrite following - * two values. 
- */ - vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; - vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; - - if ((flags & RTE_VHOST_USER_CLIENT) != 0) { - vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); - if (vsocket->reconnect && reconn_tid == 0) { - if (vhost_user_reconnect_init() < 0) { - free(vsocket->path); - free(vsocket); - goto out; - } - } - } else { - vsocket->is_server = true; - } - ret = create_unix_socket(vsocket); - if (ret < 0) { - free(vsocket->path); - free(vsocket); - goto out; - } - - pthread_mutex_init(&vsocket->conn_mutex, NULL); - vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; - -out: - pthread_mutex_unlock(&vhost_user.mutex); - - return ret; -} - -static bool -vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) -{ - int found = false; - struct vhost_user_reconnect *reconn, *next; - - pthread_mutex_lock(&reconn_list.mutex); - - for (reconn = TAILQ_FIRST(&reconn_list.head); - reconn != NULL; reconn = next) { - next = TAILQ_NEXT(reconn, next); - - if (reconn->vsocket == vsocket) { - TAILQ_REMOVE(&reconn_list.head, reconn, next); - close(reconn->fd); - free(reconn); - found = true; - break; - } - } - pthread_mutex_unlock(&reconn_list.mutex); - return found; -} - -/** - * Unregister the specified vhost socket - */ -int -rte_vhost_driver_unregister(const char *path) -{ - int i; - int count; - struct vhost_user_connection *conn; - - pthread_mutex_lock(&vhost_user.mutex); - - for (i = 0; i < vhost_user.vsocket_cnt; i++) { - struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; - - if (!strcmp(vsocket->path, path)) { - if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->socket_fd); - close(vsocket->socket_fd); - unlink(path); - } else if (vsocket->reconnect) { - vhost_user_remove_reconnect(vsocket); - } - - pthread_mutex_lock(&vsocket->conn_mutex); - TAILQ_FOREACH(conn, &vsocket->conn_list, next) { - close(conn->connfd); - conn->connfd = -1; - } - 
pthread_mutex_unlock(&vsocket->conn_mutex); - - do { - pthread_mutex_lock(&vsocket->conn_mutex); - conn = TAILQ_FIRST(&vsocket->conn_list); - pthread_mutex_unlock(&vsocket->conn_mutex); - } while (conn != NULL); - - free(vsocket->path); - free(vsocket); - - count = --vhost_user.vsocket_cnt; - vhost_user.vsockets[i] = vhost_user.vsockets[count]; - vhost_user.vsockets[count] = NULL; - pthread_mutex_unlock(&vhost_user.mutex); - - return 0; - } - } - pthread_mutex_unlock(&vhost_user.mutex); - - return -1; -} - -/* - * Register ops so that we can add/remove device to data core. - */ -int -rte_vhost_driver_callback_register(const char *path, - struct vhost_device_ops const * const ops) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - if (vsocket) - vsocket->notify_ops = ops; - pthread_mutex_unlock(&vhost_user.mutex); - - return vsocket ? 0 : -1; -} - -struct vhost_device_ops const * -vhost_driver_callback_get(const char *path) -{ - struct vhost_user_socket *vsocket; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - pthread_mutex_unlock(&vhost_user.mutex); - - return vsocket ? 
vsocket->notify_ops : NULL; -} - -int -rte_vhost_driver_start(const char *path) -{ - struct vhost_user_socket *vsocket; - static pthread_t fdset_tid; - - pthread_mutex_lock(&vhost_user.mutex); - vsocket = find_vhost_user_socket(path); - pthread_mutex_unlock(&vhost_user.mutex); - - if (!vsocket) - return -1; - - if (fdset_tid == 0) { - rte_cpuset_t orig_cpuset; - rte_cpuset_t tmp_cpuset; - long num_cores, i; - int ret; - - CPU_ZERO(&tmp_cpuset); - num_cores = sysconf(_SC_NPROCESSORS_CONF); - /* Create a mask containing all CPUs */ - for (i = 0; i < num_cores; i++) { - CPU_SET(i, &tmp_cpuset); - } - - rte_thread_get_affinity(&orig_cpuset); - rte_thread_set_affinity(&tmp_cpuset); - ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, - &vhost_user.fdset); - rte_thread_set_affinity(&orig_cpuset); - if (ret < 0) - RTE_LOG(ERR, VHOST_CONFIG, - "failed to create fdset handling thread"); - } - - if (vsocket->is_server) - return vhost_user_start_server(vsocket); - else - return vhost_user_start_client(vsocket); -} diff --git a/lib/rte_vhost/vhost.c b/lib/rte_vhost/vhost.c deleted file mode 100644 index 8e875c585f..0000000000 --- a/lib/rte_vhost/vhost.c +++ /dev/null @@ -1,565 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#ifdef RTE_LIBRTE_VHOST_NUMA -#include -#endif - -#include -#include -#include -#include -#include -#include - -#include "vhost.h" - -struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; - -struct virtio_net * -get_device(int vid) -{ - struct virtio_net *dev = vhost_devices[vid]; - - if (unlikely(!dev)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) device not found.\n", vid); - } - - return dev; -} - -static void -cleanup_vq(struct vhost_virtqueue *vq, int destroy) -{ - if ((vq->callfd >= 0) && (destroy != 0)) - close(vq->callfd); - if (vq->kickfd >= 0) - close(vq->kickfd); -} - -/* - * Unmap any memory, close any file descriptors and - * free any memory owned by a device. - */ -void -cleanup_device(struct virtio_net *dev, int destroy) -{ - uint32_t i; - - vhost_backend_cleanup(dev); - - for (i = 0; i < dev->nr_vring; i++) - cleanup_vq(dev->virtqueue[i], destroy); -} - -/* - * Release virtqueues and device memory. 
- */ -static void -free_device(struct virtio_net *dev) -{ - uint32_t i; - struct vhost_virtqueue *vq; - - for (i = 0; i < dev->nr_vring; i++) { - vq = dev->virtqueue[i]; - - rte_free(vq->shadow_used_ring); - - rte_free(vq); - } - - rte_free(dev); -} - -static void -init_vring_queue(struct vhost_virtqueue *vq) -{ - memset(vq, 0, sizeof(struct vhost_virtqueue)); - - vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; - vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; - - /* Backends are set to -1 indicating an inactive device. */ - vq->backend = -1; - - /* - * always set the vq to enabled; this is to keep compatibility - * with the old QEMU, whereas there is no SET_VRING_ENABLE message. - */ - vq->enabled = 1; - - TAILQ_INIT(&vq->zmbuf_list); -} - -static void -reset_vring_queue(struct vhost_virtqueue *vq) -{ - int callfd; - - callfd = vq->callfd; - init_vring_queue(vq); - vq->callfd = callfd; -} - -int -alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) -{ - struct vhost_virtqueue *vq; - - vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0); - if (vq == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for vring:%u.\n", vring_idx); - return -1; - } - - dev->virtqueue[vring_idx] = vq; - init_vring_queue(vq); - - dev->nr_vring += 1; - - return 0; -} - -/* - * Reset some variables in device structure, while keeping few - * others untouched, such as vid, ifname, nr_vring: they - * should be same unless the device is removed. - */ -void -reset_device(struct virtio_net *dev) -{ - uint32_t i; - - dev->negotiated_features = 0; - dev->protocol_features = 0; - dev->flags = 0; - - for (i = 0; i < dev->nr_vring; i++) - reset_vring_queue(dev->virtqueue[i]); -} - -/* - * Invoked when there is a new vhost-user connection established (when - * there is a new virtio device being attached). 
- */ -int -vhost_new_device(uint64_t features, struct vhost_device_ops const *ops) -{ - struct virtio_net *dev; - int i; - - dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); - if (dev == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for new dev.\n"); - return -1; - } - - for (i = 0; i < MAX_VHOST_DEVICE; i++) { - if (vhost_devices[i] == NULL) - break; - } - if (i == MAX_VHOST_DEVICE) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to find a free slot for new device.\n"); - rte_free(dev); - return -1; - } - - vhost_devices[i] = dev; - dev->vid = i; - dev->features = features; - dev->notify_ops = ops; - - return i; -} - -/* - * Invoked when there is the vhost-user connection is broken (when - * the virtio device is being detached). - */ -void -vhost_destroy_device(int vid) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return; - - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(vid); - } - - cleanup_device(dev, 1); - free_device(dev); - - vhost_devices[vid] = NULL; -} - -void -vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) -{ - struct virtio_net *dev; - unsigned int len; - - dev = get_device(vid); - if (dev == NULL) - return; - - len = if_len > sizeof(dev->ifname) ? 
- sizeof(dev->ifname) : if_len; - - strncpy(dev->ifname, if_name, len); - dev->ifname[sizeof(dev->ifname) - 1] = '\0'; -} - -void -vhost_enable_dequeue_zero_copy(int vid) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return; - - dev->dequeue_zero_copy = 1; -} - -int -rte_vhost_get_mtu(int vid, uint16_t *mtu) -{ - struct virtio_net *dev = get_device(vid); - - if (!dev) - return -ENODEV; - - if (!(dev->flags & VIRTIO_DEV_READY)) - return -EAGAIN; - - if (!(dev->negotiated_features & VIRTIO_NET_F_MTU)) - return -ENOTSUP; - - *mtu = dev->mtu; - - return 0; -} - -int -rte_vhost_get_numa_node(int vid) -{ -#ifdef RTE_LIBRTE_VHOST_NUMA - struct virtio_net *dev = get_device(vid); - int numa_node; - int ret; - - if (dev == NULL) - return -1; - - ret = get_mempolicy(&numa_node, NULL, 0, dev, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to query numa node: %d\n", vid, ret); - return -1; - } - - return numa_node; -#else - RTE_SET_USED(vid); - return -1; -#endif -} - -int -rte_vhost_get_ifname(int vid, char *buf, size_t len) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return -1; - - len = RTE_MIN(len, sizeof(dev->ifname)); - - strncpy(buf, dev->ifname, len); - buf[len - 1] = '\0'; - - return 0; -} - -int -rte_vhost_get_negotiated_features(int vid, uint64_t *features) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (!dev) - return -1; - - *features = dev->negotiated_features; - return 0; -} - -int -rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) -{ - struct virtio_net *dev; - struct rte_vhost_memory *m; - size_t size; - - dev = get_device(vid); - if (!dev) - return -1; - - size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region); - m = malloc(sizeof(struct rte_vhost_memory) + size); - if (!m) - return -1; - - m->nregions = dev->mem->nregions; - memcpy(m->regions, dev->mem->regions, size); - *mem = m; - - return 0; -} - -int 
-rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, - struct rte_vhost_vring *vring) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (!dev) - return -1; - - if (vring_idx >= VHOST_MAX_VRING) - return -1; - - vq = dev->virtqueue[vring_idx]; - if (!vq) - return -1; - - vring->desc = vq->desc; - vring->avail = vq->avail; - vring->used = vq->used; - vring->log_guest_addr = vq->log_guest_addr; - - vring->callfd = vq->callfd; - vring->kickfd = vq->kickfd; - vring->size = vq->size; - - return 0; -} - -uint16_t -rte_vhost_avail_entries(int vid, uint16_t queue_id) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (!dev) - return 0; - - vq = dev->virtqueue[queue_id]; - if (!vq->enabled) - return 0; - - return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; -} - -int -rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return -1; - - if (enable) { - RTE_LOG(ERR, VHOST_CONFIG, - "guest notification isn't supported.\n"); - return -1; - } - - dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; - return 0; -} - -void -rte_vhost_log_write(int vid, uint64_t addr, uint64_t len) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return; - - vhost_log_write(dev, addr, len); -} - -void -rte_vhost_log_used_vring(int vid, uint16_t vring_idx, - uint64_t offset, uint64_t len) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (dev == NULL) - return; - - if (vring_idx >= VHOST_MAX_VRING) - return; - vq = dev->virtqueue[vring_idx]; - if (!vq) - return; - - vhost_log_used_vring(dev, vq, offset, len); -} - -int -rte_vhost_set_vring_base(int vid, uint16_t vring_idx, - uint16_t last_avail_idx, uint16_t last_used_idx) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (!dev) - return -1; - - if 
(vring_idx >= VHOST_MAX_VRING) - return -1; - - vq = dev->virtqueue[vring_idx]; - if (!vq) - return -1; - - vq->last_avail_idx = last_avail_idx; - vq->last_used_idx = last_used_idx; - - return 0; -} - -int -rte_vhost_get_vring_base(int vid, uint16_t vring_idx, - uint16_t *last_avail_idx, uint16_t *last_used_idx) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (!dev) - return -1; - - if (vring_idx >= VHOST_MAX_VRING) - return -1; - - vq = dev->virtqueue[vring_idx]; - if (!vq) - return -1; - - *last_avail_idx = vq->last_avail_idx; - *last_used_idx = vq->last_used_idx; - - return 0; -} - -int -rte_vhost_vring_call(int vid, uint16_t vring_idx) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if(!dev) - return -1; - - if (vring_idx >= VHOST_MAX_VRING) - return -1; - - vq = dev->virtqueue[vring_idx]; - if (!vq) - return -1; - - /* Ensure all our used ring changes are visible to the guest at the time - * of interrupt. - * TODO: this is currently an sfence on x86. For other architectures we - * will most likely need an smp_mb(), but smp_mb() is an overkill for x86. - */ - rte_wmb(); - - if (vq->callfd != -1) { - eventfd_write(vq->callfd, (eventfd_t)1); - return 0; - } - - return -1; -} - -int -rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, - uint16_t idx) -{ - return 0; -} - -int -rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx, - uint16_t last_used_idx, uint16_t idx) -{ - return 0; -} - -int -rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, - uint16_t idx) -{ - return 0; -} - -int -rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx, - struct rte_vhost_ring_inflight *vring) -{ - return 0; -} diff --git a/lib/rte_vhost/vhost.h b/lib/rte_vhost/vhost.h deleted file mode 100644 index d738dba7f0..0000000000 --- a/lib/rte_vhost/vhost.h +++ /dev/null @@ -1,330 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _VHOST_NET_CDEV_H_ -#define _VHOST_NET_CDEV_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "rte_vhost.h" -#include "vhost_user.h" - -/* Used to indicate that the device is running on a data core */ -#define VIRTIO_DEV_RUNNING 1 -/* Used to indicate that the device is ready to operate */ -#define VIRTIO_DEV_READY 2 - -/* Backend value set by guest. */ -#define VIRTIO_DEV_STOPPED -1 - -#define BUF_VECTOR_MAX 256 - -/** - * Structure contains buffer address, length and descriptor index - * from vring to do scatter RX. - */ -struct buf_vector { - uint64_t buf_addr; - uint32_t buf_len; - uint32_t desc_idx; -}; - -/* - * A structure to hold some fields needed in zero copy code path, - * mainly for associating an mbuf with the right desc_idx. - */ -struct zcopy_mbuf { - struct rte_mbuf *mbuf; - uint32_t desc_idx; - uint16_t in_use; - - TAILQ_ENTRY(zcopy_mbuf) next; -}; -TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); - -/** - * Structure contains variables relevant to RX/TX virtqueues. 
- */ -struct vhost_virtqueue { - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; - uint32_t size; - - uint16_t last_avail_idx; - uint16_t last_used_idx; -#define VIRTIO_INVALID_EVENTFD (-1) -#define VIRTIO_UNINITIALIZED_EVENTFD (-2) - - /* Backend value to determine if device should started/stopped */ - int backend; - /* Used to notify the guest (trigger interrupt) */ - int callfd; - /* Currently unused as polling mode is enabled */ - int kickfd; - int enabled; - - /* Physical address of used ring, for logging */ - uint64_t log_guest_addr; - - uint16_t nr_zmbuf; - uint16_t zmbuf_size; - uint16_t last_zmbuf_idx; - struct zcopy_mbuf *zmbufs; - struct zcopy_mbuf_list zmbuf_list; - - struct vring_used_elem *shadow_used_ring; - uint16_t shadow_used_idx; -} __rte_cache_aligned; - -/* Old kernels have no such macros defined */ -#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE - #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 -#endif - -#ifndef VIRTIO_NET_F_MQ - #define VIRTIO_NET_F_MQ 22 -#endif - -#define VHOST_MAX_VRING 0x100 -#define VHOST_MAX_QUEUE_PAIRS 0x80 - -#ifndef VIRTIO_NET_F_MTU - #define VIRTIO_NET_F_MTU 3 -#endif - -/* - * Define virtio 1.0 for older kernels - */ -#ifndef VIRTIO_F_VERSION_1 - #define VIRTIO_F_VERSION_1 32 -#endif - -#define VHOST_USER_F_PROTOCOL_FEATURES 30 - -/* Features supported by this builtin vhost-user net driver. 
*/ -#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ - (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ - (1ULL << VIRTIO_NET_F_CTRL_RX) | \ - (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ - (1ULL << VIRTIO_NET_F_MQ) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VHOST_F_LOG_ALL) | \ - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ - (1ULL << VIRTIO_NET_F_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ - (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ - (1ULL << VIRTIO_NET_F_MTU)) - - -struct guest_page { - uint64_t guest_phys_addr; - uint64_t host_phys_addr; - uint64_t size; -}; - -/* struct ether_addr was renamed to struct rte_ether_addr at one point */ -#ifdef RTE_ETHER_ADDR_LEN -struct ether_addr { - uint8_t addr_bytes[RTE_ETHER_ADDR_LEN]; -} __attribute__((__packed__)); -#endif - -/** - * Device structure contains all configuration information relating - * to the device. - */ -struct virtio_net { - /* Frontend (QEMU) memory and memory region information */ - struct rte_vhost_memory *mem; - uint64_t features; - uint64_t negotiated_features; - uint64_t protocol_features; - int vid; - uint32_t is_nvme; - uint32_t flags; - uint16_t vhost_hlen; - /* to tell if we need broadcast rarp packet */ - rte_atomic16_t broadcast_rarp; - uint32_t nr_vring; - int dequeue_zero_copy; - struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; -#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) - char ifname[IF_NAME_SZ]; - uint64_t log_size; - uint64_t log_base; - uint64_t log_addr; - struct ether_addr mac; - uint16_t mtu; - - struct vhost_device_ops const *notify_ops; - - uint32_t nr_guest_pages; - uint32_t max_guest_pages; - struct guest_page *guest_pages; - int has_new_mem_table; - void *bar_addr; - uint64_t bar_size; - struct VhostUserMemory mem_table; - int mem_table_fds[VHOST_MEMORY_MAX_NREGIONS]; -} __rte_cache_aligned; - - -#define VHOST_LOG_PAGE 4096 - -static inline void __attribute__((always_inline)) -vhost_log_page(uint8_t *log_base, uint64_t page) -{ - log_base[page / 8] |= 1 << (page % 8); -} - -static inline void __attribute__((always_inline)) -vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) -{ - uint64_t page; - - if (likely(((dev->negotiated_features & (1ULL << VHOST_F_LOG_ALL)) == 0) || - !dev->log_base || !len)) - return; - - if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) - return; - - /* To make sure guest memory updates are committed before logging */ - rte_smp_wmb(); - - page = addr / VHOST_LOG_PAGE; - while (page * VHOST_LOG_PAGE < addr + len) { - vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); - page += 1; - } -} - -static inline void __attribute__((always_inline)) -vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t offset, uint64_t len) -{ - vhost_log_write(dev, vq->log_guest_addr + offset, len); -} - -/* Macros for printing using RTE_LOG */ -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 -#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 - -#ifdef RTE_LIBRTE_VHOST_DEBUG -#define VHOST_MAX_PRINT_BUFF 6072 -#define VHOST_LOG_LEVEL RTE_LOG_DEBUG -#define VHOST_LOG_DEBUG(log_type, fmt, args...) 
RTE_LOG(DEBUG, log_type, fmt, ##args) -#define PRINT_PACKET(device, addr, size, header) do { \ - char *pkt_addr = (char *)(addr); \ - unsigned int index; \ - char packet[VHOST_MAX_PRINT_BUFF]; \ - \ - if ((header)) \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ - else \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ - for (index = 0; index < (size); index++) { \ - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ - "%02hhx ", pkt_addr[index]); \ - } \ - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ - \ - VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ -} while (0) -#else -#define VHOST_LOG_LEVEL RTE_LOG_INFO -#define VHOST_LOG_DEBUG(log_type, fmt, args...) do {} while (0) -#define PRINT_PACKET(device, addr, size, header) do {} while (0) -#endif - -extern uint64_t VHOST_FEATURES; -#define MAX_VHOST_DEVICE 1024 -extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; - -/* Convert guest physical address to host physical address */ -static inline phys_addr_t __attribute__((always_inline)) -gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) -{ - uint32_t i; - struct guest_page *page; - - for (i = 0; i < dev->nr_guest_pages; i++) { - page = &dev->guest_pages[i]; - - if (gpa >= page->guest_phys_addr && - gpa + size < page->guest_phys_addr + page->size) { - return gpa - page->guest_phys_addr + - page->host_phys_addr; - } - } - - return 0; -} - -struct virtio_net *get_device(int vid); - -int vhost_new_device(uint64_t features, struct vhost_device_ops const *ops); -void cleanup_device(struct virtio_net *dev, int destroy); -void reset_device(struct virtio_net *dev); -void vhost_destroy_device(int); - -int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); - -void vhost_set_ifname(int, const char *if_name, 
unsigned int if_len); -void vhost_enable_dequeue_zero_copy(int vid); - -struct vhost_device_ops const *vhost_driver_callback_get(const char *path); - -/* - * Backend-specific cleanup. - * - * TODO: fix it; we have one backend now - */ -void vhost_backend_cleanup(struct virtio_net *dev); - -#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/rte_vhost/vhost_user.c b/lib/rte_vhost/vhost_user.c deleted file mode 100644 index a07483fcfa..0000000000 --- a/lib/rte_vhost/vhost_user.c +++ /dev/null @@ -1,1426 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef RTE_LIBRTE_VHOST_NUMA -#include -#endif - -#include -#include -#include - -#include "vhost.h" -#include "vhost_user.h" - -#define VIRTIO_MIN_MTU 68 -#define VIRTIO_MAX_MTU 65535 - -static const char *vhost_message_str[VHOST_USER_MAX] = { - [VHOST_USER_NONE] = "VHOST_USER_NONE", - [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", - [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", - [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", - [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", - [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", - [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", - [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", - [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", - [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", - [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", - [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", - [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", - [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", - [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", - [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", - [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", - [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", - [VHOST_USER_SET_VRING_ENABLE] 
= "VHOST_USER_SET_VRING_ENABLE", - [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", - [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", - [VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG", - [VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG", - [VHOST_USER_NVME_ADMIN] = "VHOST_USER_NVME_ADMIN", - [VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL", - [VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP", - [VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP", - [VHOST_USER_NVME_SET_BAR_MR] = "VHOST_USER_NVME_SET_BAR_MR" -}; - -static uint64_t -get_blk_size(int fd) -{ - struct stat stat; - int ret; - - ret = fstat(fd, &stat); - return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; -} - -static void -free_mem_region(struct virtio_net *dev) -{ - uint32_t i; - struct rte_vhost_mem_region *reg; - - if (!dev || !dev->mem) - return; - - for (i = 0; i < dev->mem->nregions; i++) { - reg = &dev->mem->regions[i]; - if (reg->host_user_addr) { - munmap(reg->mmap_addr, reg->mmap_size); - close(reg->fd); - } - } -} - -void -vhost_backend_cleanup(struct virtio_net *dev) -{ - uint32_t i; - - if (dev->has_new_mem_table) { - for (i = 0; i < dev->mem_table.nregions; i++) { - close(dev->mem_table_fds[i]); - } - dev->has_new_mem_table = 0; - } - if (dev->mem) { - free_mem_region(dev); - rte_free(dev->mem); - dev->mem = NULL; - } - - free(dev->guest_pages); - dev->guest_pages = NULL; - - if (dev->log_addr) { - munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); - dev->log_addr = 0; - } - if (dev->bar_addr) { - munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); - dev->bar_addr = NULL; - dev->bar_size = 0; - } -} - -/* - * This function just returns success at the moment unless - * the device hasn't been initialised. 
- */ -static int -vhost_user_set_owner(void) -{ - return 0; -} - -static int -vhost_user_reset_owner(struct virtio_net *dev) -{ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - cleanup_device(dev, 0); - reset_device(dev); - return 0; -} - -/* - * The features that we support are requested. - */ -static uint64_t -vhost_user_get_features(struct virtio_net *dev) -{ - return dev->features; -} - -/* - * We receive the negotiated features supported by us and the virtio device. - */ -static int -vhost_user_set_features(struct virtio_net *dev, uint64_t features) -{ - uint64_t vhost_features = 0; - - vhost_features = vhost_user_get_features(dev); - if (features & ~vhost_features) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) received invalid negotiated features.\n", - dev->vid); - return -1; - } - - if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->negotiated_features != features) { - if (dev->notify_ops->features_changed) { - dev->notify_ops->features_changed(dev->vid, features); - } else { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - } - - dev->negotiated_features = features; - if (dev->negotiated_features & - ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { - dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); - } else { - dev->vhost_hlen = sizeof(struct virtio_net_hdr); - } - VHOST_LOG_DEBUG(VHOST_CONFIG, - "(%d) mergeable RX buffers %s, virtio 1 %s\n", - dev->vid, - (dev->negotiated_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", - (dev->negotiated_features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); - - return 0; -} - -/* - * The virtio device sends us the size of the descriptor ring. 
- */ -static int -vhost_user_set_vring_num(struct virtio_net *dev, - VhostUserMsg *msg) -{ - struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; - - vq->size = msg->payload.state.num; - - if (dev->dequeue_zero_copy) { - vq->nr_zmbuf = 0; - vq->last_zmbuf_idx = 0; - vq->zmbuf_size = vq->size; - vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size * - sizeof(struct zcopy_mbuf), 0); - if (vq->zmbufs == NULL) { - RTE_LOG(WARNING, VHOST_CONFIG, - "failed to allocate mem for zero copy; " - "zero copy is force disabled\n"); - dev->dequeue_zero_copy = 0; - } - } - - vq->shadow_used_ring = rte_malloc(NULL, - vq->size * sizeof(struct vring_used_elem), - RTE_CACHE_LINE_SIZE); - if (!vq->shadow_used_ring) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to allocate memory for shadow used ring.\n"); - return -1; - } - - return 0; -} - -/* - * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the - * same numa node as the memory of vring descriptor. - */ -#ifdef RTE_LIBRTE_VHOST_NUMA -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index) -{ - int oldnode, newnode; - struct virtio_net *old_dev; - struct vhost_virtqueue *old_vq, *vq; - int ret; - - old_dev = dev; - vq = old_vq = dev->virtqueue[index]; - - ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, - MPOL_F_NODE | MPOL_F_ADDR); - - /* check if we need to reallocate vq */ - ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret) { - RTE_LOG(ERR, VHOST_CONFIG, - "Unable to get vq numa information.\n"); - return dev; - } - if (oldnode != newnode) { - RTE_LOG(INFO, VHOST_CONFIG, - "reallocate vq from %d to %d node\n", oldnode, newnode); - vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode); - if (!vq) - return dev; - - memcpy(vq, old_vq, sizeof(*vq)); - rte_free(old_vq); - } - - /* check if we need to reallocate dev */ - ret = get_mempolicy(&oldnode, NULL, 0, old_dev, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret) { - RTE_LOG(ERR, VHOST_CONFIG, - 
"Unable to get dev numa information.\n"); - goto out; - } - if (oldnode != newnode) { - RTE_LOG(INFO, VHOST_CONFIG, - "reallocate dev from %d to %d node\n", - oldnode, newnode); - dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); - if (!dev) { - dev = old_dev; - goto out; - } - - memcpy(dev, old_dev, sizeof(*dev)); - rte_free(old_dev); - } - -out: - dev->virtqueue[index] = vq; - vhost_devices[dev->vid] = dev; - - return dev; -} -#else -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index __rte_unused) -{ - return dev; -} -#endif - -/* - * Converts QEMU virtual address to Vhost virtual address. This function is - * used to convert the ring addresses to our address space. - */ -static uint64_t -qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) -{ - struct rte_vhost_mem_region *reg; - uint32_t i; - - /* Find the region where the address lives. */ - for (i = 0; i < dev->mem->nregions; i++) { - reg = &dev->mem->regions[i]; - - if (qva >= reg->guest_user_addr && - qva < reg->guest_user_addr + reg->size) { - - if (unlikely(*len > reg->guest_user_addr + reg->size - qva)) - *len = reg->guest_user_addr + reg->size - qva; - - return qva - reg->guest_user_addr + - reg->host_user_addr; - } - } - - return 0; -} - -static int vhost_setup_mem_table(struct virtio_net *dev); - -/* - * The virtio device sends us the desc, used and avail ring addresses. - * This function then converts these to our address space. - */ -static int -vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg) -{ - struct vhost_virtqueue *vq; - uint64_t len; - - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - if (dev->has_new_mem_table) { - vhost_setup_mem_table(dev); - dev->has_new_mem_table = 0; - } - - if (dev->mem == NULL) - return -1; - - /* addr->index refers to the queue index. The txq 1, rxq is 0. 
*/ - vq = dev->virtqueue[msg->payload.addr.index]; - - /* The addresses are converted from QEMU virtual to Vhost virtual. */ - len = sizeof(struct vring_desc) * vq->size; - vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.desc_user_addr, &len); - if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to map desc ring.\n", - dev->vid); - return -1; - } - - dev = numa_realloc(dev, msg->payload.addr.index); - vq = dev->virtqueue[msg->payload.addr.index]; - - len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; - vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.avail_user_addr, &len); - if (vq->avail == 0 || - len != sizeof(struct vring_avail) - + sizeof(uint16_t) * vq->size) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to find avail ring address.\n", - dev->vid); - return -1; - } - - len = sizeof(struct vring_used) + - sizeof(struct vring_used_elem) * vq->size; - vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, - msg->payload.addr.used_user_addr, &len); - if (vq->used == 0 || len != sizeof(struct vring_used) + - sizeof(struct vring_used_elem) * vq->size) { - - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to find used ring address.\n", - dev->vid); - return -1; - } - - if (vq->last_used_idx != vq->used->idx) { - RTE_LOG(WARNING, VHOST_CONFIG, - "last_used_idx (%u) and vq->used->idx (%u) mismatches; " - "some packets maybe resent for Tx and dropped for Rx\n", - vq->last_used_idx, vq->used->idx); - vq->last_used_idx = vq->used->idx; - vq->last_avail_idx = vq->used->idx; - } - - vq->log_guest_addr = msg->payload.addr.log_guest_addr; - - VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", - dev->vid, vq->desc); - VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", - dev->vid, vq->avail); - VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", - dev->vid, vq->used); - VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) 
log_guest_addr: %" PRIx64 "\n", - dev->vid, vq->log_guest_addr); - - return 0; -} - -/* - * The virtio device sends us the available ring last used index. - */ -static int -vhost_user_set_vring_base(struct virtio_net *dev, - VhostUserMsg *msg) -{ - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - dev->virtqueue[msg->payload.state.index]->last_used_idx = msg->payload.state.num; - dev->virtqueue[msg->payload.state.index]->last_avail_idx = msg->payload.state.num; - - return 0; -} - -static void -add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, - uint64_t host_phys_addr, uint64_t size) -{ - struct guest_page *page, *last_page; - - if (dev->nr_guest_pages == dev->max_guest_pages) { - dev->max_guest_pages = RTE_MAX(8U, dev->max_guest_pages * 2); - dev->guest_pages = realloc(dev->guest_pages, - dev->max_guest_pages * sizeof(*page)); - } - - if (dev->nr_guest_pages > 0) { - last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; - /* merge if the two pages are continuous */ - if (host_phys_addr == last_page->host_phys_addr + - last_page->size) { - last_page->size += size; - return; - } - } - - page = &dev->guest_pages[dev->nr_guest_pages++]; - page->guest_phys_addr = guest_phys_addr; - page->host_phys_addr = host_phys_addr; - page->size = size; -} - -static void -add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, - uint64_t page_size) -{ - uint64_t reg_size = reg->size; - uint64_t host_user_addr = reg->host_user_addr; - uint64_t guest_phys_addr = reg->guest_phys_addr; - uint64_t host_phys_addr; - uint64_t size; - - host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr); - size = page_size - (guest_phys_addr & (page_size - 1)); - size = RTE_MIN(size, reg_size); - - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); - host_user_addr += size; - guest_phys_addr += size; - reg_size -= size; - - 
while (reg_size > 0) { - size = RTE_MIN(reg_size, page_size); - host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) - host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); - - host_user_addr += size; - guest_phys_addr += size; - reg_size -= size; - } -} - -#ifdef RTE_LIBRTE_VHOST_DEBUG -/* TODO: enable it only in debug mode? */ -static void -dump_guest_pages(struct virtio_net *dev) -{ - uint32_t i; - struct guest_page *page; - - for (i = 0; i < dev->nr_guest_pages; i++) { - page = &dev->guest_pages[i]; - - RTE_LOG(INFO, VHOST_CONFIG, - "guest physical page region %u\n" - "\t guest_phys_addr: %" PRIx64 "\n" - "\t host_phys_addr : %" PRIx64 "\n" - "\t size : %" PRIx64 "\n", - i, - page->guest_phys_addr, - page->host_phys_addr, - page->size); - } -} -#else -#define dump_guest_pages(dev) -#endif - -static int -vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) -{ - uint32_t i; - - if (dev->has_new_mem_table) { - /* - * The previous mem table was not consumed, so close the - * file descriptors from that mem table before copying - * the new one. - */ - for (i = 0; i < dev->mem_table.nregions; i++) { - close(dev->mem_table_fds[i]); - } - } - - memcpy(&dev->mem_table, &pmsg->payload.memory, sizeof(dev->mem_table)); - memcpy(dev->mem_table_fds, pmsg->fds, sizeof(dev->mem_table_fds)); - dev->has_new_mem_table = 1; - /* vhost-user-nvme will not send - * set vring addr message, enable - * memory address table now. 
- */ - if (dev->has_new_mem_table && dev->is_nvme) { - vhost_setup_mem_table(dev); - dev->has_new_mem_table = 0; - } - - return 0; -} - - static int -vhost_setup_mem_table(struct virtio_net *dev) -{ - struct VhostUserMemory memory = dev->mem_table; - struct rte_vhost_mem_region *reg; - struct vhost_virtqueue *vq; - void *mmap_addr; - uint64_t mmap_size; - uint64_t mmap_offset; - uint64_t alignment; - uint32_t i; - int fd; - - if (dev->mem) { - free_mem_region(dev); - rte_free(dev->mem); - dev->mem = NULL; - } - - for (i = 0; i < dev->nr_vring; i++) { - vq = dev->virtqueue[i]; - /* Those addresses won't be valid anymore in host address space - * after setting new mem table. Initiator need to resend these - * addresses. - */ - vq->desc = NULL; - vq->avail = NULL; - vq->used = NULL; - } - - dev->nr_guest_pages = 0; - if (!dev->guest_pages) { - dev->max_guest_pages = 8; - dev->guest_pages = malloc(dev->max_guest_pages * - sizeof(struct guest_page)); - } - - dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + - sizeof(struct rte_vhost_mem_region) * memory.nregions, 0); - if (dev->mem == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to allocate memory for dev->mem\n", - dev->vid); - return -1; - } - dev->mem->nregions = memory.nregions; - - for (i = 0; i < memory.nregions; i++) { - fd = dev->mem_table_fds[i]; - reg = &dev->mem->regions[i]; - - reg->guest_phys_addr = memory.regions[i].guest_phys_addr; - reg->guest_user_addr = memory.regions[i].userspace_addr; - reg->size = memory.regions[i].memory_size; - reg->fd = fd; - - mmap_offset = memory.regions[i].mmap_offset; - mmap_size = reg->size + mmap_offset; - - /* mmap() without flag of MAP_ANONYMOUS, should be called - * with length argument aligned with hugepagesz at older - * longterm version Linux, like 2.6.32 and 3.2.72, or - * mmap() will fail with EINVAL. - * - * to avoid failure, make sure in caller to keep length - * aligned. 
- */ - alignment = get_blk_size(fd); - if (alignment == (uint64_t)-1) { - RTE_LOG(ERR, VHOST_CONFIG, - "couldn't get hugepage size through fstat\n"); - goto err_mmap; - } - mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); - - mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, 0); - - if (mmap_addr == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, - "mmap region %u failed.\n", i); - goto err_mmap; - } - - if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) { - RTE_LOG(INFO, VHOST_CONFIG, - "MADV_DONTDUMP advice setting failed.\n"); - } - - reg->mmap_addr = mmap_addr; - reg->mmap_size = mmap_size; - reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + - mmap_offset; - - if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); - - RTE_LOG(INFO, VHOST_CONFIG, - "guest memory region %u, size: 0x%" PRIx64 "\n" - "\t guest physical addr: 0x%" PRIx64 "\n" - "\t guest virtual addr: 0x%" PRIx64 "\n" - "\t host virtual addr: 0x%" PRIx64 "\n" - "\t mmap addr : 0x%" PRIx64 "\n" - "\t mmap size : 0x%" PRIx64 "\n" - "\t mmap align: 0x%" PRIx64 "\n" - "\t mmap off : 0x%" PRIx64 "\n", - i, reg->size, - reg->guest_phys_addr, - reg->guest_user_addr, - reg->host_user_addr, - (uint64_t)(uintptr_t)mmap_addr, - mmap_size, - alignment, - mmap_offset); - } - - dump_guest_pages(dev); - - return 0; - -err_mmap: - free_mem_region(dev); - rte_free(dev->mem); - dev->mem = NULL; - return -1; -} - -static int -vq_is_ready(struct vhost_virtqueue *vq) -{ - return vq && vq->desc && - vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && - vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD && - vq->kickfd != VIRTIO_INVALID_EVENTFD && - vq->callfd != VIRTIO_INVALID_EVENTFD; -} - -static int -virtio_is_ready(struct virtio_net *dev) -{ - struct vhost_virtqueue *vq; - uint32_t i; - - if (dev->nr_vring == 0) - return 0; - - for (i = 0; i < dev->nr_vring; i++) { - vq = dev->virtqueue[i]; - - if (vq_is_ready(vq)) { - RTE_LOG(INFO, VHOST_CONFIG, - "virtio is now ready for 
processing.\n"); - return 1; - } - } - - return 0; -} - -static void -vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) -{ - struct vhost_vring_file file; - struct vhost_virtqueue *vq; - - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) - file.fd = VIRTIO_INVALID_EVENTFD; - else - file.fd = pmsg->fds[0]; - RTE_LOG(INFO, VHOST_CONFIG, - "vring call idx:%d file:%d\n", file.index, file.fd); - - vq = dev->virtqueue[file.index]; - if (vq->callfd >= 0) - close(vq->callfd); - - vq->callfd = file.fd; -} - -static void -vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) -{ - struct vhost_vring_file file; - struct vhost_virtqueue *vq; - - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) - file.fd = VIRTIO_INVALID_EVENTFD; - else - file.fd = pmsg->fds[0]; - RTE_LOG(INFO, VHOST_CONFIG, - "vring kick idx:%d file:%d\n", file.index, file.fd); - - vq = dev->virtqueue[file.index]; - if (vq->kickfd >= 0) - close(vq->kickfd); - vq->kickfd = file.fd; -} - -static void -free_zmbufs(struct vhost_virtqueue *vq) -{ - struct zcopy_mbuf *zmbuf, *next; - - for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); - zmbuf != NULL; zmbuf = next) { - next = TAILQ_NEXT(zmbuf, next); - - rte_pktmbuf_free(zmbuf->mbuf); - TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); - } - - rte_free(vq->zmbufs); -} - -/* - * when virtio is stopped, qemu will send us the GET_VRING_BASE message. 
- */ -static int -vhost_user_get_vring_base(struct virtio_net *dev, - VhostUserMsg *msg) -{ - struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; - - /* We have to stop the queue (virtio) if it is running. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - dev->flags &= ~VIRTIO_DEV_READY; - - /* Here we are safe to get the last used index */ - msg->payload.state.num = vq->last_used_idx; - - RTE_LOG(INFO, VHOST_CONFIG, - "vring base idx:%d file:%d\n", msg->payload.state.index, msg->payload.state.num); - /* - * Based on current qemu vhost-user implementation, this message is - * sent and only sent in vhost_vring_stop. - * TODO: cleanup the vring, it isn't usable since here. - */ - if (vq->kickfd >= 0) - close(vq->kickfd); - - vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; - - if (vq->callfd >= 0) - close(vq->callfd); - - vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; - - if (dev->dequeue_zero_copy) - free_zmbufs(vq); - rte_free(vq->shadow_used_ring); - vq->shadow_used_ring = NULL; - - return 0; -} - -/* - * when virtio queues are ready to work, qemu will send us to - * enable the virtio queue pair. - */ -static int -vhost_user_set_vring_enable(struct virtio_net *dev, - VhostUserMsg *msg) -{ - int enable = (int)msg->payload.state.num; - - RTE_LOG(INFO, VHOST_CONFIG, - "set queue enable: %d to qp idx: %d\n", - enable, msg->payload.state.index); - - if (dev->notify_ops->vring_state_changed) - dev->notify_ops->vring_state_changed(dev->vid, msg->payload.state.index, enable); - - dev->virtqueue[msg->payload.state.index]->enabled = enable; - - return 0; -} - -static void -vhost_user_set_protocol_features(struct virtio_net *dev, - uint64_t protocol_features) -{ - if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) - return; - - /* Remove from the data plane. 
*/ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - dev->protocol_features = protocol_features; -} - -static int -vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) -{ - int fd = msg->fds[0]; - uint64_t size, off; - void *addr; - - if (fd < 0) { - RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); - return -1; - } - - if (msg->size != sizeof(VhostUserLog)) { - RTE_LOG(ERR, VHOST_CONFIG, - "invalid log base msg size: %"PRId32" != %d\n", - msg->size, (int)sizeof(VhostUserLog)); - return -1; - } - - /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - - size = msg->payload.log.mmap_size; - off = msg->payload.log.mmap_offset; - RTE_LOG(INFO, VHOST_CONFIG, - "log mmap size: %"PRId64", offset: %"PRId64"\n", - size, off); - - /* - * mmap from 0 to workaround a hugepage mmap bug: mmap will - * fail when offset is not page size aligned. - */ - addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); - return -1; - } - - /* - * Free previously mapped log memory on occasionally - * multiple VHOST_USER_SET_LOG_BASE. - */ - if (dev->log_addr) { - munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); - } - dev->log_addr = (uint64_t)(uintptr_t)addr; - dev->log_base = dev->log_addr + off; - dev->log_size = size; - - return 0; -} - -/* - * An rarp packet is constructed and broadcasted to notify switches about - * the new location of the migrated VM, so that packets from outside will - * not be lost after migration. - * - * However, we don't actually "send" a rarp packet here, instead, we set - * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. 
- */ -static int -vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) -{ - uint8_t *mac = (uint8_t *)&msg->payload.u64; - - RTE_LOG(DEBUG, VHOST_CONFIG, - ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", - mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); - memcpy(dev->mac.addr_bytes, mac, 6); - - /* - * Set the flag to inject a RARP broadcast packet at - * rte_vhost_dequeue_burst(). - * - * rte_smp_wmb() is for making sure the mac is copied - * before the flag is set. - */ - rte_smp_wmb(); - rte_atomic16_set(&dev->broadcast_rarp, 1); - - return 0; -} - -static int -vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg) -{ - if (msg->payload.u64 < VIRTIO_MIN_MTU || - msg->payload.u64 > VIRTIO_MAX_MTU) { - RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n", - msg->payload.u64); - - return -1; - } - - dev->mtu = msg->payload.u64; - - return 0; -} - -/* return bytes# of read on success or negative val on failure. */ -static int -read_vhost_message(int sockfd, struct VhostUserMsg *msg) -{ - int ret; - - ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, - msg->fds, VHOST_MEMORY_MAX_NREGIONS); - if (ret <= 0) - return ret; - - if (msg && msg->size) { - if (msg->size > sizeof(msg->payload)) { - RTE_LOG(ERR, VHOST_CONFIG, - "invalid msg size: %d\n", msg->size); - return -1; - } - ret = read(sockfd, &msg->payload, msg->size); - if (ret <= 0) - return ret; - if (ret != (int)msg->size) { - RTE_LOG(ERR, VHOST_CONFIG, - "read control message failed\n"); - return -1; - } - } - - return ret; -} - -static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg) -{ - int ret; - - if (!msg) - return 0; - - msg->flags &= ~VHOST_USER_VERSION_MASK; - msg->flags &= ~VHOST_USER_NEED_REPLY; - msg->flags |= VHOST_USER_VERSION; - msg->flags |= VHOST_USER_REPLY_MASK; - - ret = send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); - - return ret; -} - -/* - * Allocate a queue pair if it hasn't been allocated 
yet - */ -static int -vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg) -{ - uint16_t vring_idx; - - switch (msg->request) { - case VHOST_USER_SET_VRING_KICK: - case VHOST_USER_SET_VRING_CALL: - case VHOST_USER_SET_VRING_ERR: - vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - break; - case VHOST_USER_SET_VRING_NUM: - case VHOST_USER_SET_VRING_BASE: - case VHOST_USER_SET_VRING_ENABLE: - vring_idx = msg->payload.state.index; - break; - case VHOST_USER_SET_VRING_ADDR: - vring_idx = msg->payload.addr.index; - break; - default: - return 0; - } - - if (vring_idx >= VHOST_MAX_VRING) { - RTE_LOG(ERR, VHOST_CONFIG, - "invalid vring index: %u\n", vring_idx); - return -1; - } - - if (dev->virtqueue[vring_idx]) - return 0; - - return alloc_vring_queue(dev, vring_idx); -} - -static int -vhost_user_nvme_admin_passthrough(struct virtio_net *dev, - void *cmd, void *cqe, void *buf) -{ - if (dev->notify_ops->vhost_nvme_admin_passthrough) { - return dev->notify_ops->vhost_nvme_admin_passthrough(dev->vid, cmd, cqe, buf); - } - - return -1; -} - -static int -vhost_user_nvme_set_cq_call(struct virtio_net *dev, uint16_t qid, int fd) -{ - if (dev->notify_ops->vhost_nvme_set_cq_call) { - return dev->notify_ops->vhost_nvme_set_cq_call(dev->vid, qid, fd); - } - - return -1; -} - -static int -vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap) -{ - if (dev->notify_ops->vhost_nvme_get_cap) { - return dev->notify_ops->vhost_nvme_get_cap(dev->vid, cap); - } - - return -1; -} - -static int -vhost_user_nvme_set_bar_mr(struct virtio_net *dev, struct VhostUserMsg *pmsg) -{ - struct VhostUserMemory mem_table; - int fd = pmsg->fds[0]; - void *mmap_addr; - uint64_t mmap_size; - uint64_t mmap_offset; - uint64_t alignment; - struct rte_vhost_mem_region reg; - int ret = 0; - - memcpy(&mem_table, &pmsg->payload.memory, sizeof(mem_table)); - - reg.guest_phys_addr = mem_table.regions[0].guest_phys_addr; - reg.guest_user_addr = 
mem_table.regions[0].userspace_addr; - reg.size = mem_table.regions[0].memory_size; - reg.fd = fd; - mmap_offset = mem_table.regions[0].mmap_offset; - mmap_size = reg.size + mmap_offset; - - alignment = get_blk_size(fd); - if (alignment == (uint64_t)-1) { - RTE_LOG(ERR, VHOST_CONFIG, - "couldn't get hugepage size through fstat\n"); - return -1; - } - mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); - - mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, 0); - - if (mmap_addr == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, - "mmap region failed.\n"); - return -1; - } - - if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) { - RTE_LOG(INFO, VHOST_CONFIG, - "MADV_DONTDUMP advice setting failed.\n"); - } - - reg.mmap_addr = mmap_addr; - reg.mmap_size = mmap_size; - reg.host_user_addr = (uint64_t)(uintptr_t)mmap_addr + - mmap_offset; - - RTE_LOG(INFO, VHOST_CONFIG, - "BAR memory region %u, size: 0x%" PRIx64 "\n" - "\t guest physical addr: 0x%" PRIx64 "\n" - "\t guest virtual addr: 0x%" PRIx64 "\n" - "\t host virtual addr: 0x%" PRIx64 "\n" - "\t mmap addr : 0x%" PRIx64 "\n" - "\t mmap size : 0x%" PRIx64 "\n" - "\t mmap align: 0x%" PRIx64 "\n" - "\t mmap off : 0x%" PRIx64 "\n", - 0, reg.size, - reg.guest_phys_addr, - reg.guest_user_addr, - reg.host_user_addr, - (uint64_t)(uintptr_t)mmap_addr, - mmap_size, - alignment, - mmap_offset); - - if (dev->bar_addr) { - munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); - } - dev->bar_addr = (void *)(uintptr_t)reg.host_user_addr; - dev->bar_size = reg.mmap_size; - - if (dev->notify_ops->vhost_nvme_set_bar_mr) { - ret = dev->notify_ops->vhost_nvme_set_bar_mr(dev->vid, dev->bar_addr, dev->bar_size); - if (ret) { - munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size); - dev->bar_addr = NULL; - dev->bar_size = 0; - } - } - - return ret; -} - -int -vhost_user_msg_handler(int vid, int fd) -{ - struct virtio_net *dev; - struct VhostUserMsg msg; - struct vhost_vring_file file; - int ret; - 
uint64_t cap; - uint64_t enable; - uint8_t cqe[16]; - uint8_t cmd[64]; - uint8_t buf[4096]; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - ret = read_vhost_message(fd, &msg); - if (ret <= 0 || msg.request >= VHOST_USER_MAX) { - if (ret < 0) - RTE_LOG(ERR, VHOST_CONFIG, - "vhost read message failed\n"); - else if (ret == 0) - RTE_LOG(INFO, VHOST_CONFIG, - "vhost peer closed\n"); - else - RTE_LOG(ERR, VHOST_CONFIG, - "vhost read incorrect message\n"); - - return -1; - } - - RTE_LOG(INFO, VHOST_CONFIG, "%s: read message %s\n", - dev->ifname, vhost_message_str[msg.request]); - - ret = vhost_user_check_and_alloc_queue_pair(dev, &msg); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to alloc queue\n"); - return -1; - } - - switch (msg.request) { - case VHOST_USER_GET_CONFIG: - if (dev->notify_ops->get_config(dev->vid, - msg.payload.config.region, - msg.payload.config.size) != 0) { - msg.size = sizeof(uint64_t); - } - send_vhost_message(fd, &msg); - break; - case VHOST_USER_SET_CONFIG: - if ((dev->notify_ops->set_config(dev->vid, - msg.payload.config.region, - msg.payload.config.offset, - msg.payload.config.size, - msg.payload.config.flags)) != 0) { - ret = 1; - } else { - ret = 0; - } - break; - case VHOST_USER_NVME_ADMIN: - if (!dev->is_nvme) { - dev->is_nvme = 1; - } - memcpy(cmd, msg.payload.nvme.cmd.req, sizeof(cmd)); - ret = vhost_user_nvme_admin_passthrough(dev, cmd, cqe, buf); - memcpy(msg.payload.nvme.cmd.cqe, cqe, sizeof(cqe)); - msg.size = sizeof(cqe); - /* NVMe Identify Command */ - if (cmd[0] == 0x06) { - memcpy(msg.payload.nvme.buf, &buf, 4096); - msg.size += 4096; - } - send_vhost_message(fd, &msg); - break; - case VHOST_USER_NVME_SET_CQ_CALL: - file.index = msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; - file.fd = msg.fds[0]; - ret = vhost_user_nvme_set_cq_call(dev, file.index, file.fd); - break; - case VHOST_USER_NVME_GET_CAP: - ret = vhost_user_nvme_get_cap(dev, &cap); - if (!ret) - msg.payload.u64 = cap; - else - msg.payload.u64 = 
0; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - break; - case VHOST_USER_NVME_START_STOP: - enable = msg.payload.u64; - /* device must be started before set cq call */ - if (enable) { - if (!(dev->flags & VIRTIO_DEV_RUNNING)) { - if (dev->notify_ops->new_device(dev->vid) == 0) - dev->flags |= VIRTIO_DEV_RUNNING; - } - } else { - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } - } - break; - case VHOST_USER_NVME_SET_BAR_MR: - ret = vhost_user_nvme_set_bar_mr(dev, &msg); - break; - case VHOST_USER_GET_FEATURES: - msg.payload.u64 = vhost_user_get_features(dev); - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - break; - case VHOST_USER_SET_FEATURES: - vhost_user_set_features(dev, msg.payload.u64); - break; - - case VHOST_USER_GET_PROTOCOL_FEATURES: - msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - break; - case VHOST_USER_SET_PROTOCOL_FEATURES: - vhost_user_set_protocol_features(dev, msg.payload.u64); - break; - - case VHOST_USER_SET_OWNER: - vhost_user_set_owner(); - break; - case VHOST_USER_RESET_OWNER: - vhost_user_reset_owner(dev); - break; - - case VHOST_USER_SET_MEM_TABLE: - ret = vhost_user_set_mem_table(dev, &msg); - break; - - case VHOST_USER_SET_LOG_BASE: - vhost_user_set_log_base(dev, &msg); - - /* it needs a reply */ - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - break; - case VHOST_USER_SET_LOG_FD: - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); - break; - - case VHOST_USER_SET_VRING_NUM: - vhost_user_set_vring_num(dev, &msg); - break; - case VHOST_USER_SET_VRING_ADDR: - vhost_user_set_vring_addr(dev, &msg); - break; - case VHOST_USER_SET_VRING_BASE: - vhost_user_set_vring_base(dev, &msg); - break; - - case VHOST_USER_GET_VRING_BASE: - vhost_user_get_vring_base(dev, &msg); - msg.size = sizeof(msg.payload.state); 
- send_vhost_message(fd, &msg); - break; - - case VHOST_USER_SET_VRING_KICK: - vhost_user_set_vring_kick(dev, &msg); - break; - case VHOST_USER_SET_VRING_CALL: - vhost_user_set_vring_call(dev, &msg); - break; - - case VHOST_USER_SET_VRING_ERR: - if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); - break; - - case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - break; - - case VHOST_USER_SET_VRING_ENABLE: - vhost_user_set_vring_enable(dev, &msg); - break; - case VHOST_USER_SEND_RARP: - vhost_user_send_rarp(dev, &msg); - break; - - case VHOST_USER_NET_SET_MTU: - ret = vhost_user_net_set_mtu(dev, &msg); - break; - - default: - ret = -1; - break; - - } - - if (msg.flags & VHOST_USER_NEED_REPLY) { - msg.payload.u64 = !!ret; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(fd, &msg); - } - - if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) { - dev->flags |= VIRTIO_DEV_READY; - - if (!(dev->flags & VIRTIO_DEV_RUNNING)) { - if (dev->dequeue_zero_copy) { - RTE_LOG(INFO, VHOST_CONFIG, - "dequeue zero copy is enabled\n"); - } - - if (dev->notify_ops->new_device(dev->vid) == 0) - dev->flags |= VIRTIO_DEV_RUNNING; - } - } - - return 0; -} diff --git a/lib/rte_vhost/vhost_user.h b/lib/rte_vhost/vhost_user.h deleted file mode 100644 index d20574b64b..0000000000 --- a/lib/rte_vhost/vhost_user.h +++ /dev/null @@ -1,171 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _VHOST_NET_USER_H -#define _VHOST_NET_USER_H - -#include -#include - -#include "rte_vhost.h" - -/* refer to hw/virtio/vhost-user.c */ - -#define VHOST_MEMORY_MAX_NREGIONS 8 - -/* - * Maximum size of virtio device config space - */ -#define VHOST_USER_MAX_CONFIG_SIZE 256 - -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_USER_PROTOCOL_F_RARP 2 -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_NET_MTU 4 -#define VHOST_USER_PROTOCOL_F_CONFIG 9 - -#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ - (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ - (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ - (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ - (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ - (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)) - -typedef enum VhostUserRequest { - VHOST_USER_NONE = 0, - VHOST_USER_GET_FEATURES = 1, - VHOST_USER_SET_FEATURES = 2, - VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_OWNER = 4, - VHOST_USER_SET_MEM_TABLE = 5, - VHOST_USER_SET_LOG_BASE = 6, - VHOST_USER_SET_LOG_FD = 7, - VHOST_USER_SET_VRING_NUM = 8, - VHOST_USER_SET_VRING_ADDR = 9, - VHOST_USER_SET_VRING_BASE = 10, - VHOST_USER_GET_VRING_BASE = 11, - VHOST_USER_SET_VRING_KICK = 12, - VHOST_USER_SET_VRING_CALL = 13, - VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_GET_PROTOCOL_FEATURES = 15, - VHOST_USER_SET_PROTOCOL_FEATURES = 16, - VHOST_USER_GET_QUEUE_NUM = 17, - VHOST_USER_SET_VRING_ENABLE = 18, - VHOST_USER_SEND_RARP = 19, - VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_GET_CONFIG = 24, - VHOST_USER_SET_CONFIG = 25, - VHOST_USER_NVME_ADMIN = 80, - VHOST_USER_NVME_SET_CQ_CALL = 81, - VHOST_USER_NVME_GET_CAP = 82, - VHOST_USER_NVME_START_STOP = 83, - VHOST_USER_NVME_IO_CMD = 84, - VHOST_USER_NVME_SET_BAR_MR = 85, - VHOST_USER_MAX -} VhostUserRequest; - -typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - 
VHOST_USER_SLAVE_MAX -} VhostUserSlaveRequest; - -typedef struct VhostUserMemoryRegion { - uint64_t guest_phys_addr; - uint64_t memory_size; - uint64_t userspace_addr; - uint64_t mmap_offset; -} VhostUserMemoryRegion; - -typedef struct VhostUserMemory { - uint32_t nregions; - uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; -} VhostUserMemory; - -typedef struct VhostUserLog { - uint64_t mmap_size; - uint64_t mmap_offset; -} VhostUserLog; - -typedef struct VhostUserConfig { - uint32_t offset; - uint32_t size; - uint32_t flags; - uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; -} VhostUserConfig; - -typedef struct VhostUserMsg { - VhostUserRequest request; - -#define VHOST_USER_VERSION_MASK 0x3 -#define VHOST_USER_REPLY_MASK (0x1 << 2) -#define VHOST_USER_NEED_REPLY (0x1 << 3) - uint32_t flags; - uint32_t size; /* the following payload size */ - union { -#define VHOST_USER_VRING_IDX_MASK 0xff -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) - uint64_t u64; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - VhostUserMemory memory; - VhostUserLog log; - VhostUserConfig config; - struct nvme { - union { - uint8_t req[64]; - uint8_t cqe[16]; - } cmd; - uint8_t buf[4096]; - } nvme; - } payload; - int fds[VHOST_MEMORY_MAX_NREGIONS]; -} __attribute((packed)) VhostUserMsg; - -#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) - -/* The version of the protocol we support */ -#define VHOST_USER_VERSION 0x1 - - -/* vhost_user.c */ -int vhost_user_msg_handler(int vid, int fd); - -/* socket.c */ -int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); -int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); - -#endif diff --git a/lib/vhost/Makefile b/lib/vhost/Makefile index 1fe9b6e400..ca77041b95 100644 --- a/lib/vhost/Makefile +++ b/lib/vhost/Makefile @@ -42,11 +42,6 @@ CFLAGS += $(ENV_CFLAGS) C_SRCS = vhost.c vhost_rpc.c vhost_scsi.c vhost_blk.c rte_vhost_compat.c -ifeq 
($(CONFIG_VHOST_INTERNAL_LIB),y) -C_SRCS += vhost_nvme.c -CFLAGS := -I../rte_vhost $(CFLAGS) -endif - LIBNAME = vhost SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_vhost.map) diff --git a/lib/vhost/rte_vhost_compat.c b/lib/vhost/rte_vhost_compat.c index b4573ad042..e1747cc127 100644 --- a/lib/vhost/rte_vhost_compat.c +++ b/lib/vhost/rte_vhost_compat.c @@ -138,18 +138,8 @@ static const struct vhost_device_ops g_spdk_vhost_ops = { .destroy_device = stop_device, .new_connection = new_connection, .destroy_connection = destroy_connection, -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB - .get_config = vhost_get_config_cb, - .set_config = vhost_set_config_cb, - .vhost_nvme_admin_passthrough = vhost_nvme_admin_passthrough, - .vhost_nvme_set_cq_call = vhost_nvme_set_cq_call, - .vhost_nvme_get_cap = vhost_nvme_get_cap, - .vhost_nvme_set_bar_mr = vhost_nvme_set_bar_mr, -#endif }; -#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB - static enum rte_vhost_msg_result extern_vhost_pre_msg_handler(int vid, void *_msg) { @@ -319,24 +309,12 @@ vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession) } } -#else /* SPDK_CONFIG_VHOST_INTERNAL_LIB */ - -void -vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession) -{ - /* nothing to do. all the changes are already incorporated into rte_vhost */ -} - -#endif - int vhost_register_unix_socket(const char *path, const char *ctrl_name, uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features) { struct stat file_stat; -#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB uint64_t features = 0; -#endif /* Register vhost driver to handle vhost messages. 
*/ if (stat(path, &file_stat) != -1) { @@ -372,11 +350,9 @@ vhost_register_unix_socket(const char *path, const char *ctrl_name, return -EIO; } -#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB rte_vhost_driver_get_protocol_features(path, &features); features |= protocol_features; rte_vhost_driver_set_protocol_features(path, features); -#endif if (rte_vhost_driver_start(path) != 0) { SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n", diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c index a17004f4cc..1c89508030 100644 --- a/lib/vhost/vhost.c +++ b/lib/vhost/vhost.c @@ -1288,56 +1288,6 @@ out: return rc; } -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB -int -vhost_get_config_cb(int vid, uint8_t *config, uint32_t len) -{ - struct spdk_vhost_session *vsession; - struct spdk_vhost_dev *vdev; - int rc = -1; - - pthread_mutex_lock(&g_vhost_mutex); - vsession = vhost_session_find_by_vid(vid); - if (vsession == NULL) { - SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); - goto out; - } - - vdev = vsession->vdev; - if (vdev->backend->vhost_get_config) { - rc = vdev->backend->vhost_get_config(vdev, config, len); - } - -out: - pthread_mutex_unlock(&g_vhost_mutex); - return rc; -} - -int -vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, uint32_t size, uint32_t flags) -{ - struct spdk_vhost_session *vsession; - struct spdk_vhost_dev *vdev; - int rc = -1; - - pthread_mutex_lock(&g_vhost_mutex); - vsession = vhost_session_find_by_vid(vid); - if (vsession == NULL) { - SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid); - goto out; - } - - vdev = vsession->vdev; - if (vdev->backend->vhost_set_config) { - rc = vdev->backend->vhost_set_config(vdev, config, offset, size, flags); - } - -out: - pthread_mutex_unlock(&g_vhost_mutex); - return rc; -} -#endif - int spdk_vhost_set_socket_path(const char *basename) { @@ -1520,14 +1470,6 @@ spdk_vhost_init(spdk_vhost_init_cb init_cb) goto out; } -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB - ret = 
vhost_nvme_controller_construct(); - if (ret != 0) { - SPDK_ERRLOG("Cannot construct vhost NVMe controllers\n"); - goto out; - } -#endif - spdk_cpuset_zero(&g_vhost_core_mask); /* iterate threads instead of using SPDK_ENV_FOREACH_CORE to ensure that threads are really diff --git a/lib/vhost/vhost_blk.c b/lib/vhost/vhost_blk.c index d794e49dd7..eeb788012f 100644 --- a/lib/vhost/vhost_blk.c +++ b/lib/vhost/vhost_blk.c @@ -60,12 +60,8 @@ (1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI)) /* Vhost-blk support protocol features */ -#ifndef SPDK_CONFIG_VHOST_INTERNAL_LIB #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \ (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) -#else -#define SPDK_VHOST_BLK_PROTOCOL_FEATURES (1ULL << VHOST_USER_PROTOCOL_F_CONFIG) -#endif struct spdk_vhost_blk_task { struct spdk_bdev_io *bdev_io; diff --git a/lib/vhost/vhost_internal.h b/lib/vhost/vhost_internal.h index d6aaddc180..9030caeed7 100644 --- a/lib/vhost/vhost_internal.h +++ b/lib/vhost/vhost_internal.h @@ -393,12 +393,6 @@ int vhost_start_device_cb(int vid); int vhost_stop_device_cb(int vid); int vhost_destroy_connection_cb(int vid); -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB -int vhost_get_config_cb(int vid, uint8_t *config, uint32_t len); -int vhost_set_config_cb(int vid, uint8_t *config, uint32_t offset, - uint32_t size, uint32_t flags); -#endif - /* * Memory registration functions used in start/stop device callbacks */ @@ -481,16 +475,4 @@ int vhost_get_negotiated_features(int vid, uint64_t *negotiated_features); int remove_vhost_controller(struct spdk_vhost_dev *vdev); -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB -int vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf); -int vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd); -int vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size); -int vhost_nvme_get_cap(int vid, uint64_t *cap); -int vhost_nvme_controller_construct(void); -int vhost_nvme_dev_construct(const 
char *name, const char *cpumask, uint32_t io_queues); -int vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev); -int vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, - const char *bdev_name); -#endif - #endif /* SPDK_VHOST_INTERNAL_H */ diff --git a/lib/vhost/vhost_nvme.c b/lib/vhost/vhost_nvme.c deleted file mode 100644 index 5beac0ce98..0000000000 --- a/lib/vhost/vhost_nvme.c +++ /dev/null @@ -1,1500 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "spdk/stdinc.h" - -#include "spdk/nvme.h" -#include "spdk/env.h" -#include "spdk/conf.h" -#include "spdk/util.h" -#include "spdk/string.h" -#include "spdk/thread.h" -#include "spdk/barrier.h" -#include "spdk/vhost.h" -#include "spdk/bdev.h" -#include "spdk/version.h" -#include "spdk/nvme_spec.h" -#include "spdk/likely.h" - -#include "vhost_internal.h" - -#define MAX_IO_QUEUES 31 -#define MAX_IOVS 64 -#define MAX_NAMESPACE 8 -#define MAX_QUEUE_ENTRIES_SUPPORTED 256 -#define MAX_BATCH_IO 8 - -struct spdk_vhost_nvme_sq { - uint16_t sqid; - uint16_t size; - uint16_t cqid; - bool valid; - struct spdk_nvme_cmd *sq_cmd; - uint16_t sq_head; - uint16_t sq_tail; -}; - -struct spdk_vhost_nvme_cq { - uint8_t phase; - uint16_t size; - uint16_t cqid; - bool valid; - volatile struct spdk_nvme_cpl *cq_cqe; - uint16_t cq_head; - uint16_t guest_signaled_cq_head; - uint32_t need_signaled_cnt; - STAILQ_HEAD(, spdk_vhost_nvme_task) cq_full_waited_tasks; - bool irq_enabled; - int virq; -}; - -struct spdk_vhost_nvme_ns { - struct spdk_bdev *bdev; - uint32_t block_size; - uint64_t capacity; - uint32_t nsid; - uint32_t active_ns; - struct spdk_bdev_desc *bdev_desc; - struct spdk_io_channel *bdev_io_channel; - struct spdk_nvme_ns_data nsdata; -}; - -struct spdk_vhost_nvme_task { - struct spdk_nvme_cmd cmd; - struct spdk_vhost_nvme_dev *nvme; - uint16_t sqid; - uint16_t cqid; - - /** array of iovecs to transfer. 
*/ - struct iovec iovs[MAX_IOVS]; - - /** Number of iovecs in iovs array. */ - int iovcnt; - - /** Current iovec position. */ - int iovpos; - - /** Offset in current iovec. */ - uint32_t iov_offset; - - /* for bdev_io_wait */ - struct spdk_bdev_io_wait_entry bdev_io_wait; - struct spdk_vhost_nvme_sq *sq; - struct spdk_vhost_nvme_ns *ns; - - /* parent pointer. */ - struct spdk_vhost_nvme_task *parent; - uint8_t dnr; - uint8_t sct; - uint8_t sc; - uint32_t num_children; - STAILQ_ENTRY(spdk_vhost_nvme_task) stailq; -}; - -struct spdk_vhost_nvme_dev { - struct spdk_vhost_dev vdev; - - uint32_t num_io_queues; - union spdk_nvme_cap_register cap; - union spdk_nvme_cc_register cc; - union spdk_nvme_csts_register csts; - struct spdk_nvme_ctrlr_data cdata; - - uint32_t num_sqs; - uint32_t num_cqs; - - uint32_t num_ns; - struct spdk_vhost_nvme_ns ns[MAX_NAMESPACE]; - - volatile uint32_t *bar; - volatile uint32_t *bar_db; - uint64_t bar_size; - bool dataplane_started; - - volatile uint32_t *dbbuf_dbs; - volatile uint32_t *dbbuf_eis; - struct spdk_vhost_nvme_sq sq_queue[MAX_IO_QUEUES + 1]; - struct spdk_vhost_nvme_cq cq_queue[MAX_IO_QUEUES + 1]; - - /* The one and only session associated with this device */ - struct spdk_vhost_session *vsession; - - TAILQ_ENTRY(spdk_vhost_nvme_dev) tailq; - STAILQ_HEAD(, spdk_vhost_nvme_task) free_tasks; - struct spdk_poller *requestq_poller; - struct spdk_poller *stop_poller; -}; - -static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend; - -/* - * Report the SPDK version as the firmware revision. - * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts. 
- */ -#define FW_VERSION SPDK_VERSION_MAJOR_STRING SPDK_VERSION_MINOR_STRING SPDK_VERSION_PATCH_STRING - -static int -nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq, - struct spdk_vhost_nvme_task *task); - -static struct spdk_vhost_nvme_dev * -to_nvme_dev(struct spdk_vhost_dev *vdev) -{ - if (vdev->backend != &spdk_vhost_nvme_device_backend) { - SPDK_ERRLOG("%s: not a vhost-nvme device\n", vdev->name); - return NULL; - } - - return SPDK_CONTAINEROF(vdev, struct spdk_vhost_nvme_dev, vdev); -} - -static TAILQ_HEAD(, spdk_vhost_nvme_dev) g_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_ctrlrs); - -static inline unsigned int sq_offset(unsigned int qid, uint32_t db_stride) -{ - return qid * 2 * db_stride; -} - -static inline unsigned int cq_offset(unsigned int qid, uint32_t db_stride) -{ - return (qid * 2 + 1) * db_stride; -} - -static void -nvme_inc_cq_head(struct spdk_vhost_nvme_cq *cq) -{ - cq->cq_head++; - if (cq->cq_head >= cq->size) { - cq->cq_head = 0; - cq->phase = !cq->phase; - } -} - -static bool -nvme_cq_is_full(struct spdk_vhost_nvme_cq *cq) -{ - return ((cq->cq_head + 1) % cq->size == cq->guest_signaled_cq_head); -} - -static void -nvme_inc_sq_head(struct spdk_vhost_nvme_sq *sq) -{ - sq->sq_head = (sq->sq_head + 1) % sq->size; -} - -static struct spdk_vhost_nvme_sq * -vhost_nvme_get_sq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid) -{ - if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) { - return NULL; - } - - return &dev->sq_queue[qid]; -} - -static struct spdk_vhost_nvme_cq * -vhost_nvme_get_cq_from_qid(struct spdk_vhost_nvme_dev *dev, uint16_t qid) -{ - if (spdk_unlikely(!qid || qid > MAX_IO_QUEUES)) { - return NULL; - } - - return &dev->cq_queue[qid]; -} - -static inline uint32_t -vhost_nvme_get_queue_head(struct spdk_vhost_nvme_dev *nvme, uint32_t offset) -{ - if (nvme->dataplane_started) { - return nvme->dbbuf_dbs[offset]; - - } else if (nvme->bar) { - return nvme->bar_db[offset]; - } - - assert(0); - - return 0; 
-} - -static void * -vhost_nvme_gpa_to_vva(void *priv, uint64_t addr, uint64_t len) -{ - struct spdk_vhost_session *vsession = priv; - - return vhost_gpa_to_vva(vsession, addr, len); -} - -static int -vhost_nvme_map_prps(struct spdk_vhost_nvme_dev *nvme, struct spdk_nvme_cmd *cmd, - struct spdk_vhost_nvme_task *task, uint32_t len) -{ - int err; - - err = spdk_nvme_map_prps(nvme->vsession, cmd, task->iovs, len, 4096, - vhost_nvme_gpa_to_vva); - if (spdk_unlikely(err < 0)) { - return err; - } - task->iovcnt = err; - return 0; -} - -static void -nvme_cq_signal_fd(struct spdk_vhost_nvme_dev *nvme) -{ - struct spdk_vhost_nvme_cq *cq; - uint32_t qid, cq_head; - - assert(nvme != NULL); - - for (qid = 1; qid <= MAX_IO_QUEUES; qid++) { - cq = vhost_nvme_get_cq_from_qid(nvme, qid); - if (!cq || !cq->valid) { - continue; - } - - cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(qid, 1)); - if (cq->irq_enabled && cq->need_signaled_cnt && (cq->cq_head != cq_head)) { - eventfd_write(cq->virq, (eventfd_t)1); - cq->need_signaled_cnt = 0; - } - } -} - -static void -vhost_nvme_task_complete(struct spdk_vhost_nvme_task *task) -{ - struct spdk_vhost_nvme_dev *nvme = task->nvme; - struct spdk_nvme_cpl cqe = {0}; - struct spdk_vhost_nvme_cq *cq; - struct spdk_vhost_nvme_sq *sq; - struct spdk_nvme_cmd *cmd = &task->cmd; - uint16_t cqid = task->cqid; - uint16_t sqid = task->sqid; - - cq = vhost_nvme_get_cq_from_qid(nvme, cqid); - sq = vhost_nvme_get_sq_from_qid(nvme, sqid); - if (spdk_unlikely(!cq || !sq)) { - return; - } - - cq->guest_signaled_cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(cqid, 1)); - if (spdk_unlikely(nvme_cq_is_full(cq))) { - STAILQ_INSERT_TAIL(&cq->cq_full_waited_tasks, task, stailq); - return; - } - - cqe.sqid = sqid; - cqe.sqhd = sq->sq_head; - cqe.cid = cmd->cid; - cqe.status.dnr = task->dnr; - cqe.status.sct = task->sct; - cqe.status.sc = task->sc; - cqe.status.p = !cq->phase; - cq->cq_cqe[cq->cq_head] = cqe; - spdk_smp_wmb(); - 
cq->cq_cqe[cq->cq_head].status.p = cq->phase; - - nvme_inc_cq_head(cq); - cq->need_signaled_cnt++; - - /* MMIO Controll */ - if (nvme->dataplane_started) { - nvme->dbbuf_eis[cq_offset(cqid, 1)] = (uint32_t)(cq->guest_signaled_cq_head - 1); - } - - STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq); -} - -static void -blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct spdk_vhost_nvme_task *task = cb_arg; - struct spdk_nvme_cmd *cmd = &task->cmd; - int sc, sct; - uint32_t cdw0; - - assert(bdev_io != NULL); - - spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc); - spdk_bdev_free_io(bdev_io); - - task->dnr = !success; - task->sct = sct; - task->sc = sc; - - if (spdk_unlikely(!success)) { - SPDK_ERRLOG("I/O error, sector %u\n", cmd->cdw10); - } - - vhost_nvme_task_complete(task); -} - -static void -blk_unmap_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) -{ - struct spdk_vhost_nvme_task *child = cb_arg; - struct spdk_vhost_nvme_task *task = child->parent; - struct spdk_vhost_nvme_dev *nvme = task->nvme; - int sct, sc; - uint32_t cdw0; - - assert(bdev_io != NULL); - - task->num_children--; - if (!success) { - task->dnr = 1; - spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc); - task->sct = sct; - task->sc = sc; - } - - spdk_bdev_free_io(bdev_io); - - if (!task->num_children) { - vhost_nvme_task_complete(task); - } - - STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq); -} - -static struct spdk_vhost_nvme_ns * -vhost_nvme_get_ns_from_nsid(struct spdk_vhost_nvme_dev *dev, uint32_t nsid) -{ - if (spdk_unlikely(!nsid || nsid > dev->num_ns)) { - return NULL; - } - - return &dev->ns[nsid - 1]; -} - -static void -vhost_nvme_resubmit_task(void *arg) -{ - struct spdk_vhost_nvme_task *task = (struct spdk_vhost_nvme_task *)arg; - int rc; - - rc = nvme_process_sq(task->nvme, task->sq, task); - if (rc) { - SPDK_DEBUGLOG(vhost_nvme, "vhost_nvme: task resubmit failed, rc = %d.\n", rc); - } -} - -static int 
-vhost_nvme_queue_task(struct spdk_vhost_nvme_task *task) -{ - int rc; - - task->bdev_io_wait.bdev = task->ns->bdev; - task->bdev_io_wait.cb_fn = vhost_nvme_resubmit_task; - task->bdev_io_wait.cb_arg = task; - - rc = spdk_bdev_queue_io_wait(task->ns->bdev, task->ns->bdev_io_channel, &task->bdev_io_wait); - if (rc != 0) { - SPDK_ERRLOG("Queue io failed in vhost_nvme_queue_task, rc=%d.\n", rc); - task->dnr = 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; - vhost_nvme_task_complete(task); - } - - return rc; -} - -static int -nvme_process_sq(struct spdk_vhost_nvme_dev *nvme, struct spdk_vhost_nvme_sq *sq, - struct spdk_vhost_nvme_task *task) -{ - struct spdk_vhost_nvme_task *child; - struct spdk_nvme_cmd *cmd = &task->cmd; - struct spdk_vhost_nvme_ns *ns; - int ret = -1; - uint32_t len, nlba, block_size; - uint64_t slba; - struct spdk_nvme_dsm_range *range; - uint16_t i, num_ranges = 0; - - task->nvme = nvme; - task->dnr = 0; - task->sct = 0; - task->sc = 0; - - ns = vhost_nvme_get_ns_from_nsid(nvme, cmd->nsid); - if (spdk_unlikely(!ns)) { - task->dnr = 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; - vhost_nvme_task_complete(task); - return -1; - } - - block_size = ns->block_size; - task->num_children = 0; - task->cqid = sq->cqid; - task->sqid = sq->sqid; - - task->ns = ns; - - if (spdk_unlikely(!ns->active_ns)) { - task->dnr = 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; - vhost_nvme_task_complete(task); - return -1; - } - - /* valid only for Read/Write commands */ - nlba = (cmd->cdw12 & 0xffff) + 1; - slba = cmd->cdw11; - slba = (slba << 32) | cmd->cdw10; - - if (cmd->opc == SPDK_NVME_OPC_READ || cmd->opc == SPDK_NVME_OPC_WRITE || - cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { - if (cmd->psdt != SPDK_NVME_PSDT_PRP) { - SPDK_DEBUGLOG(vhost_nvme, "Invalid PSDT %u%ub in command\n", - cmd->psdt >> 1, cmd->psdt & 1u); - task->dnr 
= 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INVALID_FIELD; - vhost_nvme_task_complete(task); - return -1; - } - - if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { - num_ranges = (cmd->cdw10 & 0xff) + 1; - len = num_ranges * sizeof(struct spdk_nvme_dsm_range); - } else { - len = nlba * block_size; - } - - ret = vhost_nvme_map_prps(nvme, cmd, task, len); - if (spdk_unlikely(ret != 0)) { - SPDK_ERRLOG("nvme command map prps failed\n"); - task->dnr = 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INVALID_FIELD; - vhost_nvme_task_complete(task); - return -1; - } - } - - switch (cmd->opc) { - case SPDK_NVME_OPC_READ: - ret = spdk_bdev_readv(ns->bdev_desc, ns->bdev_io_channel, - task->iovs, task->iovcnt, slba * block_size, - nlba * block_size, blk_request_complete_cb, task); - break; - case SPDK_NVME_OPC_WRITE: - ret = spdk_bdev_writev(ns->bdev_desc, ns->bdev_io_channel, - task->iovs, task->iovcnt, slba * block_size, - nlba * block_size, blk_request_complete_cb, task); - break; - case SPDK_NVME_OPC_FLUSH: - ret = spdk_bdev_flush(ns->bdev_desc, ns->bdev_io_channel, - 0, ns->capacity, - blk_request_complete_cb, task); - break; - case SPDK_NVME_OPC_DATASET_MANAGEMENT: - range = (struct spdk_nvme_dsm_range *)task->iovs[0].iov_base; - for (i = 0; i < num_ranges; i++) { - if (!STAILQ_EMPTY(&nvme->free_tasks)) { - child = STAILQ_FIRST(&nvme->free_tasks); - STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); - } else { - SPDK_ERRLOG("No free task now\n"); - ret = -1; - break; - } - task->num_children++; - child->parent = task; - ret = spdk_bdev_unmap(ns->bdev_desc, ns->bdev_io_channel, - range[i].starting_lba * block_size, - range[i].length * block_size, - blk_unmap_complete_cb, child); - if (ret) { - STAILQ_INSERT_TAIL(&nvme->free_tasks, child, stailq); - break; - } - } - break; - default: - ret = -1; - break; - } - - if (spdk_unlikely(ret)) { - if (ret == -ENOMEM) { - SPDK_DEBUGLOG(vhost_nvme, "No memory, start to queue io.\n"); - 
task->sq = sq; - ret = vhost_nvme_queue_task(task); - } else { - /* post error status to cqe */ - SPDK_ERRLOG("Error Submission For Command %u, ret %d\n", cmd->opc, ret); - task->dnr = 1; - task->sct = SPDK_NVME_SCT_GENERIC; - task->sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; - vhost_nvme_task_complete(task); - } - } - - return ret; -} - -static int -nvme_worker(void *arg) -{ - struct spdk_vhost_nvme_dev *nvme = (struct spdk_vhost_nvme_dev *)arg; - struct spdk_vhost_nvme_sq *sq; - struct spdk_vhost_nvme_cq *cq; - struct spdk_vhost_nvme_task *task; - uint32_t qid, dbbuf_sq; - int ret; - int count = -1; - - if (spdk_unlikely(!nvme->num_sqs)) { - return SPDK_POLLER_IDLE; - } - - if (spdk_unlikely(!nvme->dataplane_started && !nvme->bar)) { - return SPDK_POLLER_IDLE; - } - - for (qid = 1; qid <= MAX_IO_QUEUES; qid++) { - - sq = vhost_nvme_get_sq_from_qid(nvme, qid); - if (!sq->valid) { - continue; - } - cq = vhost_nvme_get_cq_from_qid(nvme, sq->cqid); - if (spdk_unlikely(!cq)) { - return SPDK_POLLER_BUSY; - } - cq->guest_signaled_cq_head = vhost_nvme_get_queue_head(nvme, cq_offset(sq->cqid, 1)); - if (spdk_unlikely(!STAILQ_EMPTY(&cq->cq_full_waited_tasks) && - !nvme_cq_is_full(cq))) { - task = STAILQ_FIRST(&cq->cq_full_waited_tasks); - STAILQ_REMOVE_HEAD(&cq->cq_full_waited_tasks, stailq); - vhost_nvme_task_complete(task); - } - - dbbuf_sq = vhost_nvme_get_queue_head(nvme, sq_offset(qid, 1)); - sq->sq_tail = (uint16_t)dbbuf_sq; - count = 0; - - while (sq->sq_head != sq->sq_tail) { - if (spdk_unlikely(!sq->sq_cmd)) { - break; - } - if (spdk_likely(!STAILQ_EMPTY(&nvme->free_tasks))) { - task = STAILQ_FIRST(&nvme->free_tasks); - STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); - } else { - return SPDK_POLLER_BUSY; - } - - task->cmd = sq->sq_cmd[sq->sq_head]; - nvme_inc_sq_head(sq); - - /* processing IO */ - ret = nvme_process_sq(nvme, sq, task); - if (spdk_unlikely(ret)) { - SPDK_ERRLOG("QID %u CID %u, SQ HEAD %u, DBBUF SQ TAIL %u\n", qid, task->cmd.cid, sq->sq_head, - 
sq->sq_tail); - } - - /* MMIO Control */ - if (nvme->dataplane_started) { - nvme->dbbuf_eis[sq_offset(qid, 1)] = (uint32_t)(sq->sq_head - 1); - } - - /* Maximum batch I/Os to pick up at once */ - if (count++ == MAX_BATCH_IO) { - break; - } - } - } - - /* Completion Queue */ - nvme_cq_signal_fd(nvme); - - return count; -} - -static int -vhost_nvme_doorbell_buffer_config(struct spdk_vhost_nvme_dev *nvme, - struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) -{ - struct spdk_vhost_session *vsession = nvme->vsession; - uint64_t dbs_dma_addr, eis_dma_addr; - - dbs_dma_addr = cmd->dptr.prp.prp1; - eis_dma_addr = cmd->dptr.prp.prp2; - - if ((dbs_dma_addr % 4096) || (eis_dma_addr % 4096)) { - return -1; - } - /* Guest Physical Address to Host Virtual Address */ - nvme->dbbuf_dbs = vhost_gpa_to_vva(vsession, dbs_dma_addr, 4096); - nvme->dbbuf_eis = vhost_gpa_to_vva(vsession, eis_dma_addr, 4096); - if (!nvme->dbbuf_dbs || !nvme->dbbuf_eis) { - return -1; - } - /* zeroed the doorbell buffer memory */ - memset((void *)nvme->dbbuf_dbs, 0, 4096); - memset((void *)nvme->dbbuf_eis, 0, 4096); - - cpl->status.sc = 0; - cpl->status.sct = 0; - - /* Data plane started */ - nvme->dataplane_started = true; - - return 0; -} - -static int -vhost_nvme_create_io_sq(struct spdk_vhost_nvme_dev *nvme, - struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) -{ - uint16_t qid, qsize, cqid; - uint64_t dma_addr; - uint64_t requested_len; - struct spdk_vhost_nvme_cq *cq; - struct spdk_vhost_nvme_sq *sq; - - /* physical contiguous */ - if (!(cmd->cdw11 & 0x1)) { - return -1; - } - - cqid = (cmd->cdw11 >> 16) & 0xffff; - qid = cmd->cdw10 & 0xffff; - qsize = (cmd->cdw10 >> 16) & 0xffff; - dma_addr = cmd->dptr.prp.prp1; - if (!dma_addr || dma_addr % 4096) { - return -1; - } - - sq = vhost_nvme_get_sq_from_qid(nvme, qid); - cq = vhost_nvme_get_cq_from_qid(nvme, cqid); - if (!sq || !cq) { - SPDK_DEBUGLOG(vhost_nvme, "User requested invalid QID %u or CQID %u\n", - qid, cqid); - cpl->status.sct = 
SPDK_NVME_SCT_COMMAND_SPECIFIC; - cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; - return -1; - } - - sq->sqid = qid; - sq->cqid = cqid; - sq->size = qsize + 1; - sq->sq_head = sq->sq_tail = 0; - requested_len = sizeof(struct spdk_nvme_cmd) * sq->size; - sq->sq_cmd = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len); - if (!sq->sq_cmd) { - return -1; - } - nvme->num_sqs++; - sq->valid = true; - if (nvme->bar) { - nvme->bar_db[sq_offset(qid, 1)] = 0; - } - - cpl->status.sc = 0; - cpl->status.sct = 0; - return 0; -} - -static int -vhost_nvme_delete_io_sq(struct spdk_vhost_nvme_dev *nvme, - struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) -{ - uint16_t qid; - struct spdk_vhost_nvme_sq *sq; - - qid = cmd->cdw10 & 0xffff; - sq = vhost_nvme_get_sq_from_qid(nvme, qid); - if (!sq) { - return -1; - } - - /* We didn't see scenarios when deleting submission - * queue while I/O is running against the submisson - * queue for now, otherwise, we must ensure the poller - * will not run with this submission queue. 
- */ - nvme->num_sqs--; - sq->valid = false; - - memset(sq, 0, sizeof(*sq)); - sq->sq_cmd = NULL; - - cpl->status.sc = 0; - cpl->status.sct = 0; - - return 0; -} - -static int -vhost_nvme_create_io_cq(struct spdk_vhost_nvme_dev *nvme, - struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) -{ - uint16_t qsize, qid; - uint64_t dma_addr; - struct spdk_vhost_nvme_cq *cq; - uint64_t requested_len; - - /* physical contiguous */ - if (!(cmd->cdw11 & 0x1)) { - return -1; - } - - qid = cmd->cdw10 & 0xffff; - qsize = (cmd->cdw10 >> 16) & 0xffff; - dma_addr = cmd->dptr.prp.prp1; - if (!dma_addr || dma_addr % 4096) { - return -1; - } - - cq = vhost_nvme_get_cq_from_qid(nvme, qid); - if (!cq) { - SPDK_DEBUGLOG(vhost_nvme, "User requested invalid QID %u\n", qid); - cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; - cpl->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER; - return -1; - } - cq->cqid = qid; - cq->size = qsize + 1; - cq->phase = 1; - cq->irq_enabled = (cmd->cdw11 >> 1) & 0x1; - /* Setup virq through vhost messages */ - cq->virq = -1; - cq->cq_head = 0; - cq->guest_signaled_cq_head = 0; - cq->need_signaled_cnt = 0; - requested_len = sizeof(struct spdk_nvme_cpl) * cq->size; - cq->cq_cqe = vhost_gpa_to_vva(nvme->vsession, dma_addr, requested_len); - if (!cq->cq_cqe) { - return -1; - } - nvme->num_cqs++; - cq->valid = true; - if (nvme->bar) { - nvme->bar_db[cq_offset(qid, 1)] = 0; - } - STAILQ_INIT(&cq->cq_full_waited_tasks); - - cpl->status.sc = 0; - cpl->status.sct = 0; - return 0; -} - -static int -vhost_nvme_delete_io_cq(struct spdk_vhost_nvme_dev *nvme, - struct spdk_nvme_cmd *cmd, struct spdk_nvme_cpl *cpl) -{ - uint16_t qid; - struct spdk_vhost_nvme_cq *cq; - - qid = cmd->cdw10 & 0xffff; - cq = vhost_nvme_get_cq_from_qid(nvme, qid); - if (!cq) { - return -1; - } - nvme->num_cqs--; - cq->valid = false; - - memset(cq, 0, sizeof(*cq)); - cq->cq_cqe = NULL; - - cpl->status.sc = 0; - cpl->status.sct = 0; - return 0; -} - -static struct spdk_vhost_nvme_dev * 
-vhost_nvme_get_by_name(int vid) -{ - struct spdk_vhost_nvme_dev *nvme; - struct spdk_vhost_dev *vdev; - struct spdk_vhost_session *vsession; - - TAILQ_FOREACH(nvme, &g_nvme_ctrlrs, tailq) { - vdev = &nvme->vdev; - TAILQ_FOREACH(vsession, &vdev->vsessions, tailq) { - if (vsession->vid == vid) { - return nvme; - } - } - } - - return NULL; -} - -int -vhost_nvme_get_cap(int vid, uint64_t *cap) -{ - struct spdk_vhost_nvme_dev *nvme; - - nvme = vhost_nvme_get_by_name(vid); - if (!nvme) { - return -1; - } - - *cap = nvme->cap.raw; - return 0; -} - -int -vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf) -{ - struct spdk_nvme_cmd *req = (struct spdk_nvme_cmd *)cmd; - struct spdk_nvme_cpl *cpl = (struct spdk_nvme_cpl *)cqe; - struct spdk_vhost_nvme_ns *ns; - int ret = 0; - struct spdk_vhost_nvme_dev *nvme; - - nvme = vhost_nvme_get_by_name(vid); - if (!nvme) { - return -1; - } - - SPDK_DEBUGLOG(vhost_nvme, "Admin Command Opcode %u\n", req->opc); - switch (req->opc) { - case SPDK_NVME_OPC_IDENTIFY: - if (req->cdw10 == SPDK_NVME_IDENTIFY_CTRLR) { - memcpy(buf, &nvme->cdata, sizeof(struct spdk_nvme_ctrlr_data)); - - } else if (req->cdw10 == SPDK_NVME_IDENTIFY_NS) { - ns = vhost_nvme_get_ns_from_nsid(nvme, req->nsid); - if (!ns) { - cpl->status.sc = SPDK_NVME_SC_NAMESPACE_ID_UNAVAILABLE; - cpl->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC; - break; - } - memcpy(buf, &ns->nsdata, sizeof(struct spdk_nvme_ns_data)); - } - /* successfully */ - cpl->status.sc = 0; - cpl->status.sct = 0; - break; - case SPDK_NVME_OPC_CREATE_IO_CQ: - ret = vhost_nvme_create_io_cq(nvme, req, cpl); - break; - case SPDK_NVME_OPC_DELETE_IO_CQ: - ret = vhost_nvme_delete_io_cq(nvme, req, cpl); - break; - case SPDK_NVME_OPC_CREATE_IO_SQ: - ret = vhost_nvme_create_io_sq(nvme, req, cpl); - break; - case SPDK_NVME_OPC_DELETE_IO_SQ: - ret = vhost_nvme_delete_io_sq(nvme, req, cpl); - break; - case SPDK_NVME_OPC_GET_FEATURES: - case SPDK_NVME_OPC_SET_FEATURES: - if (req->cdw10 == 
SPDK_NVME_FEAT_NUMBER_OF_QUEUES) { - cpl->status.sc = 0; - cpl->status.sct = 0; - cpl->cdw0 = (nvme->num_io_queues - 1) | ((nvme->num_io_queues - 1) << 16); - } else { - cpl->status.sc = SPDK_NVME_SC_INVALID_FIELD; - cpl->status.sct = SPDK_NVME_SCT_GENERIC; - } - break; - case SPDK_NVME_OPC_DOORBELL_BUFFER_CONFIG: - ret = vhost_nvme_doorbell_buffer_config(nvme, req, cpl); - break; - case SPDK_NVME_OPC_ABORT: - /* TODO: ABORT failed fow now */ - cpl->cdw0 = 1; - cpl->status.sc = 0; - cpl->status.sct = 0; - break; - } - - if (ret) { - SPDK_ERRLOG("Admin Passthrough Failed with %u\n", req->opc); - } - - return 0; -} - -int -vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size) -{ - struct spdk_vhost_nvme_dev *nvme; - - nvme = vhost_nvme_get_by_name(vid); - if (!nvme) { - return -1; - } - - nvme->bar = (volatile uint32_t *)(uintptr_t)(bar_addr); - /* BAR0 SQ/CQ doorbell registers start from offset 0x1000 */ - nvme->bar_db = (volatile uint32_t *)(uintptr_t)(bar_addr + 0x1000ull); - nvme->bar_size = bar_size; - - return 0; -} - -int -vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd) -{ - struct spdk_vhost_nvme_dev *nvme; - struct spdk_vhost_nvme_cq *cq; - - nvme = vhost_nvme_get_by_name(vid); - if (!nvme) { - return -1; - } - - cq = vhost_nvme_get_cq_from_qid(nvme, qid); - if (!cq) { - return -1; - } - if (cq->irq_enabled) { - cq->virq = fd; - } else { - SPDK_ERRLOG("NVMe Qid %d Disabled IRQ\n", qid); - } - - return 0; -} - -static void -free_task_pool(struct spdk_vhost_nvme_dev *nvme) -{ - struct spdk_vhost_nvme_task *task; - - while (!STAILQ_EMPTY(&nvme->free_tasks)) { - task = STAILQ_FIRST(&nvme->free_tasks); - STAILQ_REMOVE_HEAD(&nvme->free_tasks, stailq); - spdk_free(task); - } -} - -static int -alloc_task_pool(struct spdk_vhost_nvme_dev *nvme) -{ - uint32_t entries, i; - struct spdk_vhost_nvme_task *task; - - entries = nvme->num_io_queues * MAX_QUEUE_ENTRIES_SUPPORTED; - - for (i = 0; i < entries; i++) { - task = spdk_zmalloc(sizeof(struct 
spdk_vhost_nvme_task), - SPDK_CACHE_LINE_SIZE, NULL, - SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); - if (task == NULL) { - SPDK_ERRLOG("Controller %s alloc task pool failed\n", - nvme->vdev.name); - free_task_pool(nvme); - return -1; - } - STAILQ_INSERT_TAIL(&nvme->free_tasks, task, stailq); - } - - return 0; -} - -static int -vhost_nvme_start_cb(struct spdk_vhost_dev *vdev, - struct spdk_vhost_session *vsession, void *unused) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - struct spdk_vhost_nvme_ns *ns_dev; - uint32_t i; - int rc = 0; - - if (nvme == NULL) { - rc = -1; - goto out; - } - - rc = alloc_task_pool(nvme); - if (rc) { - goto out; - } - - SPDK_NOTICELOG("Start Device %u, Path %s, lcore %d\n", vsession->vid, - vdev->path, spdk_env_get_current_core()); - - for (i = 0; i < nvme->num_ns; i++) { - ns_dev = &nvme->ns[i]; - ns_dev->bdev_io_channel = spdk_bdev_get_io_channel(ns_dev->bdev_desc); - if (!ns_dev->bdev_io_channel) { - rc = -1; - goto out; - } - } - - nvme->vsession = vsession; - /* Start the NVMe Poller */ - nvme->requestq_poller = SPDK_POLLER_REGISTER(nvme_worker, nvme, 0); - -out: - vhost_session_start_done(vsession, rc); - return rc; -} - -static int -vhost_nvme_start(struct spdk_vhost_session *vsession) -{ - if (vsession->vdev->active_session_num > 0) { - /* We're trying to start a second session */ - SPDK_ERRLOG("Vhost-NVMe devices can support only one simultaneous connection.\n"); - return -1; - } - - return vhost_session_send_event(vsession, vhost_nvme_start_cb, - 3, "start session"); -} - -static void -vhost_nvme_deactive_ns(struct spdk_vhost_nvme_ns *ns) -{ - ns->active_ns = 0; - spdk_bdev_close(ns->bdev_desc); - ns->bdev_desc = NULL; - ns->bdev = NULL; -} - -static void -bdev_remove_cb(void *remove_ctx) -{ - struct spdk_vhost_nvme_ns *ns = remove_ctx; - - SPDK_NOTICELOG("Removing NS %u, Block Device %s\n", - ns->nsid, spdk_bdev_get_name(ns->bdev)); - - vhost_nvme_deactive_ns(ns); -} - -static int -destroy_device_poller_cb(void 
*arg) -{ - struct spdk_vhost_nvme_dev *nvme = arg; - struct spdk_vhost_nvme_ns *ns_dev; - uint32_t i; - - SPDK_DEBUGLOG(vhost_nvme, "Destroy device poller callback\n"); - - /* FIXME wait for pending I/Os to complete */ - - if (spdk_vhost_trylock() != 0) { - return SPDK_POLLER_BUSY; - } - - for (i = 0; i < nvme->num_ns; i++) { - ns_dev = &nvme->ns[i]; - if (ns_dev->bdev_io_channel) { - spdk_put_io_channel(ns_dev->bdev_io_channel); - ns_dev->bdev_io_channel = NULL; - } - } - /* Clear BAR space */ - if (nvme->bar) { - memset((void *)nvme->bar, 0, nvme->bar_size); - } - nvme->num_sqs = 0; - nvme->num_cqs = 0; - nvme->dbbuf_dbs = NULL; - nvme->dbbuf_eis = NULL; - nvme->dataplane_started = false; - - spdk_poller_unregister(&nvme->stop_poller); - vhost_session_stop_done(nvme->vsession, 0); - - spdk_vhost_unlock(); - return SPDK_POLLER_BUSY; -} - -static int -vhost_nvme_stop_cb(struct spdk_vhost_dev *vdev, - struct spdk_vhost_session *vsession, void *unused) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - - if (nvme == NULL) { - vhost_session_stop_done(vsession, -1); - return -1; - } - - free_task_pool(nvme); - SPDK_NOTICELOG("Stopping Device %u, Path %s\n", vsession->vid, vdev->path); - - spdk_poller_unregister(&nvme->requestq_poller); - nvme->stop_poller = SPDK_POLLER_REGISTER(destroy_device_poller_cb, nvme, 1000); - - return 0; -} - -static int -vhost_nvme_stop(struct spdk_vhost_session *vsession) -{ - return vhost_session_send_event(vsession, vhost_nvme_stop_cb, - 3, "start session"); -} - -static void -vhost_nvme_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - struct spdk_vhost_nvme_ns *ns_dev; - uint32_t i; - - if (nvme == NULL) { - return; - } - - spdk_json_write_named_array_begin(w, "namespaces"); - - for (i = 0; i < nvme->num_ns; i++) { - ns_dev = &nvme->ns[i]; - if (!ns_dev->active_ns) { - continue; - } - - spdk_json_write_object_begin(w); - 
spdk_json_write_named_uint32(w, "nsid", ns_dev->nsid); - spdk_json_write_named_string(w, "bdev", spdk_bdev_get_name(ns_dev->bdev)); - spdk_json_write_object_end(w); - } - - spdk_json_write_array_end(w); -} - -static void -vhost_nvme_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - struct spdk_vhost_nvme_ns *ns_dev; - uint32_t i; - - if (nvme == NULL) { - return; - } - - spdk_json_write_object_begin(w); - spdk_json_write_named_string(w, "method", "vhost_create_nvme_controller"); - - spdk_json_write_named_object_begin(w, "params"); - spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name); - spdk_json_write_named_uint32(w, "io_queues", nvme->num_io_queues); - spdk_json_write_named_string(w, "cpumask", - spdk_cpuset_fmt(spdk_thread_get_cpumask(nvme->vdev.thread))); - spdk_json_write_object_end(w); - - spdk_json_write_object_end(w); - - for (i = 0; i < nvme->num_ns; i++) { - ns_dev = &nvme->ns[i]; - if (!ns_dev->active_ns) { - continue; - } - - spdk_json_write_object_begin(w); - spdk_json_write_named_string(w, "method", "vhost_nvme_controller_add_ns"); - - spdk_json_write_named_object_begin(w, "params"); - spdk_json_write_named_string(w, "ctrlr", nvme->vdev.name); - spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(ns_dev->bdev)); - spdk_json_write_object_end(w); - - spdk_json_write_object_end(w); - } -} - -static const struct spdk_vhost_dev_backend spdk_vhost_nvme_device_backend = { - .session_ctx_size = 0, - .start_session = vhost_nvme_start, - .stop_session = vhost_nvme_stop, - .dump_info_json = vhost_nvme_dump_info_json, - .write_config_json = vhost_nvme_write_config_json, - .remove_device = vhost_nvme_dev_remove, -}; - -static int -vhost_nvme_ns_identify_update(struct spdk_vhost_nvme_dev *dev) -{ - struct spdk_nvme_ctrlr_data *cdata = &dev->cdata; - struct spdk_nvme_ns_data *nsdata; - uint64_t num_blocks; - uint32_t i; - - /* Identify Namespace */ - cdata->nn 
= dev->num_ns; - for (i = 0; i < dev->num_ns; i++) { - nsdata = &dev->ns[i].nsdata; - if (dev->ns[i].active_ns) { - num_blocks = spdk_bdev_get_num_blocks(dev->ns[i].bdev); - nsdata->nsze = num_blocks; - /* ncap must be non-zero for active Namespace */ - nsdata->ncap = num_blocks; - nsdata->nuse = num_blocks; - nsdata->nlbaf = 0; - nsdata->flbas.format = 0; - nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_block_size(dev->ns[i].bdev)); - nsdata->noiob = spdk_bdev_get_optimal_io_boundary(dev->ns[i].bdev); - dev->ns[i].block_size = spdk_bdev_get_block_size(dev->ns[i].bdev); - dev->ns[i].capacity = num_blocks * dev->ns[i].block_size; - } else { - memset(nsdata, 0, sizeof(*nsdata)); - } - } - return 0; -} - -static int -vhost_nvme_ctrlr_identify_update(struct spdk_vhost_nvme_dev *dev) -{ - struct spdk_nvme_ctrlr_data *cdata = &dev->cdata; - char sn[20]; - - /* Controller Capabilities */ - dev->cap.bits.cqr = 1; - dev->cap.bits.to = 1; - dev->cap.bits.dstrd = 0; - dev->cap.bits.css = SPDK_NVME_CAP_CSS_NVM; - dev->cap.bits.mpsmin = 0; - dev->cap.bits.mpsmax = 0; - /* MQES is 0 based value */ - dev->cap.bits.mqes = MAX_QUEUE_ENTRIES_SUPPORTED - 1; - - /* Controller Configuration */ - dev->cc.bits.en = 0; - - /* Controller Status */ - dev->csts.bits.rdy = 0; - - /* Identify Controller */ - spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' '); - cdata->vid = 0x8086; - cdata->ssvid = 0x8086; - spdk_strcpy_pad(cdata->mn, "SPDK Virtual NVMe Controller", sizeof(cdata->mn), ' '); - snprintf(sn, sizeof(sn), "NVMe_%s", dev->vdev.name); - spdk_strcpy_pad(cdata->sn, sn, sizeof(cdata->sn), ' '); - cdata->ieee[0] = 0xe4; - cdata->ieee[1] = 0xd2; - cdata->ieee[2] = 0x5c; - cdata->ver.bits.mjr = 1; - cdata->ver.bits.mnr = 0; - cdata->mdts = 5; /* 128 KiB */ - cdata->rab = 6; - cdata->sqes.min = 6; - cdata->sqes.max = 6; - cdata->cqes.min = 4; - cdata->cqes.max = 4; - cdata->oncs.dsm = 1; - /* Emulated NVMe controller */ - cdata->oacs.doorbell_buffer_config = 1; - - 
vhost_nvme_ns_identify_update(dev); - - return 0; -} - -int -vhost_nvme_dev_construct(const char *name, const char *cpumask, uint32_t num_io_queues) -{ - struct spdk_vhost_nvme_dev *dev; - int rc; - - if (posix_memalign((void **)&dev, SPDK_CACHE_LINE_SIZE, sizeof(*dev))) { - return -ENOMEM; - } - memset(dev, 0, sizeof(*dev)); - - if (num_io_queues < 1 || num_io_queues > MAX_IO_QUEUES) { - free(dev); - return -EINVAL; - } - - spdk_vhost_lock(); - rc = vhost_dev_register(&dev->vdev, name, cpumask, - &spdk_vhost_nvme_device_backend); - - if (rc) { - free(dev); - spdk_vhost_unlock(); - return rc; - } - - dev->num_io_queues = num_io_queues; - STAILQ_INIT(&dev->free_tasks); - TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, dev, tailq); - - vhost_nvme_ctrlr_identify_update(dev); - - SPDK_NOTICELOG("Controller %s: Constructed\n", name); - spdk_vhost_unlock(); - return rc; -} - -int -vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - struct spdk_vhost_nvme_ns *ns; - int rc; - uint32_t i; - - if (nvme == NULL) { - return -EINVAL; - } - - TAILQ_REMOVE(&g_nvme_ctrlrs, nvme, tailq); - for (i = 0; i < nvme->num_ns; i++) { - ns = &nvme->ns[i]; - if (ns->active_ns) { - vhost_nvme_deactive_ns(ns); - } - } - - rc = vhost_dev_unregister(vdev); - if (rc != 0) { - return rc; - } - - free(nvme); - return 0; -} - -int -vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev, const char *bdev_name) -{ - struct spdk_vhost_nvme_dev *nvme = to_nvme_dev(vdev); - struct spdk_vhost_nvme_ns *ns; - struct spdk_bdev *bdev; - int rc = -1; - - if (nvme == NULL) { - return -ENODEV; - } - - if (nvme->num_ns == MAX_NAMESPACE) { - SPDK_ERRLOG("Can't support %d Namespaces\n", nvme->num_ns); - return -ENOSPC; - } - - bdev = spdk_bdev_get_by_name(bdev_name); - if (!bdev) { - SPDK_ERRLOG("could not find bdev %s\n", bdev_name); - return -ENODEV; - } - - ns = &nvme->ns[nvme->num_ns]; - rc = spdk_bdev_open(bdev, true, bdev_remove_cb, ns, 
&nvme->ns[nvme->num_ns].bdev_desc); - if (rc != 0) { - SPDK_ERRLOG("Could not open bdev '%s', error=%d\n", - bdev_name, rc); - return rc; - } - - nvme->ns[nvme->num_ns].bdev = bdev; - nvme->ns[nvme->num_ns].active_ns = 1; - nvme->ns[nvme->num_ns].nsid = nvme->num_ns + 1; - nvme->num_ns++; - - vhost_nvme_ns_identify_update(nvme); - - return rc; -} - -int -vhost_nvme_controller_construct(void) -{ - struct spdk_conf_section *sp; - const char *name; - const char *bdev_name; - const char *cpumask; - int rc, i = 0; - struct spdk_vhost_dev *vdev; - uint32_t ctrlr_num, io_queues; - - for (sp = spdk_conf_first_section(NULL); sp != NULL; sp = spdk_conf_next_section(sp)) { - if (!spdk_conf_section_match_prefix(sp, "VhostNvme")) { - continue; - } - - if (sscanf(spdk_conf_section_get_name(sp), "VhostNvme%u", &ctrlr_num) != 1) { - SPDK_ERRLOG("Section '%s' has non-numeric suffix.\n", - spdk_conf_section_get_name(sp)); - return -1; - } - - name = spdk_conf_section_get_val(sp, "Name"); - if (name == NULL) { - SPDK_ERRLOG("VhostNvme%u: missing Name\n", ctrlr_num); - return -1; - } - - cpumask = spdk_conf_section_get_val(sp, "Cpumask"); - rc = spdk_conf_section_get_intval(sp, "NumberOfQueues"); - if (rc > 0) { - io_queues = rc; - } else { - io_queues = 1; - } - - rc = vhost_nvme_dev_construct(name, cpumask, io_queues); - if (rc < 0) { - SPDK_ERRLOG("VhostNvme%u: Construct failed\n", ctrlr_num); - return -1; - } - - vdev = spdk_vhost_dev_find(name); - if (!vdev) { - return -1; - } - - for (i = 0; spdk_conf_section_get_nval(sp, "Namespace", i) != NULL; i++) { - bdev_name = spdk_conf_section_get_nmval(sp, "Namespace", i, 0); - if (!bdev_name) { - SPDK_ERRLOG("namespace configuration missing bdev name\n"); - break; - } - rc = vhost_nvme_dev_add_ns(vdev, bdev_name); - if (rc < 0) { - SPDK_WARNLOG("VhostNvme%u: Construct Namespace with %s failed\n", - ctrlr_num, bdev_name); - break; - } - } - } - - return 0; -} - -SPDK_LOG_REGISTER_COMPONENT(vhost_nvme) diff --git a/lib/vhost/vhost_rpc.c 
b/lib/vhost/vhost_rpc.c index 8dfef5f84b..5e8c4be6ac 100644 --- a/lib/vhost/vhost_rpc.c +++ b/lib/vhost/vhost_rpc.c @@ -526,127 +526,4 @@ SPDK_RPC_REGISTER("vhost_controller_set_coalescing", rpc_vhost_controller_set_co SPDK_RPC_RUNTIME) SPDK_RPC_REGISTER_ALIAS_DEPRECATED(vhost_controller_set_coalescing, set_vhost_controller_coalescing) -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB - -struct rpc_vhost_nvme_ctrlr { - char *ctrlr; - uint32_t io_queues; - char *cpumask; -}; - -static const struct spdk_json_object_decoder rpc_construct_vhost_nvme_ctrlr[] = { - {"ctrlr", offsetof(struct rpc_vhost_nvme_ctrlr, ctrlr), spdk_json_decode_string }, - {"io_queues", offsetof(struct rpc_vhost_nvme_ctrlr, io_queues), spdk_json_decode_uint32}, - {"cpumask", offsetof(struct rpc_vhost_nvme_ctrlr, cpumask), spdk_json_decode_string, true}, -}; - -static void -free_rpc_vhost_nvme_ctrlr(struct rpc_vhost_nvme_ctrlr *req) -{ - free(req->ctrlr); - free(req->cpumask); -} - -static void -rpc_vhost_create_nvme_controller(struct spdk_jsonrpc_request *request, - const struct spdk_json_val *params) -{ - struct rpc_vhost_nvme_ctrlr req = {}; - struct spdk_json_write_ctx *w; - int rc; - - if (spdk_json_decode_object(params, rpc_construct_vhost_nvme_ctrlr, - SPDK_COUNTOF(rpc_construct_vhost_nvme_ctrlr), - &req)) { - rc = -EINVAL; - goto invalid; - } - - rc = vhost_nvme_dev_construct(req.ctrlr, req.cpumask, req.io_queues); - if (rc < 0) { - goto invalid; - } - - free_rpc_vhost_nvme_ctrlr(&req); - - w = spdk_jsonrpc_begin_result(request); - spdk_json_write_bool(w, true); - spdk_jsonrpc_end_result(request, w); - return; - -invalid: - free_rpc_vhost_nvme_ctrlr(&req); - spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, - spdk_strerror(-rc)); - -} -SPDK_RPC_REGISTER("vhost_create_nvme_controller", rpc_vhost_create_nvme_controller, - SPDK_RPC_RUNTIME) -SPDK_RPC_REGISTER_ALIAS_DEPRECATED(vhost_create_nvme_controller, construct_vhost_nvme_controller) - -struct rpc_vhost_nvme_ctrlr_add_ns 
{ - char *ctrlr; - char *bdev_name; -}; - -static void -free_rpc_vhost_nvme_ctrlr_add_ns(struct rpc_vhost_nvme_ctrlr_add_ns *req) -{ - free(req->ctrlr); - free(req->bdev_name); -} - -static const struct spdk_json_object_decoder rpc_vhost_nvme_add_ns[] = { - {"ctrlr", offsetof(struct rpc_vhost_nvme_ctrlr_add_ns, ctrlr), spdk_json_decode_string }, - {"bdev_name", offsetof(struct rpc_vhost_nvme_ctrlr_add_ns, bdev_name), spdk_json_decode_string }, -}; - -static void -rpc_vhost_nvme_controller_add_ns(struct spdk_jsonrpc_request *request, - const struct spdk_json_val *params) -{ - struct rpc_vhost_nvme_ctrlr_add_ns req = {0}; - struct spdk_json_write_ctx *w; - struct spdk_vhost_dev *vdev; - int rc; - - if (spdk_json_decode_object(params, rpc_vhost_nvme_add_ns, - SPDK_COUNTOF(rpc_vhost_nvme_add_ns), - &req)) { - SPDK_DEBUGLOG(vhost_rpc, "spdk_json_decode_object failed\n"); - rc = -EINVAL; - goto invalid; - } - - spdk_vhost_lock(); - vdev = spdk_vhost_dev_find(req.ctrlr); - if (vdev == NULL) { - spdk_vhost_unlock(); - rc = -ENODEV; - goto invalid; - } - - rc = vhost_nvme_dev_add_ns(vdev, req.bdev_name); - spdk_vhost_unlock(); - if (rc < 0) { - goto invalid; - } - free_rpc_vhost_nvme_ctrlr_add_ns(&req); - - w = spdk_jsonrpc_begin_result(request); - spdk_json_write_bool(w, true); - spdk_jsonrpc_end_result(request, w); - return; - -invalid: - free_rpc_vhost_nvme_ctrlr_add_ns(&req); - spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS, - spdk_strerror(-rc)); -} -SPDK_RPC_REGISTER("vhost_nvme_controller_add_ns", rpc_vhost_nvme_controller_add_ns, - SPDK_RPC_RUNTIME) -SPDK_RPC_REGISTER_ALIAS_DEPRECATED(vhost_nvme_controller_add_ns, add_vhost_nvme_ns) - -#endif /* SPDK_CONFIG_VHOST_INTERNAL_LIB */ - SPDK_LOG_REGISTER_COMPONENT(vhost_rpc) diff --git a/lib/virtio/virtio_user.c b/lib/virtio/virtio_user.c index e45003352a..10e3cb8116 100644 --- a/lib/virtio/virtio_user.c +++ b/lib/virtio/virtio_user.c @@ -160,20 +160,6 @@ virtio_user_map_notify(void *cb_ctx, 
struct spdk_mem_map *map, return ret; } -#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB - /* Our internal rte_vhost lib requires SET_VRING_ADDR to flush a pending - * SET_MEM_TABLE. On the other hand, the upstream rte_vhost will invalidate - * the entire queue upon receiving SET_VRING_ADDR message, so we mustn't - * send it here. Both behaviors are strictly implementation specific, but - * this message isn't needed from the point of the spec, so send it only - * if vhost is compiled with our internal lib. - */ - ret = virtio_user_queue_setup(vdev, virtio_user_set_vring_addr); - if (ret < 0) { - return ret; - } -#endif - /* Since we might want to use that mapping straight away, we have to * make sure the guest has already processed our SET_MEM_TABLE message. * F_REPLY_ACK is just a feature and the host is not obliged to diff --git a/mk/spdk.lib_deps.mk b/mk/spdk.lib_deps.mk index 9b9aca069b..4184577b1f 100644 --- a/mk/spdk.lib_deps.mk +++ b/mk/spdk.lib_deps.mk @@ -87,9 +87,6 @@ DEPDIRS-scsi := log util thread $(JSON_LIBS) trace bdev DEPDIRS-iscsi := log sock util conf thread $(JSON_LIBS) trace scsi DEPDIRS-vhost = log util conf thread $(JSON_LIBS) bdev scsi -ifeq ($(CONFIG_VHOST_INTERNAL_LIB),y) -DEPDIRS-vhost += rte_vhost -endif # ------------------------------------------------------------------------ # Start module/ directory - This section extends the organizational pattern from diff --git a/mk/spdk.nvmecli.mk b/mk/spdk.nvmecli.mk index eb04a71a39..dbde972bda 100644 --- a/mk/spdk.nvmecli.mk +++ b/mk/spdk.nvmecli.mk @@ -62,11 +62,7 @@ NVMECLI_SPDK_LIBS += -lspdk_ocfenv endif ifeq ($(CONFIG_VHOST),y) -ifneq ($(CONFIG_VHOST_INTERNAL_LIB),y) DPDK_LIB_LIST += -lrte_vhost -lrte_net -lrte_cryptodev -lrte_hash -else -NVMECLI_SPDK_LIBS += -lrte_vhost -endif endif override CFLAGS += -I$(SPDK_ROOT_DIR)/include diff --git a/scripts/config_converter.py b/scripts/config_converter.py index 4fa65a9d7b..de43062e7f 100755 --- a/scripts/config_converter.py +++ b/scripts/config_converter.py 
@@ -20,7 +20,6 @@ bdev_dict["bdev_virtio_attach_controller"] = [] vhost_dict = OrderedDict() vhost_dict["vhost_create_scsi_controller"] = [] vhost_dict["vhost_create_blk_controller"] = [] -vhost_dict["vhost_create_nvme_controller"] = [] iscsi_dict = OrderedDict() iscsi_dict["iscsi_set_options"] = [] @@ -438,34 +437,6 @@ def get_vhost_blk_json(config, section): "params": to_json_params(params)}] -def get_vhost_nvme_json(config, section): - params = [ - ["Name", "ctrlr", str, ""], - ["NumberOfQueues", "io_queues", int, -1], - ["Cpumask", "cpumask", "hex", 0x1], - ["Namespace", "bdev_name", list, []] - ] - for option in config.options(section): - values = config.get(section, option).split("\n") - for value in values: - set_param(params, option, value) - vhost_nvme_json = [] - vhost_nvme_json.append({ - "params": to_json_params(params[:3]), - "method": "vhost_create_nvme_controller" - }) - for namespace in params[3][3]: - vhost_nvme_json.append({ - "params": { - "ctrlr": params[0][3], - "bdev_name": namespace, - }, - "method": "vhost_nvme_controller_add_ns" - }) - - return vhost_nvme_json - - def get_virtio_user_json(config, section): params = [ ["Path", "traddr", str, ""], @@ -665,7 +636,7 @@ if __name__ == "__main__": match = re.match(r'(Bdev|Nvme|Malloc|VirtioUser\d+|Split|Pmem|AIO|' r'iSCSI|PortalGroup\d+|InitiatorGroup\d+|' r'TargetNode\d+|Nvmf|Subsystem\d+|VhostScsi\d+|' - r'VhostBlk\d+|VhostNvme\d+)', section) + r'VhostBlk\d+)', section) if match: match_section = ''.join(letter for letter in match.group(0) if not letter.isdigit()) @@ -689,8 +660,6 @@ if __name__ == "__main__": items = get_vhost_scsi_json(config, section) elif match_section == "VhostBlk": items = get_vhost_blk_json(config, section) - elif match_section == "VhostNvme": - items = get_vhost_nvme_json(config, section) elif match_section == "VirtioUser": items = get_virtio_user_json(config, section) elif match_section == "iSCSI": @@ -704,8 +673,6 @@ if __name__ == "__main__": for item in items: if 
match_section == "VhostScsi": section_to_subsystem[match_section]["vhost_create_scsi_controller"].append(item) - elif match_section == "VhostNvme": - section_to_subsystem[match_section]["vhost_create_nvme_controller"].append(item) elif match_section == "Subsystem": section_to_subsystem[match_section]["subsystems"].append(item) else: diff --git a/scripts/rpc.py b/scripts/rpc.py index 5900246067..5b5d5b2e6b 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -2183,30 +2183,6 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse p.add_argument("-p", "--packed_ring", action='store_true', help='Set controller as packed ring supported') p.set_defaults(func=vhost_create_blk_controller) - def vhost_create_nvme_controller(args): - rpc.vhost.vhost_create_nvme_controller(args.client, - ctrlr=args.ctrlr, - io_queues=args.io_queues, - cpumask=args.cpumask) - - p = subparsers.add_parser('vhost_create_nvme_controller', aliases=['vhost_create_nvme_controller'], - help='Add new vhost controller') - p.add_argument('ctrlr', help='controller name') - p.add_argument('io_queues', help='number of IO queues for the controller', type=int) - p.add_argument('--cpumask', help='cpu mask for this controller') - p.set_defaults(func=vhost_create_nvme_controller) - - def vhost_nvme_controller_add_ns(args): - rpc.vhost.vhost_nvme_controller_add_ns(args.client, - ctrlr=args.ctrlr, - bdev_name=args.bdev_name) - - p = subparsers.add_parser('vhost_nvme_controller_add_ns', aliases=['add_vhost_nvme_ns'], - help='Add a Namespace to vhost controller') - p.add_argument('ctrlr', help='conntroller name where add a Namespace') - p.add_argument('bdev_name', help='block device name for a new Namespace') - p.set_defaults(func=vhost_nvme_controller_add_ns) - def vhost_get_controllers(args): print_dict(rpc.vhost.vhost_get_controllers(args.client, args.name)) diff --git a/scripts/rpc/vhost.py b/scripts/rpc/vhost.py index b2e0a846cb..f34a6f188f 100644 --- a/scripts/rpc/vhost.py +++ 
b/scripts/rpc/vhost.py @@ -62,40 +62,6 @@ def vhost_scsi_controller_remove_target(client, ctrlr, scsi_target_num): return client.call('vhost_scsi_controller_remove_target', params) -@deprecated_alias('construct_vhost_nvme_controller') -def vhost_create_nvme_controller(client, ctrlr, io_queues, cpumask=None): - """Construct vhost NVMe controller. - Args: - ctrlr: controller name - io_queues: number of IO queues for the controller - cpumask: cpu mask for this controller - """ - params = { - 'ctrlr': ctrlr, - 'io_queues': io_queues - } - - if cpumask: - params['cpumask'] = cpumask - - return client.call('vhost_create_nvme_controller', params) - - -@deprecated_alias('add_vhost_nvme_ns') -def vhost_nvme_controller_add_ns(client, ctrlr, bdev_name): - """Add namespace to vhost nvme controller. - Args: - ctrlr: controller name where to add a namespace - bdev_name: block device name for a new namespace - """ - params = { - 'ctrlr': ctrlr, - 'bdev_name': bdev_name, - } - - return client.call('vhost_nvme_controller_add_ns', params) - - @deprecated_alias('construct_vhost_blk_controller') def vhost_create_blk_controller(client, ctrlr, dev_name, cpumask=None, readonly=None, packed_ring=None): """Create vhost BLK controller. diff --git a/test/common/skipped_build_files.txt b/test/common/skipped_build_files.txt index ea794ec824..13443f2443 100644 --- a/test/common/skipped_build_files.txt +++ b/test/common/skipped_build_files.txt @@ -16,14 +16,6 @@ lib/util/base64_neon # Not configured for mlx5 dv testing lib/rdma/rdma_mlx5_dv -# Files related to testing our internal vhost implementation. -lib/rte_vhost/fd_man -lib/rte_vhost/socket -lib/rte_vhost/vhost -lib/rte_vhost/vhost_user -lib/vhost/vhost_nvme -lib/virtio/vhost_user - # These files all represent c files that are only compiled by direct inclusion in other files. 
test/common/lib/test_env test/common/lib/test_sock diff --git a/test/config_converter/config.ini b/test/config_converter/config.ini index bbfc17373d..5601414df9 100644 --- a/test/config_converter/config.ini +++ b/test/config_converter/config.ini @@ -85,13 +85,6 @@ ReadOnly no Cpumask 0x1 -[VhostNvme0] - Name naa.vhost.3 - NumberOfQueues 2 - Namespace Nvme0n1p0 - Namespace Nvme0n1p1 - Cpumask 0x1 - [Subsystem1] NQN nqn.2016-06.io.spdk:cnode1 Listen RDMA 10.0.2.15:4420 diff --git a/test/config_converter/spdk_config.json b/test/config_converter/spdk_config.json index af8dcfbcc3..297933dc6f 100644 --- a/test/config_converter/spdk_config.json +++ b/test/config_converter/spdk_config.json @@ -376,28 +376,6 @@ "cpumask": "1" }, "method": "vhost_create_blk_controller" - }, - { - "params": { - "cpumask": "1", - "io_queues": 2, - "ctrlr": "naa.vhost.3" - }, - "method": "vhost_create_nvme_controller" - }, - { - "params": { - "bdev_name": "Nvme0n1p0", - "ctrlr": "naa.vhost.3" - }, - "method": "vhost_nvme_controller_add_ns" - }, - { - "params": { - "bdev_name": "Nvme0n1p1", - "ctrlr": "naa.vhost.3" - }, - "method": "vhost_nvme_controller_add_ns" } ] }, diff --git a/test/json_config/json_config.sh b/test/json_config/json_config.sh index a5a714ccc2..687f5d8c3b 100755 --- a/test/json_config/json_config.sh +++ b/test/json_config/json_config.sh @@ -285,10 +285,6 @@ function create_vhost_subsystem_config() { tgt_rpc vhost_create_blk_controller VhostBlkCtrlr0 MallocForVhost0p5 - # FIXME: enable after vhost-nvme is properly implemented against the latest rte_vhost (DPDK 19.05+) - # tgt_rpc vhost_create_nvme_controller VhostNvmeCtrlr0 16 - # tgt_rpc vhost_nvme_controller_add_ns VhostNvmeCtrlr0 MallocForVhost0p6 - timing_exit "${FUNCNAME[0]}" } diff --git a/test/make/check_so_deps.sh b/test/make/check_so_deps.sh index 76a077b505..549e359adc 100755 --- a/test/make/check_so_deps.sh +++ b/test/make/check_so_deps.sh @@ -252,9 +252,6 @@ echo 
"---------------------------------------------------------------------" SPDK_LIBS=("$libdir/"libspdk_!(env_dpdk).so) declare -A IGNORED_LIBS=() -if grep -q 'CONFIG_VHOST_INTERNAL_LIB?=n' $rootdir/mk/config.mk; then - IGNORED_LIBS["rte_vhost"]=1 -fi if grep -q 'CONFIG_RDMA?=n' $rootdir/mk/config.mk; then IGNORED_LIBS["rdma"]=1 fi diff --git a/test/unit/lib/vhost/vhost.c/Makefile b/test/unit/lib/vhost/vhost.c/Makefile index 23438ec4d9..b7c1a162ce 100644 --- a/test/unit/lib/vhost/vhost.c/Makefile +++ b/test/unit/lib/vhost/vhost.c/Makefile @@ -34,10 +34,6 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..) include $(SPDK_ROOT_DIR)/mk/config.mk -ifeq ($(CONFIG_VHOST_INTERNAL_LIB),y) -CFLAGS += -I$(SPDK_ROOT_DIR)/lib/rte_vhost -endif - CFLAGS += $(ENV_CFLAGS) TEST_FILE = vhost_ut.c diff --git a/test/unit/lib/vhost/vhost.c/vhost_ut.c b/test/unit/lib/vhost/vhost.c/vhost_ut.c index a62c7666f4..b7e746a8b3 100644 --- a/test/unit/lib/vhost/vhost.c/vhost_ut.c +++ b/test/unit/lib/vhost/vhost.c/vhost_ut.c @@ -70,10 +70,6 @@ DEFINE_STUB(rte_vhost_driver_callback_register, int, DEFINE_STUB(rte_vhost_driver_disable_features, int, (const char *path, uint64_t features), 0); DEFINE_STUB(rte_vhost_driver_set_features, int, (const char *path, uint64_t features), 0); DEFINE_STUB(rte_vhost_driver_register, int, (const char *path, uint64_t flags), 0); -DEFINE_STUB(vhost_nvme_admin_passthrough, int, (int vid, void *cmd, void *cqe, void *buf), 0); -DEFINE_STUB(vhost_nvme_set_cq_call, int, (int vid, uint16_t qid, int fd), 0); -DEFINE_STUB(vhost_nvme_set_bar_mr, int, (int vid, void *bar, uint64_t bar_size), 0); -DEFINE_STUB(vhost_nvme_get_cap, int, (int vid, uint64_t *cap), 0); void * spdk_call_unaffinitized(void *cb(void *arg), void *arg)