From 805d994281e892a7c2fc40e332b0fa4793649e2f Mon Sep 17 00:00:00 2001 From: Tomasz Kulasek Date: Thu, 24 Oct 2019 20:04:33 +0200 Subject: [PATCH] lib/nvme: add NVMe character device NVMe character device implementation. This patch adds implementation of IO producer using CUSE library. It allows to create nvme device nodes in linux kernel for controller as well as for namespace and process ioctl requests as usual from linux environment. Both devices (controller and namespaces) are exposed as character devices. To compile NVMe CUSE module use "./configure --with-nvme-cuse". Names for created CUSE devices can be retrieved using spdk_nvme_cuse_get_ctrlr_name() and spdk_nvme_cuse_get_ns_name(). Signed-off-by: Jim Harris Signed-off-by: Tomasz Kulasek Signed-off-by: Tomasz Zawadzki Signed-off-by: Maciej Szwed Change-Id: I0fc9a9a1ef3c9c2b3112d07c2b4b1f8d49665ee1 Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/466917 Tested-by: SPDK CI Jenkins --- CONFIG | 3 + configure | 17 +++ include/spdk/nvme.h | 19 +++ lib/nvme/Makefile | 7 + lib/nvme/nvme_cuse.c | 349 +++++++++++++++++++++++++++++++++++++++++++ lib/nvme/nvme_cuse.h | 42 ++++++ mk/spdk.common.mk | 4 + scripts/pkgdep.sh | 9 ++ 8 files changed, 450 insertions(+) create mode 100644 lib/nvme/nvme_cuse.c create mode 100644 lib/nvme/nvme_cuse.h diff --git a/CONFIG b/CONFIG index 1d6e5c9095..cc450e8e70 100644 --- a/CONFIG +++ b/CONFIG @@ -94,6 +94,9 @@ CONFIG_FIO_SOURCE_DIR=/usr/src/fio CONFIG_RDMA=n CONFIG_RDMA_SEND_WITH_INVAL=n +# Enable NVMe Character Devices. +CONFIG_NVME_CUSE=n + # Enable FC support for the NVMf target. # Requires FC low level driver (from FC vendor) CONFIG_FC=n diff --git a/configure b/configure index e4e6dae104..fd5dd75989 100755 --- a/configure +++ b/configure @@ -94,6 +94,8 @@ function usage() echo " be searched." echo " fuse Build FUSE components for mounting a blobfs filesystem." echo " No path required." + echo " nvme-cuse Build NVMe driver with support for CUSE-based character devices." 
+	echo " No path required."
 	echo ""
 	echo "Environment variables:"
 	echo ""
@@ -363,6 +365,12 @@ for i in "$@"; do
 		--without-fuse)
 			CONFIG[FUSE]=n
 			;;
+		--with-nvme-cuse)
+			CONFIG[NVME_CUSE]=y
+			;;
+		--without-nvme-cuse)
+			CONFIG[NVME_CUSE]=n
+			;;
 		--)
 			break
 			;;
@@ -611,6 +619,15 @@ if [[ "${CONFIG[VPP]}" = "y" ]]; then
 	fi
 fi
 
+if [[ "${CONFIG[NVME_CUSE]}" = "y" ]]; then
+	if ! echo -e '#define FUSE_USE_VERSION 31\n#include <fuse3/cuse_lowlevel.h>\n#include <fuse3/fuse_lowlevel.h>\n#include <fuse3/fuse_opt.h>\nint main(void) { return 0; }\n' \
+		| ${BUILD_CMD[@]} -lfuse3 -D_FILE_OFFSET_BITS=64 - 2>/dev/null; then
+		echo --with-nvme-cuse requires libfuse3.
+		echo Please install then re-run this script.
+		exit 1
+	fi
+fi
+
 if [[ "${CONFIG[RBD]}" = "y" ]]; then
 	if ! echo -e '#include <rbd/librbd.h>\n#include <rados/librados.h>\n' \
 		'int main(void) { return 0; }\n' \
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
index 0fcd8caa82..4110bb6a34 100644
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h
@@ -2584,6 +2584,25 @@ struct spdk_nvme_rdma_hooks {
  */
 void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
 
+/**
+ * Get name of cuse device associated with NVMe controller.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ *
+ * \return Pointer to the name of device, or NULL if no cuse device exists for the controller.
+ */
+char *spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr);
+
+/**
+ * Get name of cuse device associated with NVMe namespace.
+ *
+ * \param ctrlr Opaque handle to NVMe controller.
+ * \param nsid Namespace id.
+ *
+ * \return Pointer to the name of device, or NULL if no cuse device exists for the controller or namespace.
+ */ +char *spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + #ifdef __cplusplus } #endif diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile index 155f6ad184..5793f4015b 100644 --- a/lib/nvme/Makefile +++ b/lib/nvme/Makefile @@ -37,6 +37,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c nvme_uevent.c nvme_ctrlr_ocssd_cmd.c \ nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c +C_SRCS-$(CONFIG_NVME_CUSE) += nvme_cuse.c + LIBNAME = nvme LOCAL_SYS_LIBS = -luuid ifeq ($(CONFIG_RDMA),y) @@ -58,4 +60,9 @@ endif endif endif +ifeq ($(CONFIG_NVME_CUSE),y) +# fuse requires to set _FILE_OFFSET_BITS to 64 bits even for 64 bit machines +CFLAGS += -D_FILE_OFFSET_BITS=64 +endif + include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/nvme/nvme_cuse.c b/lib/nvme/nvme_cuse.c new file mode 100644 index 0000000000..588b3d4d18 --- /dev/null +++ b/lib/nvme/nvme_cuse.c @@ -0,0 +1,349 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define FUSE_USE_VERSION 31
+
+/*
+ * NOTE(review): the names of the three system headers below were missing from
+ * this copy of the patch and have been reconstructed - confirm against the
+ * upstream commit.
+ */
+#include <fuse3/cuse_lowlevel.h>
+
+#include <linux/nvme_ioctl.h>
+#include <linux/fs.h>
+
+#include "nvme_internal.h"
+#include "nvme_io_msg.h"
+#include "nvme_cuse.h"
+
+/* One CUSE character device: either a controller (nsid == 0) or one of its namespaces. */
+struct cuse_device {
+	char dev_name[128];			/**< Name handed to CUSE as DEVNAME= */
+
+	struct spdk_nvme_ctrlr *ctrlr;		/**< NVMe controller */
+	uint32_t nsid;				/**< NVMe name space id, or 0 */
+
+	uint32_t idx;				/**< Index used when building dev_name */
+	pthread_t tid;				/**< Thread running cuse_thread() for this device */
+	struct fuse_session *session;		/**< Set by cuse_thread(); NULL if setup did not succeed */
+
+	struct cuse_device *ctrlr_device;	/**< For a namespace device: its controller device */
+	TAILQ_HEAD(, cuse_device) ns_devices;	/**< For a controller device: its namespace devices */
+
+	TAILQ_ENTRY(cuse_device) tailq;
+};
+
+/* Every controller device created by nvme_cuse_start(). */
+static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
+/* Number of controller devices created so far; provides the controller index. */
+static int g_controllers_found = 0;
+
+/*
+ * ioctl handler of controller devices. No ioctl is implemented yet:
+ * FUSE_IOCTL_COMPAT requests get ENOSYS, anything else is logged and gets EINVAL.
+ */
+static void
+cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
+		 struct fuse_file_info *fi, unsigned flags,
+		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	if (flags & FUSE_IOCTL_COMPAT) {
+		fuse_reply_err(req, ENOSYS);
+		return;
+	}
+
+	SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
+	fuse_reply_err(req, EINVAL);
+}
+
+/*
+ * ioctl handler of namespace devices. No ioctl is implemented yet:
+ * FUSE_IOCTL_COMPAT requests get ENOSYS, anything else is logged and gets EINVAL.
+ */
+static void
+cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
+	      struct fuse_file_info *fi, unsigned flags,
+	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	if (flags & FUSE_IOCTL_COMPAT) {
+		fuse_reply_err(req, ENOSYS);
+		return;
+	}
+
+	SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
+	fuse_reply_err(req, EINVAL);
+}
+
+/*****************************************************************************
+ * CUSE threads initialization.
+ */
+
+/* open handler shared by controller and namespace devices; always succeeds. */
+static void cuse_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+	fuse_reply_open(req, fi);
+}
+
+static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
+	.open		= cuse_open,
+	.ioctl		= cuse_ctrlr_ioctl,
+};
+
+static const struct cuse_lowlevel_ops cuse_ns_clop = {
+	.open		= cuse_open,
+	.ioctl		= cuse_ns_ioctl,
+};
+
+/*
+ * Thread entry point, one thread per cuse_device (passed as arg).
+ *
+ * Creates the CUSE session for the device, runs fuse_session_loop() until it
+ * returns and then tears the session down. If the session cannot be created
+ * the thread exits with cuse_device->session left NULL.
+ */
+static void *
+cuse_thread(void *arg)
+{
+	struct cuse_device *cuse_device = arg;
+	char *cuse_argv[] = { "cuse", "-f" };
+	int cuse_argc = SPDK_COUNTOF(cuse_argv);
+	char devname_arg[128 + 8];
+	const char *dev_info_argv[] = { devname_arg };
+	const struct cuse_lowlevel_ops *clop;
+	struct cuse_info ci;
+	int multithreaded;
+
+	spdk_unaffinitize_thread();
+
+	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);
+
+	memset(&ci, 0, sizeof(ci));
+	ci.dev_info_argc = 1;
+	ci.dev_info_argv = dev_info_argv;
+	ci.flags = CUSE_UNRESTRICTED_IOCTL;
+
+	/* A non-zero nsid marks a namespace device, 0 marks a controller device. */
+	clop = cuse_device->nsid ? &cuse_ns_clop : &cuse_ctrlr_clop;
+	cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, clop,
+			       &multithreaded, cuse_device);
+	if (!cuse_device->session) {
+		/* Nothing was set up, so there is no session to tear down. */
+		SPDK_ERRLOG("Cannot create cuse session\n");
+		pthread_exit(NULL);
+	}
+
+	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
+	fuse_session_loop(cuse_device->session);
+
+	cuse_lowlevel_teardown(cuse_device->session);
+	pthread_exit(NULL);
+}
+
+/*****************************************************************************
+ * CUSE devices management
+ */
+
+/*
+ * Allocate a namespace device for nsid, start its CUSE thread and link it to
+ * ctrlr_device.
+ *
+ * Returns 0 on success, -ENOMEM if allocation fails, -1 if the thread cannot
+ * be created.
+ */
+static int
+cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
+{
+	struct cuse_device *ns_device;
+
+	ns_device = calloc(1, sizeof(*ns_device));
+	if (!ns_device) {
+		SPDK_ERRLOG("Cannot allocate memory for ns_device.\n");
+		return -ENOMEM;
+	}
+
+	ns_device->ctrlr = ctrlr_device->ctrlr;
+	ns_device->ctrlr_device = ctrlr_device;
+	ns_device->idx = nsid;
+	ns_device->nsid = nsid;
+	snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "spdk/nvme%un%u",
+		 ctrlr_device->idx, ns_device->idx);
+
+	if (pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device)) {
+		SPDK_ERRLOG("pthread_create failed\n");
+		free(ns_device);
+		return -1;
+	}
+
+	TAILQ_INSERT_TAIL(&ctrlr_device->ns_devices, ns_device, tailq);
+	return 0;
+}
+
+/*
+ * Ask the CUSE thread of cuse_device to stop and wait until it has exited.
+ *
+ * The session is NULL when cuse_lowlevel_setup() failed in the thread, in
+ * which case only the join is needed to reap it. SIGHUP is sent after
+ * fuse_session_exit() - presumably to interrupt a blocking read inside
+ * fuse_session_loop(); TODO confirm.
+ */
+static void
+cuse_device_stop_thread(struct cuse_device *cuse_device)
+{
+	if (cuse_device->session != NULL) {
+		fuse_session_exit(cuse_device->session);
+	}
+	pthread_kill(cuse_device->tid, SIGHUP);
+	pthread_join(cuse_device->tid, NULL);
+}
+
+/*
+ * Stop and free all namespace devices of ctrlr_device, then stop the
+ * controller device itself, unlink it from the global list and free it.
+ */
+static void
+cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
+{
+	struct cuse_device *ns_device, *tmp;
+
+	TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
+		cuse_device_stop_thread(ns_device);
+		TAILQ_REMOVE(&ctrlr_device->ns_devices, ns_device, tailq);
+		free(ns_device);
+	}
+
+	cuse_device_stop_thread(ctrlr_device);
+	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
+	free(ctrlr_device);
+}
+
+/* Return the controller device registered for ctrlr, or NULL if there is none. */
+static struct cuse_device *
+nvme_cuse_find_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
+{
+	struct cuse_device *ctrlr_device;
+
+	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
+		if (ctrlr_device->ctrlr == ctrlr) {
+			return ctrlr_device;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Create the CUSE device of ctrlr and one CUSE device per active namespace.
+ *
+ * Returns 0 on success, -ENOMEM if the controller device cannot be allocated
+ * and -1 on any other failure. If a namespace device cannot be started,
+ * everything created so far for this controller is stopped again.
+ */
+static int
+nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
+{
+	uint32_t i, nsid;
+	struct cuse_device *ctrlr_device;
+
+	SPDK_NOTICELOG("Creating cuse device for controller\n");
+
+	ctrlr_device = calloc(1, sizeof(*ctrlr_device));
+	if (!ctrlr_device) {
+		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.\n");
+		return -ENOMEM;
+	}
+
+	TAILQ_INIT(&ctrlr_device->ns_devices);
+	ctrlr_device->ctrlr = ctrlr;
+	ctrlr_device->idx = g_controllers_found++;
+	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name),
+		 "spdk/nvme%u", ctrlr_device->idx);
+
+	if (pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device)) {
+		SPDK_ERRLOG("pthread_create failed\n");
+		free(ctrlr_device);
+		return -1;
+	}
+	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
+
+	/* Start all active namespaces; namespace ids start at 1. */
+	for (i = 0; i < spdk_nvme_ctrlr_get_num_ns(ctrlr); i++) {
+		nsid = i + 1;
+		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
+			continue;
+		}
+
+		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
+			SPDK_ERRLOG("Cannot start CUSE namespace device.\n");
+			cuse_nvme_ctrlr_stop(ctrlr_device);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Stop all CUSE devices of ctrlr; logs an error if none were registered. */
+static void
+nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
+{
+	struct cuse_device *ctrlr_device;
+
+	ctrlr_device = nvme_cuse_find_ctrlr_device(ctrlr);
+	if (!ctrlr_device) {
+		SPDK_ERRLOG("Cannot find associated CUSE device\n");
+		return;
+	}
+
+	cuse_nvme_ctrlr_stop(ctrlr_device);
+}
+
+static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
+	.name = "cuse",
+};
+
+/*
+ * Register the "cuse" io_msg producer for ctrlr and create its CUSE devices.
+ * The producer is stopped again if the devices cannot be created.
+ *
+ * Returns 0 on success, otherwise the non-zero code of the step that failed.
+ */
+int
+nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
+{
+	int rc;
+
+	rc = nvme_io_msg_ctrlr_start(ctrlr, &cuse_nvme_io_msg_producer);
+	if (rc) {
+		return rc;
+	}
+
+	rc = nvme_cuse_start(ctrlr);
+	if (rc) {
+		nvme_io_msg_ctrlr_stop(ctrlr, &cuse_nvme_io_msg_producer, false);
+	}
+
+	return rc;
+}
+
+/* Remove the CUSE devices of ctrlr and stop the "cuse" io_msg producer. */
+void
+nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
+{
+	nvme_cuse_stop(ctrlr);
+
+	nvme_io_msg_ctrlr_stop(ctrlr, &cuse_nvme_io_msg_producer, false);
+}
+
+/*
+ * Returns the name stored in the controller's cuse_device (memory owned by
+ * that device), or NULL if ctrlr has no CUSE device.
+ */
+char *
+spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr)
+{
+	struct cuse_device *ctrlr_device;
+
+	ctrlr_device = nvme_cuse_find_ctrlr_device(ctrlr);
+	if (!ctrlr_device) {
+		return NULL;
+	}
+
+	return ctrlr_device->dev_name;
+}
+
+/*
+ * Returns the name stored in the namespace's cuse_device (memory owned by
+ * that device), or NULL if ctrlr has no CUSE device or no namespace device
+ * with the given nsid.
+ */
+char *
+spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
+{
+	struct cuse_device *ns_device;
+	struct cuse_device *ctrlr_device;
+
+	ctrlr_device = nvme_cuse_find_ctrlr_device(ctrlr);
+	if (!ctrlr_device) {
+		return NULL;
+	}
+
+	TAILQ_FOREACH(ns_device, &ctrlr_device->ns_devices, tailq) {
+		if (ns_device->nsid == nsid) {
+			return ns_device->dev_name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/lib/nvme/nvme_cuse.h b/lib/nvme/nvme_cuse.h
new file mode 100644
index 0000000000..174431a887
--- /dev/null
+++ b/lib/nvme/nvme_cuse.h
@@ -0,0 +1,42 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef __NVME_CUSE_H__ +#define __NVME_CUSE_H__ + +#include "spdk/nvme.h" + +int nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr); +void nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr); + +#endif /* __NVME_CUSE_H__ */ diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk index b9ccfaf496..e14c419037 100644 --- a/mk/spdk.common.mk +++ b/mk/spdk.common.mk @@ -225,6 +225,10 @@ ifeq ($(CONFIG_LOG_BACKTRACE),y) SYS_LIBS += -lunwind endif +ifeq ($(CONFIG_NVME_CUSE),y) +SYS_LIBS += -lfuse3 +endif + MAKEFLAGS += --no-print-directory C_SRCS += $(C_SRCS-y) diff --git a/scripts/pkgdep.sh b/scripts/pkgdep.sh index c0d8d3621a..6d2c995c26 100755 --- a/scripts/pkgdep.sh +++ b/scripts/pkgdep.sh @@ -73,6 +73,8 @@ if [ -s /etc/redhat-release ]; then fi # Additional dependencies for ISA-L used in compression yum install -y autoconf automake libtool help2man + # Additional dependencies for FUSE and CUSE + yum install -y fuse3-devel elif [ -f /etc/debian_version ]; then # Includes Ubuntu, Debian apt-get install -y gcc g++ make libcunit1-dev libaio-dev libssl-dev \ @@ -93,6 +95,8 @@ elif [ -f /etc/debian_version ]; then "Note: Some SPDK CLI dependencies could not be installed." 
# Additional dependencies for ISA-L used in compression apt-get install -y autoconf automake libtool help2man + # Additional dependencies for FUSE and CUSE + apt-get install -y libfuse3-dev # Additional dependecies for nvmf performance test script apt-get install -y python3-paramiko elif [ -f /etc/SuSE-release ] || [ -f /etc/SUSE-brand ]; then @@ -111,6 +115,8 @@ elif [ -f /etc/SuSE-release ] || [ -f /etc/SUSE-brand ]; then zypper install -y doxygen mscgen graphviz # Additional dependencies for ISA-L used in compression zypper install -y autoconf automake libtool help2man + # Additional dependencies for FUSE and CUSE + zypper install -y fuse3-devel elif [ $(uname -s) = "FreeBSD" ] ; then pkg install -y gmake cunit openssl git devel/astyle bash py27-pycodestyle \ python misc/e2fsprogs-libuuid sysutils/sg3_utils nasm @@ -140,6 +146,9 @@ elif [ -f /etc/arch-release ]; then # Additional dependencies for ISA-L used in compression pacman -Sy --needed --noconfirm autoconf automake libtool help2man + # Additional dependencies for FUSE and CUSE + pacman -Sy --needed --noconfirm fuse3 + #fakeroot needed to instal via makepkg pacman -Sy --needed --noconfirm fakeroot su - $SUDO_USER -c "pushd /tmp;