From 0f912a0eafd92b3bb56b92d7bb3ab87ea965b3b4 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Mon, 6 Jun 2016 14:44:30 -0700 Subject: [PATCH] nvmf: add NVMe over Fabrics userspace target Change-Id: I739916824d033bd1a8f8b7f5def09e58f23d13cb Signed-off-by: Daniel Verkamp --- CONFIG | 4 + app/Makefile | 1 + app/nvmf_tgt/.gitignore | 1 + app/nvmf_tgt/Makefile | 70 ++ app/nvmf_tgt/nvmf_tgt.c | 240 +++++++ etc/spdk/nvmf.conf.in | 134 ++++ include/spdk/nvmf_spec.h | 555 +++++++++++++++ lib/Makefile | 2 + lib/nvmf/Makefile | 43 ++ lib/nvmf/conf.c | 352 ++++++++++ lib/nvmf/conf.h | 40 ++ lib/nvmf/conn.c | 1318 ++++++++++++++++++++++++++++++++++++ lib/nvmf/conn.h | 124 ++++ lib/nvmf/controller.c | 315 +++++++++ lib/nvmf/controller.h | 57 ++ lib/nvmf/framework.c | 370 ++++++++++ lib/nvmf/init_grp.c | 299 ++++++++ lib/nvmf/init_grp.h | 64 ++ lib/nvmf/nvmf.c | 91 +++ lib/nvmf/nvmf.h | 123 ++++ lib/nvmf/nvmf_admin_cmd.c | 338 +++++++++ lib/nvmf/nvmf_internal.h | 131 ++++ lib/nvmf/nvmf_io_cmd.c | 161 +++++ lib/nvmf/port.c | 208 ++++++ lib/nvmf/port.h | 105 +++ lib/nvmf/rdma.c | 1132 +++++++++++++++++++++++++++++++ lib/nvmf/rdma.h | 81 +++ lib/nvmf/session.c | 517 ++++++++++++++ lib/nvmf/session.h | 151 +++++ lib/nvmf/subsystem_grp.c | 446 ++++++++++++ lib/nvmf/subsystem_grp.h | 102 +++ scripts/autotest_common.sh | 4 + 32 files changed, 7579 insertions(+) create mode 100644 app/nvmf_tgt/.gitignore create mode 100644 app/nvmf_tgt/Makefile create mode 100644 app/nvmf_tgt/nvmf_tgt.c create mode 100644 etc/spdk/nvmf.conf.in create mode 100644 include/spdk/nvmf_spec.h create mode 100644 lib/nvmf/Makefile create mode 100644 lib/nvmf/conf.c create mode 100644 lib/nvmf/conf.h create mode 100644 lib/nvmf/conn.c create mode 100644 lib/nvmf/conn.h create mode 100644 lib/nvmf/controller.c create mode 100644 lib/nvmf/controller.h create mode 100644 lib/nvmf/framework.c create mode 100644 lib/nvmf/init_grp.c create mode 100644 lib/nvmf/init_grp.h create mode 100644 lib/nvmf/nvmf.c create mode 100644 lib/nvmf/nvmf.h create mode 100644 lib/nvmf/nvmf_admin_cmd.c create mode 100644 lib/nvmf/nvmf_internal.h create mode 100644 lib/nvmf/nvmf_io_cmd.c create mode 100644 lib/nvmf/port.c create mode 100644 lib/nvmf/port.h create mode 100644 lib/nvmf/rdma.c create mode 100644 lib/nvmf/rdma.h create mode 100644 lib/nvmf/session.c create mode 100644 lib/nvmf/session.h create mode 100644 lib/nvmf/subsystem_grp.c create mode 100644 lib/nvmf/subsystem_grp.h diff --git a/CONFIG b/CONFIG index 5c54a0eb78..7071d95ea0 100644 --- a/CONFIG +++ b/CONFIG @@ -68,3 +68,7 @@ CONFIG_FIO_PLUGIN?=n # This directory should contain the source code directory for fio # which is required for building the SPDK FIO plugin. FIO_SOURCE_DIR?=/usr/src/fio + +# Build NVMf (NVMe over Fabrics) target. +# Requires ibverbs development libraries. +CONFIG_NVMF?=n diff --git a/app/Makefile b/app/Makefile index 9a2b2580e8..cd28377952 100644 --- a/app/Makefile +++ b/app/Makefile @@ -35,6 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk DIRS-y += trace +DIRS-$(CONFIG_NVMF) += nvmf_tgt .PHONY: all clean $(DIRS-y) diff --git a/app/nvmf_tgt/.gitignore b/app/nvmf_tgt/.gitignore new file mode 100644 index 0000000000..e96d82befa --- /dev/null +++ b/app/nvmf_tgt/.gitignore @@ -0,0 +1 @@ +nvmf_tgt diff --git a/app/nvmf_tgt/Makefile b/app/nvmf_tgt/Makefile new file mode 100644 index 0000000000..7511e19652 --- /dev/null +++ b/app/nvmf_tgt/Makefile @@ -0,0 +1,70 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP = nvmf_tgt + +CFLAGS += $(DPDK_INC) + +# Add NVMf library directory to include path +# TODO: remove this once NVMf has a public API header +CFLAGS += -I$(SPDK_ROOT_DIR)/lib + +C_SRCS := nvmf_tgt.c + +SPDK_LIBS = \ + $(SPDK_ROOT_DIR)/lib/nvmf/libspdk_nvmf.a \ + $(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a \ + $(SPDK_ROOT_DIR)/lib/event/libspdk_event.a \ + $(SPDK_ROOT_DIR)/lib/log/libspdk_log.a \ + $(SPDK_ROOT_DIR)/lib/trace/libspdk_trace.a \ + $(SPDK_ROOT_DIR)/lib/conf/libspdk_conf.a \ + $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \ + $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a \ + +LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) + +LIBS += -libverbs -lrdmacm +LIBS += $(DPDK_LIB) -lpthread -lrt + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/app/nvmf_tgt/nvmf_tgt.c b/app/nvmf_tgt/nvmf_tgt.c new file mode 100644 index 0000000000..b28104b231 --- /dev/null +++ b/app/nvmf_tgt/nvmf_tgt.c @@ -0,0 +1,240 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "spdk/event.h" + +#include "nvmf/conn.h" +#include "nvmf/rdma.h" +#include "nvmf/port.h" +#include "nvmf/init_grp.h" +#include "nvmf/nvmf.h" + +#include "spdk/log.h" +#include "spdk/nvme.h" + +struct rte_mempool *request_mempool; + +#define SPDK_NVMF_DEFAULT_CONFIG SPDK_NVMF_BUILD_ETC "/nvmf.conf" + +static void +spdk_nvmf_shutdown_cb(void) +{ + nvmf_acceptor_stop(); + spdk_shutdown_nvmf_conns(); + + fprintf(stdout, "\n=========================\n"); + fprintf(stdout, " NVMF shutdown signal\n"); + fprintf(stdout, "=========================\n"); +} + +static void +usage(void) +{ + printf("nvmf [options]\n"); + printf("options:\n"); + printf(" -c config - config file (default %s)\n", SPDK_NVMF_DEFAULT_CONFIG); + printf(" -e mask - tracepoint group mask for spdk trace buffers (default 0x0)\n"); + printf(" -m mask - core mask for DPDK\n"); + printf(" -i instance ID\n"); + printf(" -l facility - use specific syslog facility (default %s)\n", + SPDK_APP_DEFAULT_LOG_FACILITY); + printf(" -n channel number of memory channels used for DPDK\n"); + printf(" -p core master (primary) core for DPDK\n"); + printf(" -s size memory size in MB for DPDK\n"); + +#ifdef DEBUG + printf(" -t flag - trace flag options (all, rdma, nvmf, debug)\n"); +#else + printf(" -t flag - trace flag options (not supported - must rebuild with CONFIG_DEBUG=y)\n"); +#endif + printf(" -v - verbose (enable warnings)\n"); + printf(" -H - show this usage\n"); + printf(" -d - disable coredump file enabling\n"); +} + +static void +spdk_nvmf_startup(spdk_event_t event) +{ + int rc; + + /* start the rdma poller that will listen + on all available ports */ + rc = nvmf_acceptor_start(); + if (rc < 0) { + SPDK_ERRLOG("nvmf_acceptor_start() failed\n"); + goto initialize_error; + } + + if (getenv("MEMZONE_DUMP") != NULL) { + rte_memzone_dump(stdout); + fflush(stdout); + } + + return; + +initialize_error: + spdk_app_stop(rc); +} + +/*! \file + +This is the main file. + +*/ + +/*! + +\brief This is the main function for the NVMf target application. 
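+
+Startup proceeds through DPDK initialization and the SPDK application framework
+before the NVMf acceptor is started (see spdk_nvmf_startup() above); a typical
+invocation is "nvmf_tgt -c <config file> -m <core mask>", with the default
+config file path given by SPDK_NVMF_DEFAULT_CONFIG.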
+ +\msc + + c_runtime [label="C Runtime"], dpdk [label="DPDK"], nvmf [label="NVMf target"]; + c_runtime=>nvmf [label="main()"]; + nvmf=> [label="rte_eal_init()"]; + nvmf=>nvmf [label="spdk_app_init()"]; + nvmf=>nvmf [label="spdk_event_allocate()"]; + nvmf=>nvmf [label="spdk_app_start()"]; + nvmf=>nvmf [label="spdk_app_fini()"]; + nvmf=>nvmf [label="spdk_nvmf_check_pools()"]; + c_runtime< + +#include "spdk/assert.h" +#include "spdk/nvme_spec.h" + +/** + * \file + * + */ + +#pragma pack(push, 1) + +struct spdk_nvmf_capsule_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[35]; + uint8_t fabric_specific[24]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_capsule_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_capsule_rsp { + uint8_t fabric_specific[8]; + uint16_t sqhd; + uint8_t reserved1[2]; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_capsule_rsp) == 16, "Incorrect size"); + +/* Fabric Command Set */ +#define SPDK_NVMF_FABRIC_OPCODE 0x7f + +enum spdk_nvmf_fabric_cmd_types { + SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET = 0x00, + SPDK_NVMF_FABRIC_COMMAND_CONNECT = 0x01, + SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET = 0x04, + SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND = 0x05, + SPDK_NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV = 0x06, + SPDK_NVMF_FABRIC_COMMAND_START_VENDOR_SPECIFIC = 0xC0, +}; + +enum spdk_nvmf_fabric_cmd_status_code { + SPDK_NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT = 0x80, + SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY = 0x81, + SPDK_NVMF_FABRIC_SC_INVALID_PARAM = 0x82, + SPDK_NVMF_FABRIC_SC_RESTART_DISCOVERY = 0x83, + SPDK_NVMF_FABRIC_SC_INVALID_HOST = 0x84, + SPDK_NVMF_FABRIC_SC_LOG_RESTART_DISCOVERY = 0x90, + SPDK_NVMF_FABRIC_SC_AUTH_REQUIRED = 0x91, +}; + +struct spdk_nvmf_fabric_auth_recv_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; /* NVMF_FABRIC_COMMAND_AUTHENTICATION_RECV (0x06) */ + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint8_t reserved3; + uint8_t spsp0; + uint8_t spsp1; + uint8_t secp; + uint32_t al; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_recv_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_auth_recv_rsp { + uint8_t reserved0[8]; + uint16_t sqhd; + uint8_t reserved1[2]; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_recv_rsp) == 16, "Incorrect size"); + +struct spdk_nvmf_fabric_auth_send_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; /* NVMF_FABRIC_COMMAND_AUTHENTICATION_SEND (0x05) */ + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint8_t reserved3; + uint8_t spsp0; + uint8_t spsp1; + uint8_t secp; + uint32_t tl; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_send_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_auth_send_rsp { + uint8_t reserved0[8]; + uint16_t sqhd; + uint8_t reserved1[2]; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_auth_send_rsp) == 16, "Incorrect size"); + +struct spdk_nvmf_fabric_connect_data { + uint8_t hostid[16]; + uint16_t cntlid; + uint8_t reserved5[238]; + uint8_t subnqn[256]; + uint8_t hostnqn[256]; + uint8_t reserved6[256]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_data) == 1024, "Incorrect size"); + +#define SPDK_NVMF_CONNECT_ATTR_PRIORITY_URGENT 0x00 +#define SPDK_NVMF_CONNECT_ATTR_PRIORITY_HIGH 0x01 
+#define SPDK_NVMF_CONNECT_ATTR_PRIORITY_MEDIUM 0x02 +#define SPDK_NVMF_CONNECT_ATTR_PRIORITY_LOW 0x03 +#define SPDK_NVMF_CONNECT_ATTR_RESERVED 0xFC + +struct spdk_nvmf_fabric_connect_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[19]; + struct spdk_nvme_sgl_descriptor sgl1; + uint16_t recfmt; /* Connect Record Format */ + uint16_t qid; /* Queue Identifier */ + uint16_t sqsize; /* Submission Queue Size */ + uint8_t cattr; /* queue attributes */ + uint8_t reserved3; + uint32_t kato; /* keep alive timeout */ + uint8_t reserved4[12]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_connect_rsp { + union { + struct { + uint16_t cntlid; + uint16_t authreq; + } success; + + struct { + uint16_t ipo; + uint8_t iattr; + uint8_t reserved; + } invalid; + + uint32_t raw; + } status_code_specific; + + uint32_t reserved0; + uint16_t sqhd; + uint16_t reserved1; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_connect_rsp) == 16, "Incorrect size"); + +#define SPDK_NVMF_PROP_CAP_OFST 0x0 +#define SPDK_NVMF_PROP_VS_OFST 0x8 +#define SPDK_NVMF_PROP_INTMS_OFST 0xC +#define SPDK_NVMF_PROP_INTMC_OFST 0x10 +#define SPDK_NVMF_PROP_CC_OFST 0x14 +#define SPDK_NVMF_PROP_CSTS_OFST 0x1C +#define SPDK_NVMF_PROP_NSSR_OFST 0x20 +#define SPDK_NVMF_PROP_AQA_OFST 0x24 +#define SPDK_NVMF_PROP_ASQ_OFST 0x28 +#define SPDK_NVMF_PROP_ACQ_OFST 0x30 +#define SPDK_NVMF_PROP_CMBLOC_OFST 0x38 +#define SPDK_NVMF_PROP_CMBSZ_OFST 0x3C + +#define SPDK_NVMF_PROP_CAP_LEN 0x8 +#define SPDK_NVMF_PROP_VS_LEN 0x4 +#define SPDK_NVMF_PROP_INTMS_LEN 0x4 +#define SPDK_NVMF_PROP_INTMC_LEN 0x4 +#define SPDK_NVMF_PROP_CC_LEN 0x4 +#define SPDK_NVMF_PROP_CSTS_LEN 0x4 +#define SPDK_NVMF_PROP_NSSR_LEN 0x4 +#define SPDK_NVMF_PROP_AQA_LEN 0x4 +#define SPDK_NVMF_PROP_ASQ_LEN 0x8 +#define SPDK_NVMF_PROP_ACQ_LEN 0x8 +#define SPDK_NVMF_PROP_CMBLOC_LEN 0x4 +#define SPDK_NVMF_PROP_CMBSZ_LEN 0x4 + +union spdk_nvmf_property_size { + uint32_t raw; + struct { + uint32_t reserved : 16; + + /** property address space size */ + uint32_t size : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_property_size) == 4, "Incorrect size"); + +union spdk_nvmf_capsule_attr_lo { + uint32_t raw; + struct { + /** maximum response capsule size */ + uint32_t rspsz : 16; + + /** maximum command capsule size */ + uint32_t cmdsz : 16; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_capsule_attr_lo) == 4, "Incorrect size"); + +union spdk_nvmf_capsule_attr_hi { + uint32_t raw; + struct { + /** support capsule alignment in response capsules */ + uint32_t reserved : 26; + + /** support capsule alignment in response capsules */ + uint32_t cairsp : 1; + + /** support capsule alignment in command capsules */ + uint32_t caicmd : 1; + + /** support capsule metadata in response capsules */ + uint32_t cmirsp : 1; + + /** support capsule metadata in command capsules */ + uint32_t cmicmd : 1; + + /** support capsule data in response capsules */ + uint32_t cdirsp : 1; + + /** support capsule data in command capsules */ + uint32_t cdicmd : 1; + } bits; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_capsule_attr_hi) == 4, "Incorrect size"); + +struct spdk_nvmf_ctrlr_properties { + union spdk_nvme_cap_lo_register cap_lo; + union spdk_nvme_cap_hi_register cap_hi; + + uint32_t vs; + uint32_t intms; + uint32_t intmc; + + union spdk_nvme_cc_register cc; + + uint32_t reserved1; + union spdk_nvme_csts_register 
csts; + uint32_t nssr; + + union spdk_nvme_aqa_register aqa; + + uint64_t asq; + uint64_t acq; + + uint32_t cmbloc; + uint32_t cmbsz; + + uint8_t reserved2[0xEC0]; + uint8_t reserved3[0x100]; + union spdk_nvmf_property_size propsz; + uint32_t reserved4; + union spdk_nvmf_capsule_attr_lo capattr_lo; + union spdk_nvmf_capsule_attr_hi capattr_hi; + uint8_t reserved5[0x2F0]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr_properties) == 4864, "Incorrect size"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_CAP_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, cap_lo), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_VS_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, vs), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_INTMS_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, intms), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_INTMC_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, intmc), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_CC_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, cc), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_CSTS_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, csts), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_NSSR_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, nssr), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_AQA_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, aqa), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_ASQ_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, asq), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_ACQ_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, acq), + "Incorrect register offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_CMBLOC_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, + cmbloc), + "Incorrect property offset"); +SPDK_STATIC_ASSERT(SPDK_NVMF_PROP_CMBSZ_OFST == offsetof(struct spdk_nvmf_ctrlr_properties, cmbsz), + "Incorrect property offset"); + +struct spdk_nvmf_fabric_prop_get_cmd { + uint8_t opcode; + uint8_t reserved1; + uint16_t cid; + uint8_t fctype; + uint8_t reserved2[35]; + uint8_t attrib; + uint8_t reserved3[3]; + uint32_t ofst; + uint8_t reserved4[16]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_prop_get_rsp { + union { + uint64_t u64; + struct { + uint32_t low; + uint32_t high; + } u32; + } value; + + uint16_t sqhd; + uint16_t reserved0; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_get_rsp) == 16, "Incorrect size"); + +struct spdk_nvmf_fabric_prop_set_cmd { + uint8_t opcode; + uint8_t reserved0; + uint16_t cid; + uint8_t fctype; + uint8_t reserved1[35]; + uint8_t attrib; + uint8_t reserved2[3]; + uint32_t ofst; + + union { + uint64_t u64; + struct { + uint32_t low; + uint32_t high; + } u32; + } value; + + uint8_t reserved4[8]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_set_cmd) == 64, "Incorrect size"); + +struct spdk_nvmf_fabric_prop_set_rsp { + uint8_t reserved0[8]; + uint16_t sqhd; + uint16_t reserved1; + uint16_t cid; + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_fabric_prop_set_rsp) == 16, "Incorrect size"); + +/* Overlays on the existing identify controller structure */ +#define SPDK_NVMF_EXTENDED_CTRLR_DATA_OFFSET 1792 +struct spdk_nvmf_extended_identify_ctrlr_data { + uint32_t ioccsz; + uint32_t iorcsz; + uint16_t 
icdoff; + uint8_t ctrattr; + uint8_t msdbd; + uint8_t reserved[244]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_extended_identify_ctrlr_data) == 256, "Incorrect size"); + +#define SPDK_NVMF_CTRLR_MAXCMD_OFFSET 514 +struct spdk_nvmf_ctrlr_maxcmd { + uint16_t maxcmd; +}; + +#define SPDK_NVMF_CTRLR_KAS_OFFSET 320 +struct spdk_nvmf_ctrlr_kas { + uint16_t kas; +}; + +struct spdk_nvmf_sgl_support { + uint32_t supported : 1; + uint32_t reserved1 : 1; + uint32_t keyed_sgls : 1; + uint32_t reserved2 : 13; + uint32_t bit_bucket_descriptor_supported : 1; + uint32_t metadata_pointer_supported : 1; + uint32_t oversized_sgl_supported : 1; + uint32_t single_aligned_sgl_supported : 1; + uint32_t address_as_offset_sgl_supported : 1; + uint32_t reserved3 : 11; +}; + +#define SPDK_NVMF_DISCOVERY_NQN "nqn.2014-08.org.nvmexpress.discovery" + +struct spdk_nvmf_discovery_identify_data { + uint8_t reserved0[64]; + uint64_t fr; + uint8_t reserved1[5]; + uint8_t mdts; + uint16_t cntlid; + uint32_t ver; + uint8_t reserved2[177]; + uint8_t lpa; + uint8_t elpe; + uint8_t reserved3[505]; + uint8_t subnqn[256]; + uint8_t discovery[1024]; + uint8_t reserved4[1024]; + uint8_t vs[1024]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_identify_data) == 4096, "Incorrect size"); + +#define SPDK_NVMF_LOG_PAGE_DISCOVERY 0x70 + +struct spdk_nvmf_discovery_log_page_entry { + uint8_t trtype; /* transport type */ + uint8_t adrfam; /* address family */ + uint8_t subtype; + uint8_t treq; + uint16_t portid; + uint16_t cntlid; + uint8_t reserved0[24]; + uint8_t trsvcid[32]; + uint8_t reserved1[192]; + uint8_t subnqn[256]; + uint8_t traddr[256]; + uint8_t tsas[256]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page_entry) == 1024, "Incorrect size"); + +struct spdk_nvmf_discovery_log_page { + uint64_t genctr; + uint64_t numrec; + uint16_t recfmt; + uint8_t reserved0[1006]; + struct spdk_nvmf_discovery_log_page_entry entries[0]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_discovery_log_page) == 1024, "Incorrect size"); + +/* Add an additional type of SGL */ +#define SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK 0x4 + +/* Further, add SGL subtypes */ +#define SPDK_NVME_SGL_SUBTYPE_ADDRESS 0x0 +#define SPDK_NVME_SGL_SUBTYPE_OFFSET 0x1 + +struct spdk_nvmf_keyed_sgl_descriptor { + uint64_t address; + uint64_t length : 24; + uint64_t key : 32; + uint64_t subtype : 4; + uint64_t type : 4; /* SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK */ +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_keyed_sgl_descriptor) == 16, "Incorrect size"); + +/* Add a new admin command */ +#define SPDK_NVME_OPC_KEEP_ALIVE 0x18 + +/* Add new status codes */ +#define SPDK_NVME_SC_SGL_OFFSET_INVALID 0x16 +#define SPDK_NVME_SC_SGL_SUBTYPE_INVALID 0x17 +#define SPDK_NVME_SC_HOSTID_INCONSISTENT 0x18 +#define SPDK_NVME_SC_KEEP_ALIVE_EXPIRED 0x19 +#define SPDK_NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID 0x1A + +/* RDMA Fabric specific definitions below */ + +#define SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY 0xF + +struct spdk_nvmf_rdma_transport_specific_address { + uint8_t rdma_qptype; /* see nvmf_rdma_qp_service_types */ + uint8_t rdma_prtype; /* see nvmf_rdma_provider_types */ + uint8_t rdma_cms; /* nvmf_rdma_connection_mgmt_service */ + uint8_t reserved0[5]; + uint16_t rdma_pkey; + uint8_t reserved2[246]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_transport_specific_address) == 256, + "Incorrect size"); + +struct spdk_nvmf_rdma_request_private_data { + uint16_t recfmt; /* record format */ + uint16_t qid; /* queue id */ + uint16_t hrqsize; /* host receive 
queue size */ + uint16_t hsqsize; /* host send queue size */ + uint8_t reserved[24]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_request_private_data) == 32, "Incorrect size"); + +struct spdk_nvmf_rdma_accept_private_data { + uint16_t recfmt; /* record format */ + uint16_t crqsize; /* controller receive queue size */ + uint8_t reserved[28]; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_accept_private_data) == 32, "Incorrect size"); + +struct spdk_nvmf_rdma_reject_private_data { + uint16_t recfmt; /* record format */ + struct spdk_nvme_status status; +}; +SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_rdma_reject_private_data) == 4, "Incorrect size"); + +union spdk_nvmf_rdma_private_data { + struct spdk_nvmf_rdma_request_private_data pd_request; + struct spdk_nvmf_rdma_accept_private_data pd_accept; + struct spdk_nvmf_rdma_reject_private_data pd_reject; +}; +SPDK_STATIC_ASSERT(sizeof(union spdk_nvmf_rdma_private_data) == 32, "Incorrect size"); + +enum spdk_nvmf_rdma_transport_errors { + SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH = 0x1, + SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT = 0x2, + SPDK_NVMF_RDMA_ERROR_INVALID_QID = 0x3, + SPDK_NVMF_RDMA_ERROR_INVALID_HSQSIZE = 0x4, + SPDK_NVMF_RDMA_ERROR_INVALID_HRQSIZE = 0x5, + SPDK_NVMF_RDMA_ERROR_NO_RESOURCES = 0x6, + SPDK_NVMF_RDMA_ERROR_INVALID_IRD = 0x7, + SPDK_NVMF_RDMA_ERROR_INVALID_ORD = 0x8, +}; + +#pragma pack(pop) + +#endif /* __NVMF_SPEC_H__ */ diff --git a/lib/Makefile b/lib/Makefile index 1b5c0402f5..55ce1444f3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -36,6 +36,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk DIRS-y += conf cunit event json jsonrpc log memory trace util nvme ioat +DIRS-$(CONFIG_NVMF) += nvmf + .PHONY: all clean $(DIRS-y) all: $(DIRS-y) diff --git a/lib/nvmf/Makefile b/lib/nvmf/Makefile new file mode 100644 index 0000000000..e005b4adff --- /dev/null +++ b/lib/nvmf/Makefile @@ -0,0 +1,43 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
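+
+# libspdk_nvmf implements the NVMf target: config parsing, connection and
+# session handling, and the RDMA transport. DPDK headers are needed for the
+# mempool and timer facilities used by the connection code.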
+ +CFLAGS += $(DPDK_INC) +LIBNAME = nvmf +C_SRCS = nvmf_admin_cmd.c nvmf_io_cmd.c nvmf.c \ + rdma.c port.c conn.c controller.c \ + init_grp.c subsystem_grp.c conf.c \ + framework.c session.c + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/nvmf/conf.c b/lib/nvmf/conf.c new file mode 100644 index 0000000000..0b6b7eee0b --- /dev/null +++ b/lib/nvmf/conf.c @@ -0,0 +1,352 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "conf.h" +#include "init_grp.h" +#include "nvmf.h" +#include "port.h" +#include "spdk/conf.h" +#include "spdk/log.h" + +#define PORTNUMSTRLEN 32 + +static int +spdk_nvmf_parse_nvmf_tgt(void) +{ + struct spdk_conf_section *sp; + char *authfile; + char *nodebase; + int max_in_capsule_data; + int max_sessions_per_subsystem; + int max_queue_depth; + int max_conn_per_sess; + int max_recv_seg_len; + int listen_port; + int rc; + + sp = spdk_conf_find_section(NULL, "Nvmf"); + if (sp == NULL) { + SPDK_ERRLOG("No Nvmf section in configuration file.\n"); + return -1; + } + + authfile = spdk_conf_section_get_val(sp, "AuthFile"); + if (authfile == NULL) { + authfile = SPDK_NVMF_DEFAULT_AUTHFILE; + } + + nodebase = spdk_conf_section_get_val(sp, "NodeBase"); + if (nodebase == NULL) { + nodebase = SPDK_NVMF_DEFAULT_NODEBASE; + } + + max_in_capsule_data = spdk_conf_section_get_intval(sp, "MaxInCapsuleData"); + if (max_in_capsule_data < 0) { + max_in_capsule_data = SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE; + } + + max_sessions_per_subsystem = spdk_conf_section_get_intval(sp, "MaxSessionsPerSubsystem"); + if (max_sessions_per_subsystem < 0) { + max_sessions_per_subsystem = SPDK_NVMF_DEFAULT_MAX_SESSIONS_PER_SUBSYSTEM; + } + + max_queue_depth = spdk_conf_section_get_intval(sp, "MaxQueueDepth"); + if (max_queue_depth < 0) { + max_queue_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH; + } + + max_conn_per_sess = spdk_conf_section_get_intval(sp, "MaxConnectionsPerSession"); + if (max_conn_per_sess < 0) { + max_conn_per_sess = SPDK_NVMF_DEFAULT_MAX_CONNECTIONS_PER_SESSION; + } + + max_recv_seg_len = SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE; + listen_port = SPDK_NVMF_DEFAULT_SIN_PORT; + + rc = nvmf_tgt_init(authfile, nodebase, max_in_capsule_data, max_sessions_per_subsystem, + max_queue_depth, max_conn_per_sess, max_recv_seg_len, listen_port); + + return rc; +} + +static int +spdk_nvmf_parse_addr(char *listen_addr, char **host, char **port) +{ + int n, len; + const char *p, *q; + + if (listen_addr == NULL) { + SPDK_ERRLOG("Invalid listen addr for Fabric Interface (NULL)\n"); + return -1; + } + + *host = NULL; + *port = NULL; + + if (listen_addr[0] == '[') { + /* IPv6 */ + p = strchr(listen_addr + 1, ']'); + if (p == NULL) { + return -1; + } + p++; + n = p - listen_addr; + *host = malloc(n + 1); + if (!*host) { + return -1; + } + memcpy(*host, listen_addr, n); + (*host)[n] = '\0'; + if (p[0] == '\0') { + *port = malloc(PORTNUMSTRLEN); + if (!*port) { + free(*host); + return -1; + } + snprintf(*port, PORTNUMSTRLEN, "%d", SPDK_NVMF_DEFAULT_SIN_PORT); + } else { + if (p[0] != ':') { + free(*host); + return -1; + } + q = strchr(listen_addr, '@'); + if (q == NULL) { + q = listen_addr + strlen(listen_addr); + } + len = q - p - 1; + + *port = malloc(len + 1); + if (!*port) { + free(*host); + return -1; + } + memset(*port, 0, len + 1); + memcpy(*port, p + 1, len); + } + } else { + /* IPv4 */ + p = strchr(listen_addr, ':'); + if (p == NULL) { + p = listen_addr + strlen(listen_addr); + } + n = p - listen_addr; + *host = malloc(n + 1); + if (!*host) { + return -1; + } + memcpy(*host, listen_addr, n); + (*host)[n] = '\0'; + if (p[0] == '\0') { + *port = malloc(PORTNUMSTRLEN); + if (!*port) { + free(*host); + return -1; + } + snprintf(*port, PORTNUMSTRLEN, "%d", SPDK_NVMF_DEFAULT_SIN_PORT); + } else { + if (p[0] != ':') { + free(*host); + return -1; + } + q = strchr(listen_addr, '@'); + if (q == NULL) { + q = listen_addr + strlen(listen_addr); + } + + if (q == p) { + free(*host); + return -1; + } + + len = q - 
p - 1; + *port = malloc(len + 1); + if (!*port) { + free(*host); + return -1; + } + memset(*port, 0, len + 1); + memcpy(*port, p + 1, len); + + } + } + + return 0; +} + +static int +spdk_nvmf_parse_port(struct spdk_conf_section *sp) +{ + struct spdk_nvmf_port *port; + struct spdk_nvmf_fabric_intf *fabric_intf; + char *listen_addr, *host, *listen_port; + int i = 0, rc = 0; + + /* Create the Subsystem Port */ + port = spdk_nvmf_port_create(sp->num); + if (!port) { + SPDK_ERRLOG("Port create failed\n"); + return -1; + } + + /* Loop over the fabric interfaces and add them to the port */ + for (i = 0; ; i++) { + listen_addr = spdk_conf_section_get_nmval(sp, "FabricIntf", i, 1); + if (listen_addr == NULL) { + break; + } + rc = spdk_nvmf_parse_addr(listen_addr, &host, &listen_port); + if (rc < 0) { + continue; + } + fabric_intf = spdk_nvmf_fabric_intf_create(host, listen_port); + if (!fabric_intf) { + continue; + } + + spdk_nvmf_port_add_fabric_intf(port, fabric_intf); + } + + return 0; +} + +static int +spdk_nvmf_parse_ports(void) +{ + int rc = 0; + struct spdk_conf_section *sp; + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "Port")) { + rc = spdk_nvmf_parse_port(sp); + if (rc < 0) { + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +static int +spdk_nvmf_parse_init_grp(struct spdk_conf_section *sp) +{ + int i; + const char *mask; + char **netmasks; + int num_netmasks; + struct spdk_nvmf_init_grp *init_grp; + + + for (num_netmasks = 0; ; num_netmasks++) { + mask = spdk_conf_section_get_nval(sp, "Netmask", num_netmasks); + if (mask == NULL) { + break; + } + } + + if (num_netmasks == 0) { + return -1; + } + + + netmasks = calloc(num_netmasks, sizeof(char *)); + if (!netmasks) { + return -1; + } + + for (i = 0; i < num_netmasks; i++) { + mask = spdk_conf_section_get_nval(sp, "Netmask", i); + netmasks[i] = strdup(mask); + if (!netmasks[i]) { + free(netmasks); + return -1; + } + } + + init_grp = spdk_nvmf_init_grp_create(sp->num, num_netmasks, netmasks); + + if (!init_grp) { + free(netmasks); + return -1; + } + + return 0; +} + +static int +spdk_nvmf_parse_init_grps(void) +{ + int rc = 0; + struct spdk_conf_section *sp; + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "InitiatorGroup")) { + rc = spdk_nvmf_parse_init_grp(sp); + if (rc < 0) { + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +int +spdk_nvmf_parse_conf(void) +{ + int rc; + + /* NVMf section */ + rc = spdk_nvmf_parse_nvmf_tgt(); + if (rc < 0) { + return rc; + } + + /* Port sections */ + rc = spdk_nvmf_parse_ports(); + if (rc < 0) { + return rc; + } + + /* Initiator Group sections */ + rc = spdk_nvmf_parse_init_grps(); + if (rc < 0) { + return rc; + } + + return 0; +} diff --git a/lib/nvmf/conf.h b/lib/nvmf/conf.h new file mode 100644 index 0000000000..712230c684 --- /dev/null +++ b/lib/nvmf/conf.h @@ -0,0 +1,40 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NVMF_CONF_H +#define NVMF_CONF_H + +int +spdk_nvmf_parse_conf(void); + +#endif diff --git a/lib/nvmf/conn.c b/lib/nvmf/conn.c new file mode 100644 index 0000000000..70fe5ee21d --- /dev/null +++ b/lib/nvmf/conn.c @@ -0,0 +1,1318 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "nvmf.h" +#include "spdk/nvmf_spec.h" +#include "conn.h" +#include "rdma.h" +#include "session.h" +#include "spdk/queue.h" +#include "spdk/log.h" +#include "spdk/trace.h" + + +/** \file + +*/ + +static rte_atomic32_t g_num_connections[RTE_MAX_LCORE]; + +static int g_max_conns; +struct spdk_nvmf_conn *g_conns_array; +char g_shm_name[64]; +int g_conns_array_fd; + +pthread_mutex_t g_conns_mutex; + +struct rte_timer g_shutdown_timer; + +static int nvmf_allocate_reactor(uint64_t cpumask); +static void spdk_nvmf_conn_do_work(void *arg); + +static void +nvmf_active_tx_desc(struct nvme_qp_tx_desc *tx_desc) +{ + struct spdk_nvmf_conn *conn; + + RTE_VERIFY(tx_desc != NULL); + conn = tx_desc->conn; + RTE_VERIFY(conn != NULL); + + STAILQ_REMOVE(&conn->qp_tx_desc, tx_desc, nvme_qp_tx_desc, link); + STAILQ_INSERT_TAIL(&conn->qp_tx_active_desc, tx_desc, link); +} + +static void +nvmf_deactive_tx_desc(struct nvme_qp_tx_desc *tx_desc) +{ + struct spdk_nvmf_conn *conn; + + RTE_VERIFY(tx_desc != NULL); + conn = tx_desc->conn; + RTE_VERIFY(tx_desc->conn != NULL); + + STAILQ_REMOVE(&conn->qp_tx_active_desc, tx_desc, nvme_qp_tx_desc, link); + STAILQ_INSERT_TAIL(&conn->qp_tx_desc, tx_desc, link); +} + +static struct spdk_nvmf_conn * +allocate_conn(void) +{ + struct spdk_nvmf_conn *conn; + int i; + + pthread_mutex_lock(&g_conns_mutex); + for (i = 0; i < g_max_conns; i++) { + conn = &g_conns_array[i]; + if (!conn->is_valid) { + memset(conn, 0, sizeof(*conn)); + conn->is_valid = 1; + pthread_mutex_unlock(&g_conns_mutex); + return conn; + } + } + pthread_mutex_unlock(&g_conns_mutex); + + return NULL; +} + +static void +free_conn(struct spdk_nvmf_conn *conn) +{ + conn->is_valid = 0; +} + +struct spdk_nvmf_conn * +spdk_find_nvmf_conn_by_cm_id(struct rdma_cm_id *cm_id) +{ + int i; + + for (i = 0; i < g_max_conns; i++) { + if ((g_conns_array[i].is_valid == 1) && + (g_conns_array[i].cm_id == cm_id)) { + return &g_conns_array[i]; + } + } + + return NULL; +} + +static struct spdk_nvmf_conn * +spdk_find_nvmf_conn_by_cntlid(int cntlid) +{ + int i; + + for (i = 0; i < g_max_conns; i++) { + if ((g_conns_array[i].is_valid == 1) && + (g_conns_array[i].cntlid == cntlid) && + (g_conns_array[i].qid == 0)) { + return &g_conns_array[i]; + } + } + + return NULL; +} + +int spdk_initialize_nvmf_conns(int max_connections) +{ + size_t conns_size; + int i, rc; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + + rc = pthread_mutex_init(&g_conns_mutex, NULL); + if (rc != 0) { + SPDK_ERRLOG("mutex_init() failed\n"); + return -1; + } + + sprintf(g_shm_name, "nvmf_conns.%d", spdk_app_get_instance_id()); + g_conns_array_fd = shm_open(g_shm_name, O_RDWR | O_CREAT, 0600); + if (g_conns_array_fd < 0) { + SPDK_ERRLOG("could not shm_open %s\n", g_shm_name); + return -1; + } + + g_max_conns = max_connections; + conns_size = sizeof(struct spdk_nvmf_conn) * g_max_conns; + + if (ftruncate(g_conns_array_fd, conns_size) != 0) { + SPDK_ERRLOG("could not ftruncate\n"); + shm_unlink(g_shm_name); + close(g_conns_array_fd); + return -1; + } + g_conns_array = mmap(0, conns_size, PROT_READ | PROT_WRITE, MAP_SHARED, + g_conns_array_fd, 0); + + memset(g_conns_array, 0, conns_size); + + for (i = 0; i < RTE_MAX_LCORE; i++) { + rte_atomic32_set(&g_num_connections[i], 0); + } + + return 0; +} + +struct spdk_nvmf_conn * +spdk_nvmf_allocate_conn(void) +{ + struct spdk_nvmf_conn *conn; + + conn = allocate_conn(); + if (conn == NULL) 
{ + SPDK_ERRLOG("Could not allocate new connection.\n"); + goto err0; + } + + /* all new connections initially default as AQ until nvmf connect */ + conn->type = CONN_TYPE_AQ; + + /* no session association until nvmf connect */ + conn->sess = NULL; + + conn->state = CONN_STATE_INVALID; + conn->sq_head = conn->sq_tail = 0; + + return conn; + +err0: + return NULL; +} + +/** + +\brief Create an NVMf fabric connection from the given parameters and schedule it + on a reactor thread. + +\code + +# identify reactor where the new connections work item will be scheduled +reactor = nvmf_allocate_reactor() +schedule fabric connection work item on reactor + +\endcode + +*/ +int +spdk_nvmf_startup_conn(struct spdk_nvmf_conn *conn) +{ + int lcore; + struct spdk_nvmf_conn *admin_conn; + uint64_t nvmf_session_core = spdk_app_get_core_mask(); + + /* + * if starting IO connection then determine core + * allocated to admin queue to request core mask. + * Can not assume nvmf session yet created at time + * of fabric connection setup. Rely on fabric + * function to locate matching controller session. + */ + if (conn->type == CONN_TYPE_IOQ && conn->cntlid != 0) { + admin_conn = spdk_find_nvmf_conn_by_cntlid(conn->cntlid); + if (admin_conn != NULL) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Located admin conn session core %d\n", + admin_conn->poller.lcore); + nvmf_session_core = 1ULL << admin_conn->poller.lcore; + } + } + + lcore = nvmf_allocate_reactor(nvmf_session_core); + if (lcore < 0) { + SPDK_ERRLOG("Unable to find core to launch connection.\n"); + goto err0; + } + + conn->state = CONN_STATE_RUNNING; + SPDK_NOTICELOG("Launching nvmf connection[qid=%d] on core: %d\n", + conn->qid, lcore); + conn->poller.fn = spdk_nvmf_conn_do_work; + conn->poller.arg = conn; + + rte_atomic32_inc(&g_num_connections[lcore]); + spdk_poller_register(&conn->poller, lcore, NULL); + + return 0; +err0: + free_conn(conn); + return -1; +} + +static void +_conn_destruct(spdk_event_t event) +{ + struct spdk_nvmf_conn *conn = spdk_event_get_arg1(event); + + /* + * Notify NVMf library of the fabric connection + * going away. If this is the AQ connection then + * set state for other connections to abort. 
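+	 * This runs as a deferred event passed to spdk_poller_unregister() in
+	 * spdk_nvmf_conn_destruct() below, so the connection's poller has
+	 * already been quiesced on its reactor core before teardown begins.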
+ */ + nvmf_disconnect((void *)conn, conn->sess); + + if (conn->type == CONN_TYPE_AQ) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "AQ connection destruct, trigger session closure\n"); + /* Trigger all I/O connections to shutdown */ + conn->state = CONN_STATE_FABRIC_DISCONNECT; + } + + nvmf_rdma_conn_cleanup(conn); + + pthread_mutex_lock(&g_conns_mutex); + conn->sess = NULL; + conn->cm_id = 0; + free_conn(conn); + pthread_mutex_unlock(&g_conns_mutex); +} + +static void spdk_nvmf_conn_destruct(struct spdk_nvmf_conn *conn) +{ + struct spdk_event *event; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "conn %p\n", conn); + conn->state = CONN_STATE_INVALID; + + event = spdk_event_allocate(rte_lcore_id(), _conn_destruct, conn, NULL, NULL); + spdk_poller_unregister(&conn->poller, event); + rte_atomic32_dec(&g_num_connections[rte_lcore_id()]); +} + +static int +spdk_nvmf_get_active_conns(void) +{ + struct spdk_nvmf_conn *conn; + int num = 0; + int i; + + pthread_mutex_lock(&g_conns_mutex); + for (i = 0; i < g_max_conns; i++) { + conn = &g_conns_array[i]; + if (!conn->is_valid) + continue; + num++; + } + pthread_mutex_unlock(&g_conns_mutex); + return num; +} + +static void +spdk_nvmf_cleanup_conns(void) +{ + munmap(g_conns_array, sizeof(struct spdk_nvmf_conn) * g_max_conns); + shm_unlink(g_shm_name); + close(g_conns_array_fd); +} + +static void +spdk_nvmf_conn_check_shutdown(struct rte_timer *timer, void *arg) +{ + if (spdk_nvmf_get_active_conns() == 0) { + RTE_VERIFY(timer == &g_shutdown_timer); + rte_timer_stop(timer); + spdk_nvmf_cleanup_conns(); + spdk_app_stop(0); + } +} + +void spdk_shutdown_nvmf_conns(void) +{ + struct spdk_nvmf_conn *conn; + int i; + + pthread_mutex_lock(&g_conns_mutex); + + for (i = 0; i < g_max_conns; i++) { + conn = &g_conns_array[i]; + if (!conn->is_valid) + continue; + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Set conn %d state to exiting\n", i); + conn->state = CONN_STATE_EXITING; + } + + pthread_mutex_unlock(&g_conns_mutex); + rte_timer_init(&g_shutdown_timer); + rte_timer_reset(&g_shutdown_timer, rte_get_timer_hz() / 1000, PERIODICAL, + rte_get_master_lcore(), spdk_nvmf_conn_check_shutdown, NULL); +} + +static int +spdk_nvmf_send_response(struct spdk_nvmf_conn *conn, struct nvmf_request *req) +{ + struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; + + /* Zero out fields reserved in NVMf */ + rsp->sqid = 0; + rsp->status.p = 0; + + rsp->sqhd = conn->sq_head; + rsp->cid = req->cid; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, + "cpl: cdw0=0x%x rsvd1=0x%x sqhd=0x%x sqid=0x%x cid=0x%x status=0x%x\n", + rsp->cdw0, rsp->rsvd1, rsp->sqhd, rsp->sqid, rsp->cid, *(uint16_t *)&rsp->status); + + return nvmf_post_rdma_send(conn, req->fabric_tx_ctx); +} + +static int +nvmf_io_cmd_continue(struct spdk_nvmf_conn *conn, struct nvme_qp_tx_desc *tx_desc) +{ + struct nvme_qp_rx_desc *rx_desc; + struct nvmf_request *req; + struct spdk_nvme_cmd *cmd; + int ret; + + + rx_desc = tx_desc->rx_desc; + if (rx_desc == NULL) { + SPDK_ERRLOG(" rx_desc does not exist!\n"); + return -1; + } + + req = &tx_desc->req_state; + cmd = &req->cmd->nvme_cmd; + req->fabric_rx_ctx = rx_desc; + + /* clear the SGL details for RDMA performed */ + req->length = 0; + + /* send to NVMf library for backend NVMe processing */ + ret = nvmf_process_io_cmd(req->session, cmd, (void *)rx_desc->bb, rx_desc->bb_sgl.length, req); + if (ret) { + /* library failed the request and should have + Updated the response */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " send nvme io cmd capsule error response\n"); + ret = spdk_nvmf_send_response(conn, req); + if (ret) { + 
SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + return -1; + } + } + return 0; +} + +static void +nvmf_process_async_completion(struct nvmf_request *req) +{ + struct nvme_qp_tx_desc *tx_desc = (struct nvme_qp_tx_desc *)req->fabric_tx_ctx; + struct spdk_nvme_cpl *response; + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + int ret; + + response = &req->rsp->nvme_cpl; + + /* Was the command successful */ + if ((response->status.sc == SPDK_NVME_SC_SUCCESS) && req->length > 0) { + /* data to be copied to host via memory RDMA */ + if (req->length < rx_desc->bb_len) { + /* temporarily adjust SGE to only copy what the + host is prepared to receive. + */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " *** modify sgl length from %x to %x\n", + rx_desc->bb_sgl.length, req->length); + rx_desc->bb_sgl.length = req->length; + } + ret = nvmf_post_rdma_write(tx_desc->conn, tx_desc); + if (ret) { + SPDK_ERRLOG("Unable to post rdma write tx descriptor\n"); + goto command_fail; + } + } + + /* Now send back the response */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "send nvme cmd capsule response\n"); + ret = spdk_nvmf_send_response(tx_desc->conn, req); + if (ret) { + SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + goto command_fail; + } + + return; + +command_fail: + nvmf_deactive_tx_desc(tx_desc); +} + +static int +nvmf_process_property_get(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct spdk_nvmf_fabric_prop_get_rsp *response; + struct nvmf_request *req = &tx_desc->req_state; + struct spdk_nvmf_fabric_prop_get_cmd *cmd; + int ret; + + cmd = &req->cmd->prop_get_cmd; + response = &req->rsp->prop_get_rsp; + + nvmf_property_get(conn->sess, cmd, response); + + /* send the nvmf response if setup by NVMf library */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "send property get capsule response\n"); + ret = spdk_nvmf_send_response(conn, req); + if (ret) { + SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + return -1; + } + + return 0; +} + +static int +nvmf_process_property_set(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct spdk_nvmf_fabric_prop_set_rsp *response; + struct nvmf_request *req = &tx_desc->req_state; + struct spdk_nvmf_fabric_prop_set_cmd *cmd; + bool shutdown = false; + int ret; + + cmd = &req->cmd->prop_set_cmd; + response = &req->rsp->prop_set_rsp; + + nvmf_property_set(conn->sess, cmd, response, &shutdown); + if (shutdown == true) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Call to set properties has indicated shutdown\n"); + conn->state = CONN_STATE_FABRIC_DISCONNECT; + } + + /* send the nvmf response if setup by NVMf library */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "send property set capsule response\n"); + ret = spdk_nvmf_send_response(conn, req); + if (ret) { + SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + return -1; + } + + return 0; +} + +/* Check the nvmf message received */ +static void nvmf_trace_command(struct spdk_nvmf_capsule_cmd *cap_hdr, enum conn_type conn_type) +{ + struct spdk_nvme_cmd *cmd = (struct spdk_nvme_cmd *)cap_hdr; + struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1; + uint8_t opc; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "NVMf %s%s Command:\n", + conn_type == CONN_TYPE_AQ ? "Admin" : "I/O", + cmd->opc == SPDK_NVMF_FABRIC_OPCODE ? 
" Fabrics" : ""); + + if (cmd->opc == SPDK_NVMF_FABRIC_OPCODE) { + opc = cap_hdr->fctype; + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: fctype 0x%02x\n", cap_hdr->fctype); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: cid 0x%x\n", cap_hdr->cid); + } else { + opc = cmd->opc; + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: opc 0x%02x\n", cmd->opc); + if (cmd->fuse) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: fuse %x\n", cmd->fuse); + } + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: psdt %u\n", cmd->psdt); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: cid 0x%x\n", cmd->cid); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: nsid %u\n", cmd->nsid); + if (cmd->mptr) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: mptr 0x%" PRIx64 "\n", cmd->mptr); + } + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: cdw10 0x%08x\n", cmd->cdw10); + } + + if (spdk_nvme_opc_get_data_transfer(opc) != SPDK_NVME_DATA_NONE) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL type 0x%x\n", sgl->type); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL subtype 0x%x\n", sgl->type_specific); + if (sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) { + + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL address 0x%lx\n", + ((struct spdk_nvmf_keyed_sgl_descriptor *)sgl)->address); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL key 0x%x\n", + ((struct spdk_nvmf_keyed_sgl_descriptor *)sgl)->key); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL length 0x%x\n", + ((struct spdk_nvmf_keyed_sgl_descriptor *)sgl)->length); + } else if (sgl->type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL %s 0x%" PRIx64 "\n", + sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_OFFSET ? "offset" : "address", + sgl->address); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " SQE: SGL length 0x%x\n", sgl->length); + } + } +} + +static int +nvmf_process_io_command(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + struct nvmf_request *req; + struct spdk_nvme_sgl_descriptor *sgl; + struct spdk_nvmf_keyed_sgl_descriptor *keyed_sgl; + struct spdk_nvme_cmd *cmd; + enum spdk_nvme_data_transfer xfer; + void *buf = NULL; + uint32_t len = 0; + int ret; + + req = &tx_desc->req_state; + cmd = &req->cmd->nvme_cmd; + sgl = (struct spdk_nvme_sgl_descriptor *)&cmd->dptr.sgl1; + keyed_sgl = (struct spdk_nvmf_keyed_sgl_descriptor *)sgl; + + xfer = spdk_nvme_opc_get_data_transfer(cmd->opc); + if (xfer != SPDK_NVME_DATA_NONE) { + /* + NVMf does support in-capsule data for write comamnds. If caller indicates SGL, + verify the SGL for in-capsule or RDMA read/write use and prepare + data buffer reference and length for the NVMf library. 
+ */ + /* TBD: add code to handle I/O larger than default bb size */ + if (sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK && + (sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_ADDRESS || + sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) { + if (keyed_sgl->key == 0) { + SPDK_ERRLOG("Host did not specify SGL key!\n"); + goto command_fail; + } + + if (keyed_sgl->length > rx_desc->bb_sgl.length) { + SPDK_ERRLOG("SGL length 0x%x exceeds BB length 0x%x\n", + (uint32_t)keyed_sgl->length, rx_desc->bb_sgl.length); + goto command_fail; + } + + buf = (void *)rx_desc->bb; + len = rx_desc->bb_sgl.length; + req->remote_addr = keyed_sgl->address; + req->rkey = keyed_sgl->key; + req->length = keyed_sgl->length; + } else if (sgl->type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && + sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + uint64_t offset = sgl->address; + uint32_t max_len = rx_desc->bb_sgl.length; + + if (offset > max_len) { + SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n", + offset, max_len); + goto command_fail; + } + max_len -= (uint32_t)offset; + + if (sgl->length > max_len) { + SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", + sgl->length, max_len); + goto command_fail; + } + + buf = rx_desc->bb + offset; + len = sgl->length; + } else { + SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type %2x, Subtype %2x\n", + sgl->type, sgl->type_specific); + goto command_fail; + } + + /* for any I/O that requires rdma data to be + pulled into target BB before processing by + the backend NVMe device + */ + if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { + if (len > 0 && sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) { + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Issuing RDMA Read to get host data\n"); + /* data to be copied from remote host via memory RDMA */ + if (req->length < rx_desc->bb_len) { + /* temporarily adjust SGE to only copy what the + host is prepared to send. + */ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " *** modify bb sgl length from %x to %x\n", + rx_desc->bb_sgl.length, req->length); + rx_desc->bb_sgl.length = req->length; + } + + req->pending = NVMF_PENDING_WRITE; + ret = nvmf_post_rdma_read(tx_desc->conn, tx_desc); + if (ret) { + SPDK_ERRLOG("Unable to post rdma read tx descriptor\n"); + goto command_fail; + } + /* Need to wait for RDMA completion indication where + it will continue I/O operation */ + return 0; + } + } + } + + /* send to NVMf library for backend NVMe processing */ + ret = nvmf_process_io_cmd(req->session, cmd, buf, len, req); + if (ret) { + /* library failed the request and should have + Updated the response */ + SPDK_TRACELOG(SPDK_TRACE_RDMA, "send nvme io cmd capsule error response\n"); + ret = spdk_nvmf_send_response(conn, req); + if (ret) { + SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + goto command_fail; + } + } + + return 0; + +command_fail: + return -1; +} + +static int +nvmf_process_admin_command(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + struct nvmf_request *req; + struct spdk_nvme_cmd *cmd; + struct spdk_nvme_sgl_descriptor *sgl; + struct spdk_nvmf_keyed_sgl_descriptor *keyed_sgl; + void *buf = NULL; + uint32_t len = 0; + int ret; + + req = &tx_desc->req_state; + cmd = &req->cmd->nvme_cmd; + sgl = (struct spdk_nvme_sgl_descriptor *)&cmd->dptr.sgl1; + keyed_sgl = (struct spdk_nvmf_keyed_sgl_descriptor *)sgl; + + /* + NVMf does not support in-capsule data for admin command or response capsules. 
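+	   Admin data therefore always moves through host-resident buffers via RDMA.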
+
+static int
+nvmf_process_admin_command(struct spdk_nvmf_conn *conn,
+			   struct nvme_qp_tx_desc *tx_desc)
+{
+	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
+	struct nvmf_request *req;
+	struct spdk_nvme_cmd *cmd;
+	struct spdk_nvme_sgl_descriptor *sgl;
+	struct spdk_nvmf_keyed_sgl_descriptor *keyed_sgl;
+	void *buf = NULL;
+	uint32_t len = 0;
+	int ret;
+
+	req = &tx_desc->req_state;
+	cmd = &req->cmd->nvme_cmd;
+	sgl = (struct spdk_nvme_sgl_descriptor *)&cmd->dptr.sgl1;
+	keyed_sgl = (struct spdk_nvmf_keyed_sgl_descriptor *)sgl;
+
+	/*
+	  NVMf does not support in-capsule data for admin command or response capsules.
+	  If the caller indicates an SGL for return RDMA data, verify the SGL and prepare
+	  the data buffer reference and length for the NVMf library. Only keyed-type
+	  SGLs are supported for return data.
+	*/
+	if (sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
+	    (sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
+	     sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
+		buf = (void *)rx_desc->bb;
+		len = rx_desc->bb_sgl.length;
+		req->remote_addr = keyed_sgl->address;
+		req->rkey = keyed_sgl->key;
+		req->length = keyed_sgl->length;
+	}
+
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "	tx_desc %p: req_state %p, rsp %p, addr %p\n",
+		      tx_desc, req, (void *)req->rsp, (void *)tx_desc->send_sgl.addr);
+
+	/* send to NVMf library for backend NVMe processing */
+	ret = nvmf_process_admin_cmd(req->session, cmd, buf, len, req);
+	if (ret) {
+		/* the library failed the request and should have
+		   updated the response */
+		SPDK_TRACELOG(SPDK_TRACE_NVMF, "send nvme admin cmd capsule sync response\n");
+		ret = spdk_nvmf_send_response(conn, req);
+		if (ret) {
+			SPDK_ERRLOG("Unable to send aq qp tx descriptor\n");
+			goto command_fail;
+		}
+	}
+
+	return 0;
+
+command_fail:
+	return -1;
+}
+
+static void
+nvmf_init_conn_properties(struct spdk_nvmf_conn *conn,
+			  struct nvmf_session *session,
+			  struct spdk_nvmf_fabric_connect_rsp *response)
+{
+	struct spdk_nvmf_extended_identify_ctrlr_data *lcdata;
+	uint32_t mdts;
+
+	conn->cntlid = response->status_code_specific.success.cntlid;
+	session->max_connections_allowed = g_nvmf_tgt.MaxConnectionsPerSession;
+	nvmf_init_session_properties(session, conn->sq_depth);
+
+	/* Update the session logical controller data with any
+	 * application fabric side limits
+	 */
+	/* reset mdts in vcdata to equal the application default maximum */
+	mdts = SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE /
+	       (1 << (12 + session->vcprop.cap_hi.bits.mpsmin));
+	if (mdts == 0) {
+		SPDK_ERRLOG("Min page size exceeds max transfer size!\n");
+		SPDK_ERRLOG("Verify setting of SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE and mpsmin\n");
+		session->vcdata.mdts = 1; /* Support single page for now */
+	} else {
+		/* set mdts as a power of 2 representing number of mpsmin units */
+		session->vcdata.mdts = 0;
+		while ((1ULL << session->vcdata.mdts) < mdts) {
+			session->vcdata.mdts++;
+		}
+	}
+
+	/* increase the I/O recv capsule size for in_capsule data */
+	lcdata = (struct spdk_nvmf_extended_identify_ctrlr_data *)&session->vcdata.reserved5[1088];
+	lcdata->ioccsz += (g_nvmf_tgt.MaxInCapsuleData / 16);
+}
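nvmf_init_conn_properties() above reports MDTS as a power of two in units of the controller's minimum page size, which is 2^(12 + MPSMIN) bytes. For illustration only, that arithmetic as a standalone example, using the defaults from this patch (128 KiB maximum transfer from nvmf.h) and assuming MPSMIN = 0, i.e. 4 KiB pages:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t max_xfer = 128 * 1024; /* SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE */
	uint32_t mpsmin = 0;            /* CAP.MPSMIN: minimum page = 4 KiB */
	uint32_t units = max_xfer / (1u << (12 + mpsmin)); /* 32 pages */
	uint8_t mdts = 0;

	/* smallest power of two >= units, same loop as above */
	while ((1u << mdts) < units) {
		mdts++;
	}
	printf("mdts = %u (max transfer = %u bytes)\n",
	       (unsigned)mdts, (1u << mdts) * (1u << (12 + mpsmin)));
	/* prints: mdts = 5 (max transfer = 131072 bytes) */
	return 0;
}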
SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** hostid = %04x%04x-%04x-%04x-%04x-%04x%04x%04x ***\n", + htons(*(unsigned short *) &connect_data->hostid[0]), + htons(*(unsigned short *) &connect_data->hostid[2]), + htons(*(unsigned short *) &connect_data->hostid[4]), + htons(*(unsigned short *) &connect_data->hostid[6]), + htons(*(unsigned short *) &connect_data->hostid[8]), + htons(*(unsigned short *) &connect_data->hostid[10]), + htons(*(unsigned short *) &connect_data->hostid[12]), + htons(*(unsigned short *) &connect_data->hostid[14])); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** subsiqn = %s ***\n", (char *)&connect_data->subnqn[0]); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** hostiqn = %s ***\n", (char *)&connect_data->hostnqn[0]); + + response = &req->rsp->connect_rsp; + + session = nvmf_connect((void *)conn, connect, connect_data, response); + if (session != NULL) { + conn->sess = session; + conn->qid = connect->qid; + if (connect->qid > 0) { + conn->type = CONN_TYPE_IOQ; /* I/O Connection */ + } else { + /* When session first created, set some attributes */ + nvmf_init_conn_properites(conn, session, response); + } + } + + /* synchronous call, nvmf library expected to init + response status. + */ + SPDK_TRACELOG(SPDK_TRACE_NVMF, "send connect capsule response\n"); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** cntlid = %x ***\n", + response->status_code_specific.success.cntlid); + ret = spdk_nvmf_send_response(conn, req); + if (ret) { + SPDK_ERRLOG("Unable to send aq qp tx descriptor\n"); + goto connect_error; + } + return; + +connect_error: + /* recover the tx_desc */ + if (tx_desc != NULL) { + tx_desc->rx_desc = NULL; + nvmf_deactive_tx_desc(tx_desc); + } +} + +static int +nvmf_process_connect(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct spdk_nvmf_fabric_connect_cmd *connect; + struct nvmf_request *req; + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + union sgl_shift *sgl; + int ret; + + connect = (struct spdk_nvmf_fabric_connect_cmd *)&rx_desc->msg_buf; + sgl = (union sgl_shift *)&connect->sgl1; + + /* debug - display the connect capsule */ + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** Connect Capsule *** %p\n", connect); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** cid = %x ***\n", connect->cid); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** recfmt = %x ***\n", connect->recfmt); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** qid = %x ***\n", connect->qid); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** sqsize = %x ***\n", connect->sqsize); + + if (sgl->nvmf_sgl.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK && + sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { + /* + Extended data was passed by initiator to target via in-capsule + data and not via RDMA SGL xfer. 
+
+static int
+nvmf_process_connect(struct spdk_nvmf_conn *conn,
+		     struct nvme_qp_tx_desc *tx_desc)
+{
+	struct spdk_nvmf_fabric_connect_cmd *connect;
+	struct nvmf_request *req;
+	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
+	union sgl_shift *sgl;
+	int ret;
+
+	connect = (struct spdk_nvmf_fabric_connect_cmd *)&rx_desc->msg_buf;
+	sgl = (union sgl_shift *)&connect->sgl1;
+
+	/* debug - display the connect capsule */
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** Connect Capsule *** %p\n", connect);
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** cid    = %x ***\n", connect->cid);
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** recfmt = %x ***\n", connect->recfmt);
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** qid    = %x ***\n", connect->qid);
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** sqsize = %x ***\n", connect->sqsize);
+
+	if (sgl->nvmf_sgl.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+	    sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+		/*
+		  Extended data was passed from the initiator to the target via
+		  in-capsule data rather than an RDMA SGL transfer, so the extended
+		  data resides in the rx message buffer.
+		*/
+		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	Using In-Capsule connect data\n");
+		if (rx_desc->recv_bc < (sizeof(struct spdk_nvmf_fabric_connect_cmd) +
+					sizeof(struct spdk_nvmf_fabric_connect_data))) {
+			SPDK_ERRLOG("insufficient in-capsule data to satisfy connect!\n");
+			goto connect_fail;
+		}
+		nvmf_connect_continue(conn, tx_desc);
+	} else if (sgl->nvmf_sgl.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
+		   (sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
+		    sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
+		/* setup a new SQE that uses local bounce buffer */
+		req = &tx_desc->req_state;
+		req->remote_addr = sgl->nvmf_sgl.address;
+		req->rkey = sgl->nvmf_sgl.key;
+		req->pending = NVMF_PENDING_CONNECT;
+
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "	Issuing RDMA Read to get host connect data\n");
+		/* data to be copied from host via memory RDMA */
+		if (sgl->nvmf_sgl.length < rx_desc->bb_len) {
+			/* temporarily adjust the SGE to only copy what the
+			   host is prepared to send.
+			*/
+			SPDK_TRACELOG(SPDK_TRACE_DEBUG, " *** modify bb sgl length from %x to %x\n",
+				      rx_desc->bb_sgl.length, sgl->nvmf_sgl.length);
+			rx_desc->bb_sgl.length = sgl->nvmf_sgl.length;
+		}
+
+		ret = nvmf_post_rdma_read(tx_desc->conn, tx_desc);
+		if (ret) {
+			SPDK_ERRLOG("Unable to post rdma read tx descriptor\n");
+			goto connect_fail;
+		}
+		/* Need to wait for the RDMA completion indication, where
+		   the connect operation will continue */
+	} else {
+		SPDK_ERRLOG("Invalid NVMf Connect SGL: Type %2x, Subtype %2x\n",
+			    sgl->nvmf_sgl.type, sgl->nvmf_sgl.subtype);
+		goto connect_fail;
+	}
+	return 0;
+
+connect_fail:
+	return -1;
+}
+
+static int
+nvmf_process_fabrics_command(struct spdk_nvmf_conn *conn, struct nvme_qp_tx_desc *tx_desc)
+{
+	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
+	struct spdk_nvmf_capsule_cmd *cap_hdr;
+
+	cap_hdr = (struct spdk_nvmf_capsule_cmd *)&rx_desc->msg_buf;
+
+	switch (cap_hdr->fctype) {
+	case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
+		return nvmf_process_property_set(conn, tx_desc);
+	case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
+		return nvmf_process_property_get(conn, tx_desc);
+	case SPDK_NVMF_FABRIC_COMMAND_CONNECT:
+		return nvmf_process_connect(conn, tx_desc);
+	default:
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "recv capsule header type invalid [%x]!\n",
+			      cap_hdr->fctype);
+		return 1; /* skip, do nothing */
+	}
+}
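nvmf_recv() below advances a circular submission-queue tail on every receive and treats head == tail after the increment as an overflow by the host. For illustration only, the wrap-around and overflow test reduced to a standalone example; the depth of 4 is an assumed sample value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t sq_depth = 4, sq_head = 0, sq_tail = 3;

	/* circular increment, as in nvmf_recv() below */
	if (sq_tail < sq_depth - 1) {
		sq_tail++;
	} else {
		sq_tail = 0;
	}

	/* tail catching up to head means the host sent more
	 * commands than the negotiated queue depth allows */
	if (sq_head == sq_tail) {
		puts("queue overflow: host exceeded negotiated depth");
	}
	return 0;
}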
+
+static int nvmf_recv(struct spdk_nvmf_conn *conn, struct ibv_wc *wc)
+{
+	struct nvme_qp_rx_desc *rx_desc;
+	struct nvme_qp_tx_desc *tx_desc = NULL;
+	struct spdk_nvmf_capsule_cmd *cap_hdr;
+	struct nvmf_request *req;
+	int ret = 0;
+
+	rx_desc = (struct nvme_qp_rx_desc *)wc->wr_id;
+	cap_hdr = (struct spdk_nvmf_capsule_cmd *)&rx_desc->msg_buf;
+
+	/* Update connection SQ tracking: increment the
+	   SQ tail, consuming a free RX recv slot, then
+	   check for queue overflow, which should
+	   never happen.
+	*/
+	if (conn->sq_tail < (conn->sq_depth - 1)) {
+		conn->sq_tail++;
+	} else {
+		conn->sq_tail = 0;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sq_head %x, sq_tail %x, sq_depth %x\n",
+		      conn->sq_head, conn->sq_tail, conn->sq_depth);
+	/* trap if initiator exceeds qdepth */
+	if (conn->sq_head == conn->sq_tail) {
+		SPDK_ERRLOG("	*** SQ Overflow !! ***\n");
+		/* controller fatal status condition:
+		   set the cfs flag in controller status
+		   and stop processing this and any I/O
+		   on this queue.
+		*/
+		if (conn->sess) {
+			conn->sess->vcprop.csts.bits.cfs = 1;
+			conn->state = CONN_STATE_OVERFLOW;
+		}
+		if (conn->type == CONN_TYPE_IOQ) {
+			/* on I/O queue overflow, stop processing
+			   but allow the remote host to query the
+			   failure via the admin queue
+			*/
+			goto drop_recv;
+		} else {
+			/* on admin queue overflow there is no
+			   recovery; error out to trigger a
+			   disconnect
+			*/
+			goto recv_error;
+		}
+	}
+
+	if (wc->byte_len < sizeof(*cap_hdr)) {
+		SPDK_ERRLOG("recv length less than capsule header\n");
+		goto recv_error;
+	}
+	rx_desc->recv_bc = wc->byte_len;
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, "recv byte count %x\n", rx_desc->recv_bc);
+
+	/* get a response buffer */
+	if (STAILQ_EMPTY(&conn->qp_tx_desc)) {
+		SPDK_ERRLOG("tx desc pool empty!\n");
+		goto recv_error;
+	}
+	tx_desc = STAILQ_FIRST(&conn->qp_tx_desc);
+	nvmf_active_tx_desc(tx_desc);
+	tx_desc->rx_desc = rx_desc;
+
+	req = &tx_desc->req_state;
+	req->session = conn->sess;
+	req->fabric_tx_ctx = tx_desc;
+	req->fabric_rx_ctx = rx_desc;
+	req->cb_fn = nvmf_process_async_completion;
+	req->length = 0;
+	req->cid = cap_hdr->cid;
+	req->cmd = &rx_desc->msg_buf;
+
+	nvmf_trace_command(cap_hdr, conn->type);
+
+	if (cap_hdr->opcode == SPDK_NVMF_FABRIC_OPCODE) {
+		ret = nvmf_process_fabrics_command(conn, tx_desc);
+	} else if (conn->type == CONN_TYPE_AQ) {
+		ret = nvmf_process_admin_command(conn, tx_desc);
+	} else {
+		ret = nvmf_process_io_command(conn, tx_desc);
+	}
+
+	if (ret < 0) {
+		goto recv_error;
+	}
+
+	/* a return of 1 means the command was skipped, with no
+	   completion pending, so re-queue the tx_desc and
+	   re-post the rx_desc immediately.
+	*/
+	if (ret == 1) {
+		tx_desc->rx_desc = NULL;
+		nvmf_deactive_tx_desc(tx_desc);
+		if (nvmf_post_rdma_recv(conn, rx_desc)) {
+			SPDK_ERRLOG("Unable to re-post aq rx descriptor\n");
+			goto recv_error;
+		}
+	}
+
+drop_recv:
+	return 0;
+
+recv_error:
+	/* recover the tx_desc */
+	if (tx_desc != NULL) {
+		tx_desc->rx_desc = NULL;
+		nvmf_deactive_tx_desc(tx_desc);
+	}
+	return -1;
+}
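nvmf_cq_event_handler() below drains the completion queue one work completion at a time, bounded by the queue depth. For illustration only, the bare polling skeleton that pattern is built on; drain_cq() and the budget parameter are local names, not part of the patch:

#include <infiniband/verbs.h>
#include <stdio.h>

static int
drain_cq(struct ibv_cq *cq, int budget)
{
	struct ibv_wc wc;
	int n, total = 0;

	while (total < budget) {
		n = ibv_poll_cq(cq, 1, &wc); /* >0: got one, 0: CQ empty, <0: error */
		if (n == 0)
			break;
		if (n < 0)
			return -1;
		if (wc.status != IBV_WC_SUCCESS) {
			fprintf(stderr, "wr_id %llu failed: %s\n",
				(unsigned long long)wc.wr_id,
				ibv_wc_status_str(wc.status));
			break;
		}
		/* dispatch on wc.opcode here, as the handler below does */
		total += n;
	}
	return total;
}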
+
+static int nvmf_cq_event_handler(struct spdk_nvmf_conn *conn)
+{
+	struct ibv_wc wc;
+	struct nvme_qp_tx_desc *tx_desc;
+	struct nvmf_request *req;
+	int rc;
+	int cq_count = 0;
+	int i;
+
+	for (i = 0; i < conn->sq_depth; i++) {
+		tx_desc = NULL;
+
+		/* if an overflow condition was hit,
+		   stop all processing but
+		   do not disconnect.
+		*/
+		if (conn->state == CONN_STATE_OVERFLOW)
+			break;
+
+		rc = ibv_poll_cq(conn->cq, 1, &wc);
+		if (rc == 0) /* no completions at this time */
+			break;
+
+		if (rc < 0) {
+			SPDK_ERRLOG("Poll CQ error (%d)!\n", rc);
+			goto handler_error;
+		}
+
+		/* OK, process the single successful cq event */
+		cq_count += rc;
+
+		if (wc.status) {
+			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d, exiting handler\n",
+				      wc.status);
+			break;
+		}
+
+		switch (wc.opcode) {
+		case IBV_WC_SEND:
+			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ send completion\n");
+			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
+			nvmf_deactive_tx_desc(tx_desc);
+			break;
+
+		case IBV_WC_RDMA_WRITE:
+			/*
+			 * Will get this event only if we set the IBV_SEND_SIGNALED
+			 * flag in rdma_write, to trace rdma write latency
+			 */
+			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma write completion\n");
+			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
+			spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)tx_desc->rx_desc, 0);
+			break;
+
+		case IBV_WC_RDMA_READ:
+			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma read completion\n");
+			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
+			spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)tx_desc->rx_desc, 0);
+			req = &tx_desc->req_state;
+			if (req->pending == NVMF_PENDING_WRITE) {
+				req->pending = NVMF_PENDING_NONE;
+				rc = nvmf_io_cmd_continue(conn, tx_desc);
+				if (rc) {
+					SPDK_ERRLOG("error from io cmd continue\n");
+					goto handler_error;
+				}
+
+				/*
+				 * Check for any pending rdma_reads to start
+				 */
+				conn->pending_rdma_read_count--;
+				if (!STAILQ_EMPTY(&conn->qp_pending_desc)) {
+					tx_desc = STAILQ_FIRST(&conn->qp_pending_desc);
+					STAILQ_REMOVE_HEAD(&conn->qp_pending_desc, link);
+					STAILQ_INSERT_TAIL(&conn->qp_tx_active_desc, tx_desc, link);
+
+					SPDK_TRACELOG(SPDK_TRACE_RDMA, "Issue rdma read from pending queue: tx_desc %p\n",
+						      tx_desc);
+
+					rc = nvmf_post_rdma_read(conn, tx_desc);
+					if (rc) {
+						SPDK_ERRLOG("Unable to post pending rdma read descriptor\n");
+						goto handler_error;
+					}
+				}
+			} else if (req->pending == NVMF_PENDING_CONNECT) {
+				req->pending = NVMF_PENDING_NONE;
+				nvmf_connect_continue(conn, tx_desc);
+			}
+			break;
+
+		case IBV_WC_RECV:
+			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ recv completion\n");
+			spdk_trace_record(TRACE_NVMF_IO_START, 0, 0, wc.wr_id, 0);
+			rc = nvmf_recv(conn, &wc);
+			if (rc) {
+				SPDK_ERRLOG("nvmf_recv processing failure\n");
+				goto handler_error;
+			}
+			break;
+
+		default:
+			SPDK_ERRLOG("Unknown CQ completion opcode!\n");
+			goto handler_error;
+		}
+	}
+	return cq_count;
+
+handler_error:
+	if (tx_desc != NULL)
+		nvmf_deactive_tx_desc(tx_desc);
+	SPDK_ERRLOG("handler error, exiting!\n");
+	return -1;
+}
+
+
+static int nvmf_execute_conn(struct spdk_nvmf_conn *conn)
+{
+	int rc = 0;
+
+	/* for an active session, process any pending NVMf completions */
+	if (conn->sess) {
+		if (conn->type == CONN_TYPE_AQ)
+			nvmf_check_admin_completions(conn->sess);
+		else
+			nvmf_check_io_completions(conn->sess);
+	}
+
+	/* process all pending completions */
+	rc = nvmf_cq_event_handler(conn);
+	if (rc > 0) {
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ event handler, %d CQ completions\n", rc);
+	} else if (rc < 0) {
+		SPDK_ERRLOG("CQ event handler error!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/**
+
+\brief This is the main routine for the nvmf connection work item.
+
+Serves mainly as a wrapper for the nvmf_execute_conn() function, which
+does the bulk of the work.
This function handles connection cleanup when +NVMf application is exiting or there is an error on the connection. +It also drains the connection if the work item is being suspended to +move to a different reactor. + +*/ +static void +spdk_nvmf_conn_do_work(void *arg) +{ + struct spdk_nvmf_conn *conn = arg; + int rc; + + rc = nvmf_execute_conn(conn); + + if (rc != 0 || conn->state == CONN_STATE_EXITING || + conn->state == CONN_STATE_FABRIC_DISCONNECT) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "state exiting to shutdown\n"); + spdk_nvmf_conn_destruct(conn); + } +} + +static int +nvmf_allocate_reactor(uint64_t cpumask) +{ + int i, selected_core; + enum rte_lcore_state_t state; + int master_lcore = rte_get_master_lcore(); + int32_t num_pollers, min_pollers; + + cpumask &= spdk_app_get_core_mask(); + if (cpumask == 0) { + return 0; + } + + min_pollers = INT_MAX; + selected_core = 0; + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (!((1ULL << i) & cpumask)) { + continue; + } + + /* + * DPDK returns WAIT for the master lcore instead of RUNNING. + * So we always treat the reactor on master core as RUNNING. + */ + if (i == master_lcore) { + state = RUNNING; + } else { + state = rte_eal_get_lcore_state(i); + } + if (state == FINISHED) { + rte_eal_wait_lcore(i); + } + + switch (state) { + case WAIT: + case FINISHED: + /* Idle cores have 0 pollers */ + if (0 < min_pollers) { + selected_core = i; + min_pollers = 0; + } + break; + case RUNNING: + /* This lcore is running, check how many pollers it already has */ + num_pollers = rte_atomic32_read(&g_num_connections[i]); + + /* Fill each lcore to target minimum, else select least loaded lcore */ + if (num_pollers < (SPDK_NVMF_DEFAULT_NUM_SESSIONS_PER_LCORE * + g_nvmf_tgt.MaxConnectionsPerSession)) { + /* If fewer than the target number of session connections + * exist then add to this lcore + */ + return i; + } else if (num_pollers < min_pollers) { + /* Track the lcore that has the minimum number of pollers + * to be used if no lcores have already met our criteria + */ + selected_core = i; + min_pollers = num_pollers; + } + break; + } + } + + return selected_core; +} + diff --git a/lib/nvmf/conn.h b/lib/nvmf/conn.h new file mode 100644 index 0000000000..54ac2f739b --- /dev/null +++ b/lib/nvmf/conn.h @@ -0,0 +1,124 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NVMF_FABRIC_INTF_H
+#define NVMF_FABRIC_INTF_H
+
+#include 
+
+#include "spdk/event.h"
+#include "nvmf_internal.h"
+#include "spdk/queue.h"
+
+/* NVMF Connection States */
+enum nvmf_cstate {
+	NVMF_CSTATE_INIT,
+	NVMF_CSTATE_ACCEPT,
+	NVMF_CSTATE_CONNECTED,
+	NVMF_CSTATE_TERMINATING,
+	NVMF_CSTATE_DISCONNECTED,
+};
+
+/*
+  RDMA Connection Resource Defaults
+ */
+#define NVMF_DEFAULT_TX_SGE 1
+#define NVMF_DEFAULT_RX_SGE 2
+
+/* RDMA transport connection states */
+enum conn_state {
+	CONN_STATE_INVALID = 0,
+	CONN_STATE_RUNNING = 1,
+	CONN_STATE_FABRIC_DISCONNECT = 2,
+	CONN_STATE_OVERFLOW = 3,
+	CONN_STATE_EXITING = 4,
+};
+
+enum conn_type {
+	CONN_TYPE_AQ = 0,
+	CONN_TYPE_IOQ = 1,
+};
+
+struct spdk_nvmf_conn {
+	uint32_t is_valid;
+
+	struct nvmf_session *sess;
+
+	/*
+	 * values saved from fabric connect and private data
+	 */
+	uint8_t responder_resources;
+	uint8_t initiator_depth;
+	uint16_t sq_depth;
+	uint16_t cq_depth;
+	uint16_t qid;
+	uint16_t cntlid;
+
+	enum conn_type type;
+	volatile enum conn_state state;
+
+	uint16_t sq_head;
+	uint16_t sq_tail;
+
+	struct rdma_cm_id *cm_id;
+	struct ibv_context *ctx;
+	struct ibv_comp_channel *comp_channel;
+	struct ibv_pd *pd;
+	struct ibv_cq *cq;
+	struct ibv_qp *qp;
+
+	uint8_t pending_rdma_read_count;
+	STAILQ_HEAD(qp_pending_desc, nvme_qp_tx_desc) qp_pending_desc;
+
+	STAILQ_HEAD(qp_rx_desc, nvme_qp_rx_desc) qp_rx_desc;
+	STAILQ_HEAD(qp_tx_desc, nvme_qp_tx_desc) qp_tx_desc;
+	STAILQ_HEAD(qp_tx_active_desc, nvme_qp_tx_desc) qp_tx_active_desc;
+
+	TAILQ_ENTRY(spdk_nvmf_conn) link;
+	struct spdk_poller poller;
+};
+
+int spdk_initialize_nvmf_conns(int max_connections);
+
+void spdk_shutdown_nvmf_conns(void);
+
+struct spdk_nvmf_conn *
+spdk_nvmf_allocate_conn(void);
+
+int spdk_nvmf_startup_conn(struct spdk_nvmf_conn *conn);
+
+struct spdk_nvmf_conn *
+spdk_find_nvmf_conn_by_cm_id(struct rdma_cm_id *cm_id);
+
+#endif /* NVMF_FABRIC_INTF_H */
diff --git a/lib/nvmf/controller.c b/lib/nvmf/controller.c
new file mode 100644
index 0000000000..bcee64262c
--- /dev/null
+++ b/lib/nvmf/controller.c
@@ -0,0 +1,315 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "controller.h" +#include "spdk/conf.h" +#include "spdk/nvme.h" +#include "spdk/log.h" +#include "spdk/trace.h" + +static TAILQ_HEAD(, spdk_nvmf_ctrlr) g_ctrlrs = TAILQ_HEAD_INITIALIZER(g_ctrlrs); + +#define SPDK_NVMF_MAX_NVME_DEVICES 64 + +struct nvme_bdf_whitelist { + uint16_t domain; + uint8_t bus; + uint8_t dev; + uint8_t func; + char name[MAX_NVME_NAME_LENGTH]; +}; + +struct spdk_nvmf_probe_ctx { + bool claim_all; + int whitelist_count; + struct nvme_bdf_whitelist whitelist[SPDK_NVMF_MAX_NVME_DEVICES]; +}; + +static void +spdk_nvmf_complete_ctrlr_aer(struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_cpl *cpl) +{ + /* TODO: Temporarily disabled during refactoring. */ +#if 0 + struct spdk_nvmf_subsystem *subsystem; + struct nvmf_session *sess; + int i; + + + + /* + * Scan the whitelist for any subsystems claiming namespaces + * associated with this NVMe controller. + */ + for (i = 0; i < g_num_nvme_devices; i++) { + if (g_whitelist[i].ctrlr == ctrlr && + g_whitelist[i].subsystem != NULL) { + + subsystem = g_whitelist[i].subsystem; + TAILQ_FOREACH(sess, &subsystem->sessions, entries) { + if (sess->aer_req_state == NULL) { + continue; + } + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Process session AER request, sess %p, req_state %p\n", + sess, sess->aer_req_state); + nvmf_complete_cmd(sess->aer_req_state, cpl); + /* clear this AER from the session */ + sess->aer_req_state = NULL; + } + } + } +#endif +} + +static void +aer_cb(void *arg, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = arg; + + if (spdk_nvme_cpl_is_error(cpl)) { + fprintf(stderr, "Nvme AER failed!\n"); + return; + } + + SPDK_TRACELOG(SPDK_TRACE_NVMF, " Nvme AER callback, log_page_id %x\n", + (cpl->cdw0 & 0xFF0000) >> 16); + + spdk_nvmf_complete_ctrlr_aer(ctrlr, cpl); +} + +static struct spdk_nvmf_ctrlr * +spdk_nvmf_ctrlr_create(char *name, int domain, int bus, int dev, int func, + struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvmf_ctrlr *nvmf_ctrlr; + + nvmf_ctrlr = calloc(1, sizeof(struct spdk_nvmf_ctrlr)); + if (nvmf_ctrlr == NULL) { + return NULL; + } + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Found physical NVMe device. 
Name: %s\n", name); + + nvmf_ctrlr->ctrlr = ctrlr; + snprintf(nvmf_ctrlr->name, MAX_NVME_NAME_LENGTH, "%s", name); + + spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, ctrlr); + + TAILQ_INSERT_HEAD(&g_ctrlrs, nvmf_ctrlr, entry); + + return nvmf_ctrlr; +} + +static bool +probe_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvmf_probe_ctx *ctx = cb_ctx; + uint16_t found_domain = spdk_pci_device_get_domain(dev); + uint8_t found_bus = spdk_pci_device_get_bus(dev); + uint8_t found_dev = spdk_pci_device_get_dev(dev); + uint8_t found_func = spdk_pci_device_get_func(dev); + int i; + + SPDK_NOTICELOG("Probing device %x:%x:%x.%x\n", + found_domain, found_bus, found_dev, found_func); + + if (spdk_pci_device_has_non_uio_driver(dev)) { + SPDK_NOTICELOG("Skipping device %x:%x:%x.%x because it is bound to the kernel\n", + found_domain, found_bus, found_dev, found_func); + return false; + } + + if (ctx->claim_all) { + return true; + } + + for (i = 0; i < SPDK_NVMF_MAX_NVME_DEVICES; i++) { + if (found_domain == ctx->whitelist[i].domain && + found_bus == ctx->whitelist[i].bus && + found_dev == ctx->whitelist[i].dev && + found_func == ctx->whitelist[i].func) { + return true; + } + } + + return false; +} + +static void +attach_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr *ctrlr, + const struct spdk_nvme_ctrlr_opts *opts) +{ + struct spdk_nvmf_probe_ctx *ctx = cb_ctx; + uint16_t found_domain = spdk_pci_device_get_domain(dev); + uint8_t found_bus = spdk_pci_device_get_bus(dev); + uint8_t found_dev = spdk_pci_device_get_dev(dev); + uint8_t found_func = spdk_pci_device_get_func(dev); + int i; + + SPDK_NOTICELOG("Attempting to claim device %x:%x:%x.%x\n", + found_domain, found_bus, found_dev, found_func); + + if (ctx->claim_all) { + /* If claim_all is true, whitelist_count can be repurposed here safely */ + char name[64]; + snprintf(name, 64, "Nvme%d", ctx->whitelist_count); + spdk_nvmf_ctrlr_create(name, found_domain, found_bus, + found_dev, found_func, ctrlr); + ctx->whitelist_count++; + return; + } + + for (i = 0; i < SPDK_NVMF_MAX_NVME_DEVICES; i++) { + if (found_domain == ctx->whitelist[i].domain && + found_bus == ctx->whitelist[i].bus && + found_dev == ctx->whitelist[i].dev && + found_func == ctx->whitelist[i].func) { + spdk_nvmf_ctrlr_create(ctx->whitelist[i].name, found_domain, found_bus, + found_dev, found_func, ctrlr); + return; + } + } + +} + +int +spdk_nvmf_init_nvme(void) +{ + struct spdk_conf_section *sp; + struct spdk_nvmf_probe_ctx ctx = { 0 }; + const char *val; + int i, rc; + + SPDK_NOTICELOG("*** Initialize NVMe Devices ***\n"); + sp = spdk_conf_find_section(NULL, "Nvme"); + if (sp == NULL) { + SPDK_ERRLOG("NVMe device section in config file not found!\n"); + return -1; + } + + val = spdk_conf_section_get_val(sp, "ClaimAllDevices"); + if (val != NULL) { + if (!strcmp(val, "Yes")) { + ctx.claim_all = true; + } + } + + if (!ctx.claim_all) { + for (i = 0; ; i++) { + unsigned int domain, bus, dev, func; + + val = spdk_conf_section_get_nmval(sp, "BDF", i, 0); + if (val == NULL) { + break; + } + + rc = sscanf(val, "%x:%x:%x.%x", &domain, &bus, &dev, &func); + if (rc != 4) { + SPDK_ERRLOG("Invalid format for BDF: %s\n", val); + return -1; + } + + ctx.whitelist[ctx.whitelist_count].domain = domain; + ctx.whitelist[ctx.whitelist_count].bus = bus; + ctx.whitelist[ctx.whitelist_count].dev = dev; + ctx.whitelist[ctx.whitelist_count].func = func; + + val = spdk_conf_section_get_nmval(sp, "BDF", i, 1); + if (val == NULL) { + 
SPDK_ERRLOG("BDF section with no device name\n"); + return -1; + } + + snprintf(ctx.whitelist[ctx.whitelist_count].name, MAX_NVME_NAME_LENGTH, "%s", val); + + ctx.whitelist_count++; + } + + if (ctx.whitelist_count == 0) { + SPDK_ERRLOG("No BDF section\n"); + return -1; + } + } + + /* Probe the physical NVMe devices */ + if (spdk_nvme_probe(&ctx, probe_cb, attach_cb)) { + SPDK_ERRLOG("One or more controllers failed in spdk_nvme_probe()\n"); + } + + /* check whether any nvme controller is probed */ + if (TAILQ_EMPTY(&g_ctrlrs)) { + SPDK_ERRLOG("No nvme controllers are probed\n"); + return -1; + } + + return 0; +} + +int +spdk_nvmf_shutdown_nvme(void) +{ + struct spdk_nvmf_ctrlr *ctrlr, *tctrlr; + + TAILQ_FOREACH_SAFE(ctrlr, &g_ctrlrs, entry, tctrlr) { + TAILQ_REMOVE(&g_ctrlrs, ctrlr, entry); + spdk_nvme_detach(ctrlr->ctrlr); + free(ctrlr); + } + + return 0; +} + +struct spdk_nvmf_ctrlr * +spdk_nvmf_ctrlr_claim(const char *name) +{ + struct spdk_nvmf_ctrlr *ctrlr, *tctrlr; + + if (name == NULL) { + return NULL; + } + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Attempting to claim NVMe controller %s\n", name); + + TAILQ_FOREACH_SAFE(ctrlr, &g_ctrlrs, entry, tctrlr) { + if (strncmp(ctrlr->name, name, MAX_NVME_NAME_LENGTH) == 0) { + if (ctrlr->claimed) { + SPDK_ERRLOG("Two subsystems are attempting to claim the same NVMe controller.\n"); + return NULL; + } + ctrlr->claimed = true; + return ctrlr; + } + } + + return NULL; +} diff --git a/lib/nvmf/controller.h b/lib/nvmf/controller.h new file mode 100644 index 0000000000..9c41c0c7e1 --- /dev/null +++ b/lib/nvmf/controller.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef NVMF_CONTROLLER_H
+#define NVMF_CONTROLLER_H
+
+#include 
+
+#include "nvmf_internal.h"
+
+#define MAX_NVME_NAME_LENGTH 64
+
+struct spdk_nvmf_ctrlr {
+	struct spdk_nvme_ctrlr *ctrlr;
+	char name[MAX_NVME_NAME_LENGTH];
+	bool claimed;
+	TAILQ_ENTRY(spdk_nvmf_ctrlr) entry;
+};
+
+int spdk_nvmf_init_nvme(void);
+int spdk_nvmf_shutdown_nvme(void);
+
+struct spdk_nvmf_ctrlr *
+spdk_nvmf_ctrlr_claim(const char *name);
+
+
+#endif
diff --git a/lib/nvmf/framework.c b/lib/nvmf/framework.c
new file mode 100644
index 0000000000..2a3d37a0b4
--- /dev/null
+++ b/lib/nvmf/framework.c
@@ -0,0 +1,370 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+
+#include "spdk/log.h"
+#include "spdk/conf.h"
+#include "nvmf.h"
+#include "conf.h"
+#include "conn.h"
+#include "controller.h"
+#include "port.h"
+#include "init_grp.h"
+#include "rdma.h"
+#include "subsystem_grp.h"
+#include "spdk/trace.h"
+
+
+#define MAX_SUBSYSTEMS 4
+
+/*
+ * Define the global pool sizes for the maximum possible
+ * requests across all target connection queues.
+ *
+ * SPDK_NVMF_ADMINQ_POOL_SIZE: There is a single admin queue
+ * for each subsystem session.
+ *
+ * SPDK_NVMF_IOQ_POOL_SIZE: MaxConnectionsPerSession is a config
+ * option that defines the total connection queues per session,
+ * so we subtract 1 here to not account for the admin queue.
+ *
+ * SPDK_NVMF_DESC_POOL_SIZE: The total number of RDMA descriptors
+ * needed for all possible admin and I/O queue requests.
+ */
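Plugging the configuration defaults from nvmf.h into the macros that follow (MAX_SUBSYSTEMS = 4, MaxSessionsPerSubsystem = 1, MaxQueueDepth = 128, MaxConnectionsPerSession = 4) gives concrete pool sizes. For illustration only:

#include <stdio.h>

int main(void)
{
	int subsystems = 4, sessions = 1, qdepth = 128, conns = 4;
	int adminq = subsystems * sessions * qdepth;               /* 512  */
	int ioq    = subsystems * sessions * (conns - 1) * qdepth; /* 1536 */

	printf("admin-queue pool: %d, I/O-queue pool: %d, descriptors: %d\n",
	       adminq, ioq, adminq + ioq);                         /* 2048 total */
	return 0;
}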
+#define SPDK_NVMF_ADMINQ_POOL_SIZE(spdk)	(MAX_SUBSYSTEMS * \
+						 (spdk->MaxSessionsPerSubsystem) * \
+						 spdk->MaxQueueDepth)
+
+#define SPDK_NVMF_IOQ_POOL_SIZE(spdk)		(MAX_SUBSYSTEMS * \
+						 (spdk->MaxSessionsPerSubsystem) * \
+						 (spdk->MaxConnectionsPerSession - 1) * \
+						 spdk->MaxQueueDepth)
+
+#define SPDK_NVMF_DESC_POOL_SIZE(spdk)		(SPDK_NVMF_ADMINQ_POOL_SIZE(spdk) + \
+						 SPDK_NVMF_IOQ_POOL_SIZE(spdk))
+
+#define SPDK_NVMF_MAX_CONNECTIONS(spdk)		(MAX_SUBSYSTEMS * \
+						 ((spdk)->MaxSessionsPerSubsystem) * \
+						 ((spdk)->MaxConnectionsPerSession))
+
+struct spdk_nvmf_globals g_nvmf_tgt;
+
+extern struct rte_mempool *request_mempool;
+
+static int
+spdk_nvmf_initialize_pools(struct spdk_nvmf_globals *spdk_nvmf)
+{
+	SPDK_NOTICELOG("\n*** NVMf Pool Creation ***\n");
+
+	/* create NVMe backend request pool */
+	spdk_nvmf->nvme_request_pool = rte_mempool_create("NVMe_Pool",
+				       SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf),
+				       spdk_nvme_request_size(),
+				       128, 0,
+				       NULL, NULL, NULL, NULL,
+				       SOCKET_ID_ANY, 0);
+	if (!spdk_nvmf->nvme_request_pool) {
+		SPDK_ERRLOG("create NVMe request pool failed\n");
+		return -1;
+	}
+	/* set global pointer for this pool referenced by libraries */
+	request_mempool = spdk_nvmf->nvme_request_pool;
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "NVMe request_mempool %p, size %u bytes\n",
+		      request_mempool,
+		      (unsigned int)(SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf) * spdk_nvme_request_size()));
+
+	spdk_nvmf->bb_small_pool =
+		rte_mempool_create("bb_small_pool",
+				   SPDK_NVMF_ADMINQ_POOL_SIZE(spdk_nvmf),
+				   SMALL_BB_MAX_SIZE,
+				   128, 0,
+				   NULL, NULL, NULL, NULL,
+				   SOCKET_ID_ANY, 0);
+	if (!spdk_nvmf->bb_small_pool) {
+		SPDK_ERRLOG("create bb small pool failed\n");
+		return -1;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Small data buffer pool %p, size 0x%x bytes\n",
+		      spdk_nvmf->bb_small_pool,
+		      (SPDK_NVMF_ADMINQ_POOL_SIZE(spdk_nvmf) * SMALL_BB_MAX_SIZE));
+
+	spdk_nvmf->bb_large_pool =
+		rte_mempool_create("bb_large_pool",
+				   SPDK_NVMF_IOQ_POOL_SIZE(spdk_nvmf),
+				   LARGE_BB_MAX_SIZE,
+				   32, 0,
+				   NULL, NULL, NULL, NULL,
+				   SOCKET_ID_ANY, 0);
+	if (!spdk_nvmf->bb_large_pool) {
+		SPDK_ERRLOG("create bb large pool failed\n");
+		return -1;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Large data buffer pool %p, size 0x%x bytes\n",
+		      spdk_nvmf->bb_large_pool,
+		      (SPDK_NVMF_IOQ_POOL_SIZE(spdk_nvmf) * LARGE_BB_MAX_SIZE));
+
+	spdk_nvmf->rx_desc_pool =
+		rte_mempool_create("RDMA RX Desc Pool",
+				   SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf),
+				   sizeof(struct nvme_qp_rx_desc),
+				   0, 0,
+				   NULL, NULL, NULL, NULL,
+				   SOCKET_ID_ANY, 0);
+	if (!spdk_nvmf->rx_desc_pool) {
+		SPDK_ERRLOG("create RX Desc pool failed\n");
+		return -1;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA Rx descriptor pool %p, size 0x%lx bytes\n",
+		      spdk_nvmf->rx_desc_pool,
+		      (SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf) * sizeof(struct nvme_qp_rx_desc)));
+
+	spdk_nvmf->tx_desc_pool =
+		rte_mempool_create("RDMA TX Desc Pool",
+				   SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf),
+				   sizeof(struct nvme_qp_tx_desc),
+				   0, 0,
+				   NULL, NULL, NULL, NULL,
+				   SOCKET_ID_ANY, 0);
+	if (!spdk_nvmf->tx_desc_pool) {
+		SPDK_ERRLOG("create TX Desc pool failed\n");
+		return -1;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA Tx descriptor pool %p, size 0x%lx bytes\n",
+		      spdk_nvmf->tx_desc_pool,
+		      (SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf) * sizeof(struct nvme_qp_tx_desc)));
+
+	return 0;
+}
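The spdk_nvmf_check_pool() helper that follows compares each pool's current free-element count against its creation size, so a clean shutdown requires every element to have been returned. For illustration only, the same idea as a standalone helper; pool_is_quiescent() is a local name, and rte_mempool_count() is the DPDK API used by this patch (later DPDK releases renamed it rte_mempool_avail_count()):

#include <rte_mempool.h>
#include <stdio.h>

static int
pool_is_quiescent(struct rte_mempool *pool, unsigned expected)
{
	unsigned avail = rte_mempool_count(pool); /* free elements right now */

	if (avail != expected) {
		fprintf(stderr, "%s: %u free of %u expected\n",
			pool->name, avail, expected);
		return 0; /* elements still in flight (or double-freed) */
	}
	return 1;
}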
+
+static int spdk_nvmf_check_pool(struct rte_mempool *pool, uint32_t count)
+{
+	if (rte_mempool_count(pool) != count) {
+		SPDK_ERRLOG("rte_mempool_count(%s) == %d, should be %d\n",
+			    pool->name, rte_mempool_count(pool), count);
+		return -1;
+	} else {
+		return 0;
+	}
+}
+
+int
+spdk_nvmf_check_pools(struct spdk_nvmf_globals *spdk_nvmf)
+{
+	int rc = 0;
+
+	rc += spdk_nvmf_check_pool(spdk_nvmf->nvme_request_pool, SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf));
+	rc += spdk_nvmf_check_pool(spdk_nvmf->rx_desc_pool, SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf));
+	rc += spdk_nvmf_check_pool(spdk_nvmf->tx_desc_pool, SPDK_NVMF_DESC_POOL_SIZE(spdk_nvmf));
+	rc += spdk_nvmf_check_pool(spdk_nvmf->bb_small_pool, SPDK_NVMF_ADMINQ_POOL_SIZE(spdk_nvmf));
+	rc += spdk_nvmf_check_pool(spdk_nvmf->bb_large_pool, SPDK_NVMF_IOQ_POOL_SIZE(spdk_nvmf));
+
+	if (rc == 0) {
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
+int
+nvmf_tgt_init(char *authfile, char *nodebase, int max_in_capsule_data,
+	      int max_sessions_per_subsystem,
+	      int max_queue_depth, int max_conn_per_sess, int max_recv_seg_len, int listen_port)
+{
+	int rc;
+
+	g_nvmf_tgt.authfile = strdup(authfile);
+	if (!g_nvmf_tgt.authfile) {
+		SPDK_ERRLOG("No authfile provided\n");
+		return -EINVAL;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "AuthFile: %s\n", g_nvmf_tgt.authfile);
+
+	g_nvmf_tgt.nodebase = strdup(nodebase);
+	if (!g_nvmf_tgt.nodebase) {
+		SPDK_ERRLOG("No nodebase provided\n");
+		return -EINVAL;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "NodeBase: %s\n", g_nvmf_tgt.nodebase);
+
+	if (max_in_capsule_data >= 16 &&
+	    max_in_capsule_data % 16 == 0 &&
+	    max_in_capsule_data <= SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE) {
+		g_nvmf_tgt.MaxInCapsuleData = max_in_capsule_data;
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "MaxInCapsuleData: %d\n",
+			      g_nvmf_tgt.MaxInCapsuleData);
+	} else {
+		SPDK_ERRLOG("Invalid MaxInCapsuleData: %d\n", max_in_capsule_data);
+		return -EINVAL;
+	}
+
+	if (max_sessions_per_subsystem >= 1 &&
+	    max_sessions_per_subsystem <= SPDK_NVMF_DEFAULT_MAX_SESSIONS_PER_SUBSYSTEM) {
+		g_nvmf_tgt.MaxSessionsPerSubsystem = max_sessions_per_subsystem;
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "MaxSessionsPerSubsystem: %d\n",
+			      g_nvmf_tgt.MaxSessionsPerSubsystem);
+	} else {
+		SPDK_ERRLOG("Invalid MaxSessionsPerSubsystem: %d\n", max_sessions_per_subsystem);
+		return -EINVAL;
+	}
+
+	if (max_queue_depth >= 1 &&
+	    max_queue_depth <= SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH) {
+		g_nvmf_tgt.MaxQueueDepth = max_queue_depth;
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "MaxQueueDepth: %d\n",
+			      g_nvmf_tgt.MaxQueueDepth);
+	} else {
+		SPDK_ERRLOG("Invalid MaxQueueDepth: %d\n", max_queue_depth);
+		return -EINVAL;
+	}
+
+	if (max_conn_per_sess >= 1 &&
+	    max_conn_per_sess <= SPDK_NVMF_DEFAULT_MAX_CONNECTIONS_PER_SESSION) {
+		g_nvmf_tgt.MaxConnectionsPerSession = max_conn_per_sess;
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "MaxConnectionsPerSession: %d\n",
+			      g_nvmf_tgt.MaxConnectionsPerSession);
+	} else {
+		SPDK_ERRLOG("Invalid MaxConnectionsPerSession: %d\n", max_conn_per_sess);
+		return -EINVAL;
+	}
+
+
+	g_nvmf_tgt.MaxRecvDataSegmentLength = max_recv_seg_len;
+	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "MaxRecvDataSegmentLength %d\n",
+		      g_nvmf_tgt.MaxRecvDataSegmentLength);
+
+	rc = pthread_mutex_init(&g_nvmf_tgt.mutex, NULL);
+	if (rc != 0) {
+		SPDK_ERRLOG("mutex_init() failed\n");
+		return -1;
+	}
+
+	/* init nvmf specific config options */
+	if (!g_nvmf_tgt.sin_port) {
+		g_nvmf_tgt.sin_port = htons(SPDK_NVMF_DEFAULT_SIN_PORT);
+	}
+
+	rc = spdk_nvmf_initialize_pools(&g_nvmf_tgt);
+	if (rc != 0) {
+		SPDK_ERRLOG("spdk_nvmf_initialize_pools() failed\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int
+nvmf_tgt_subsystem_initialize(void)
+{
+	int rc;
+
+	/* initialize from configuration file */
+	rc = spdk_nvmf_parse_conf();
+	if (rc < 0) {
SPDK_ERRLOG("spdk_nvmf_parse_conf() failed\n"); + return rc; + } + + /* initialize with the NVMf transport */ + rc = nvmf_rdma_init(); + if (rc <= 0) { + SPDK_ERRLOG("nvmf_rdma_init() failed\n"); + return rc; + } + /* initialize NVMe/NVMf backend */ + + SPDK_NOTICELOG("\n*** NVMf Library Init ***\n"); + rc = nvmf_initialize(); + if (rc < 0) { + SPDK_ERRLOG("nvmf_initialize() failed\n"); + return rc; + } + + rc = spdk_nvmf_init_nvme(); + if (rc < 0) { + fprintf(stderr, "NVMf could not initialize NVMe devices.\n"); + return -1; + } + + rc = spdk_initialize_nvmf_subsystems(); + if (rc < 0) { + SPDK_ERRLOG("spdk_initialize_nvmf_subsystems failed\n"); + return rc; + } + rc = spdk_initialize_nvmf_conns(SPDK_NVMF_MAX_CONNECTIONS(&g_nvmf_tgt)); + if (rc < 0) { + SPDK_ERRLOG("spdk_initialize_nvmf_conns() failed\n"); + return rc; + } + return rc; +} + +static int +nvmf_tgt_subsystem_fini(void) +{ + spdk_shutdown_nvmf_subsystems(); + nvmf_shutdown(); + nvmf_initiator_group_array_destroy(); + spdk_nvmf_port_destroy_all(); + free(g_nvmf_tgt.authfile); + free(g_nvmf_tgt.nodebase); + + pthread_mutex_destroy(&g_nvmf_tgt.mutex); + return 0; +} + + +SPDK_SUBSYSTEM_REGISTER(nvmf, nvmf_tgt_subsystem_initialize, nvmf_tgt_subsystem_fini, NULL) + +SPDK_TRACE_REGISTER_FN(nvmf_trace) +{ + spdk_trace_register_object(OBJECT_NVMF_IO, 'r'); + spdk_trace_register_description("NVMF_IO_START", "", TRACE_NVMF_IO_START, + OWNER_NONE, OBJECT_NVMF_IO, 1, 0, 0, ""); + spdk_trace_register_description("NVMF_RDMA_READ_START", "", TRACE_RDMA_READ_START, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_RDMA_WRITE_START", "", TRACE_RDMA_WRITE_START, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_RDMA_READ_COMPLETE", "", TRACE_RDMA_READ_COMPLETE, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_RDMA_WRITE_COMPLETE", "", TRACE_RDMA_WRITE_COMPLETE, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_LIB_READ_START", "", TRACE_NVMF_LIB_READ_START, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_LIB_WRITE_START", "", TRACE_NVMF_LIB_WRITE_START, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_LIB_COMPLETE", "", TRACE_NVMF_LIB_COMPLETE, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); + spdk_trace_register_description("NVMF_IO_COMPLETION_DONE", "", TRACE_NVMF_IO_COMPLETE, + OWNER_NONE, OBJECT_NVMF_IO, 0, 0, 0, ""); +} diff --git a/lib/nvmf/init_grp.c b/lib/nvmf/init_grp.c new file mode 100644 index 0000000000..4ac937dcef --- /dev/null +++ b/lib/nvmf/init_grp.c @@ -0,0 +1,299 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "init_grp.h" +#include "nvmf.h" +#include "subsystem_grp.h" +#include "spdk/log.h" +#include "spdk/trace.h" + +#define MAX_MASKBUF 128 +#define MAX_INITIATOR 8 +#define MAX_INITIATOR_GROUP 32 + +#define MAX_ADDRBUF 64 +#define MAX_INITIATOR_ADDR (MAX_ADDRBUF) +#define MAX_INITIATOR_NAME 256 +#define MAX_NETMASK 256 + +static TAILQ_HEAD(, spdk_nvmf_init_grp) g_ig_head = TAILQ_HEAD_INITIALIZER(g_ig_head); + +struct spdk_nvmf_init_grp * +spdk_nvmf_init_grp_create(int tag, + int num_netmasks, + char **netmasks) +{ + int i; + struct spdk_nvmf_init_grp *ig = NULL; + + /* Make sure there are no duplicate initiator group tags */ + if (nvmf_initiator_group_find_by_tag(tag)) { + SPDK_ERRLOG("Initiator group creation failed due to duplicate initiator group tag (%d)\n", + tag); + return NULL; + } + + if (num_netmasks > MAX_NETMASK) { + SPDK_ERRLOG("%d > MAX_NETMASK\n", num_netmasks); + return NULL; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, + "add initiator group (from initiator list) tag=%d, #masks=%d\n", + tag, num_netmasks); + + ig = calloc(1, sizeof(*ig)); + if (!ig) { + SPDK_ERRLOG("initiator group malloc error (%d)\n", tag); + return NULL; + } + + ig->tag = tag; + + ig->nnetmasks = num_netmasks; + ig->netmasks = netmasks; + for (i = 0; i < num_netmasks; i++) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Netmask %s\n", ig->netmasks[i]); + } + + ig->state = GROUP_INIT; + + pthread_mutex_lock(&g_nvmf_tgt.mutex); + ig->state = GROUP_READY; + TAILQ_INSERT_TAIL(&g_ig_head, ig, tailq); + pthread_mutex_unlock(&g_nvmf_tgt.mutex); + + return ig; +} + +static void +nvmf_initiator_group_destroy(struct spdk_nvmf_init_grp *ig) +{ +#if 0 // TODO: fix bogus scan-build warning about use-after-free + int i; + + if (!ig) { + return; + } + + for (i = 0; i < ig->nnetmasks; i++) { + free(ig->netmasks[i]); + } + + free(ig->netmasks); + free(ig); +#endif +} + + +static int +spdk_nvmf_allow_ipv6(const char *netmask, const char *addr) +{ + struct in6_addr in6_mask; + struct in6_addr in6_addr; + char mask[MAX_MASKBUF]; + const char *p; + size_t n; + int bits, bmask; + int i; + + if (netmask[0] != '[') + return 0; + p = strchr(netmask, ']'); + if (p == NULL) + return 0; + n = p - (netmask + 1); + if (n + 1 > sizeof mask) + return 0; + + memcpy(mask, netmask + 1, n); + mask[n] = '\0'; + p++; + + if (p[0] == '/') { + bits = (int) strtol(p + 1, NULL, 10); + if (bits < 0 || bits > 128) + return 0; + } else { + bits = 128; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "input %s\n", addr); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "mask %s / %d\n", mask, bits); + + /* presentation to network order binary */ + if (inet_pton(AF_INET6, mask, &in6_mask) <= 0 + || inet_pton(AF_INET6, addr, &in6_addr) <= 0) { + return 0; + } + + /* 
check 128bits */ + for (i = 0; i < (bits / 8); i++) { + if (in6_mask.s6_addr[i] != in6_addr.s6_addr[i]) + return 0; + } + if (bits % 8 && i < (MAX_MASKBUF / 8)) { + bmask = (0xffU << (8 - (bits % 8))) & 0xffU; + if ((in6_mask.s6_addr[i] & bmask) != (in6_addr.s6_addr[i] & bmask)) + return 0; + } + + /* match */ + return 1; +} + +static int +spdk_nvmf_allow_ipv4(const char *netmask, const char *addr) +{ + struct in_addr in4_mask; + struct in_addr in4_addr; + char mask[MAX_MASKBUF]; + const char *p; + uint32_t bmask; + size_t n; + int bits; + + p = strchr(netmask, '/'); + if (p == NULL) { + p = netmask + strlen(netmask); + } + n = p - netmask; + if (n + 1 > sizeof mask) + return 0; + + memcpy(mask, netmask, n); + mask[n] = '\0'; + + if (p[0] == '/') { + bits = (int) strtol(p + 1, NULL, 10); + if (bits < 0 || bits > 32) + return 0; + } else { + bits = 32; + } + + /* presentation to network order binary */ + if (inet_pton(AF_INET, mask, &in4_mask) <= 0 + || inet_pton(AF_INET, addr, &in4_addr) <= 0) { + return 0; + } + + /* check 32bits */ + bmask = (0xffffffffULL << (32 - bits)) & 0xffffffffU; + if ((ntohl(in4_mask.s_addr) & bmask) != (ntohl(in4_addr.s_addr) & bmask)) + return 0; + + /* match */ + return 1; +} + +static int +spdk_nvmf_allow_netmask(const char *netmask, const char *addr) +{ + if (netmask == NULL || addr == NULL) + return 0; + if (strcasecmp(netmask, "ALL") == 0) + return 1; + if (netmask[0] == '[') { + /* IPv6 */ + if (spdk_nvmf_allow_ipv6(netmask, addr)) + return 1; + } else { + /* IPv4 */ + if (spdk_nvmf_allow_ipv4(netmask, addr)) + return 1; + } + return 0; +} + +struct spdk_nvmf_init_grp * +nvmf_initiator_group_find_by_addr(char *addr) +{ + struct spdk_nvmf_init_grp *ig; + int i; + int rc; + + if (addr == NULL) + return NULL; + + TAILQ_FOREACH(ig, &g_ig_head, tailq) { + /* check netmask of each group looking for permission */ + for (i = 0; i < ig->nnetmasks; i++) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "netmask=%s, addr=%s\n", + ig->netmasks[i], addr); + rc = spdk_nvmf_allow_netmask(ig->netmasks[i], addr); + if (rc > 0) { + /* OK netmask */ + return ig; + } + } + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "No initiator group addr match for %s\n", + addr); + return NULL; +} + +struct spdk_nvmf_init_grp * +nvmf_initiator_group_find_by_tag(int tag) +{ + struct spdk_nvmf_init_grp *ig; + + TAILQ_FOREACH(ig, &g_ig_head, tailq) { + if (ig->tag == tag) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " found initiator group with tag: ig %p\n", ig); + return ig; + } + } + + return NULL; +} + +void +nvmf_initiator_group_array_destroy(void) +{ + struct spdk_nvmf_init_grp *ig; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + pthread_mutex_lock(&g_nvmf_tgt.mutex); + while (!TAILQ_EMPTY(&g_ig_head)) { + ig = TAILQ_FIRST(&g_ig_head); + ig->state = GROUP_DESTROY; + TAILQ_REMOVE(&g_ig_head, ig, tailq); + nvmf_initiator_group_destroy(ig); + } + pthread_mutex_unlock(&g_nvmf_tgt.mutex); +} diff --git a/lib/nvmf/init_grp.h b/lib/nvmf/init_grp.h new file mode 100644 index 0000000000..2edc770006 --- /dev/null +++ b/lib/nvmf/init_grp.h @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NVMF_INIT_GRP_H +#define NVMF_INIT_GRP_H + +#include "spdk/queue.h" +#include "port.h" // For enum group_state + +struct spdk_nvmf_init_grp { + int nnetmasks; + char **netmasks; + int ref; + int tag; + enum group_state state; + TAILQ_ENTRY(spdk_nvmf_init_grp) tailq; +}; + +/* NVMf Initiator Group management API */ +struct spdk_nvmf_init_grp * +spdk_nvmf_init_grp_create(int tag, + int num_netmasks, + char **netmasks); + +struct spdk_nvmf_init_grp * +nvmf_initiator_group_find_by_tag(int tag); + +struct spdk_nvmf_init_grp * +nvmf_initiator_group_find_by_addr(char *addr); + +void +nvmf_initiator_group_array_destroy(void); + +#endif /* NVMF_INIT_GRP_H */ diff --git a/lib/nvmf/nvmf.c b/lib/nvmf/nvmf.c new file mode 100644 index 0000000000..188ce305f4 --- /dev/null +++ b/lib/nvmf/nvmf.c @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "conn.h" +#include "controller.h" +#include "port.h" +#include "init_grp.h" +#include "nvmf_internal.h" +#include "nvmf.h" +#include "rdma.h" +#include "subsystem_grp.h" +#include "spdk/conf.h" +#include "spdk/log.h" +#include "spdk/pci.h" +#include "spdk/trace.h" + +SPDK_LOG_REGISTER_TRACE_FLAG("nvmf", SPDK_TRACE_NVMF) + +/* + * The NVMf library maintains context for a list of subsystems. Each + * subsystem will be associated with one or more NVMe controllers + * that the library discovers. It is expected that the NVMf library + * consumer will make requests to create the desired subsystems. + */ +struct nvmf_driver { + pthread_mutex_t mutex; +}; + +static struct nvmf_driver g_nvmf_driver; + +extern struct rte_mempool *request_mempool; + +int +nvmf_initialize(void) +{ + struct nvmf_driver *system = &g_nvmf_driver; + int err; + + if (request_mempool == NULL) { + fprintf(stderr, "NVMf application has not created request mempool!\n"); + return -1; + } + + err = pthread_mutex_init(&system->mutex, NULL); + if (err < 0) { + fprintf(stderr, "NVMf system pthread_mutex_init() failed\n"); + return -1; + } + + return 0; +} + +void +nvmf_shutdown(void) +{ + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_shutdown\n"); + + spdk_nvmf_shutdown_nvme(); +} diff --git a/lib/nvmf/nvmf.h b/lib/nvmf/nvmf.h new file mode 100644 index 0000000000..402dc79fd7 --- /dev/null +++ b/lib/nvmf/nvmf.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NVMF_H__
+#define __NVMF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <rte_config.h>
+#include <rte_mempool.h>
+
+#include "spdk/nvmf_spec.h"
+#include "spdk/nvme.h"
+
+/**
+ * \file
+ * NVMe over Fabrics target library: global target state and the
+ * initialization/shutdown entry points.
+ */
+
+#define SMALL_BB_MAX_SIZE 4096
+#define LARGE_BB_MAX_SIZE (128 * 1024)
+
+/*
+ * NVMf target supports a maximum transfer size that is equal to
+ * a single allocated bounce buffer per request.
+ */
+#define SPDK_NVMF_MAX_RECV_DATA_TRANSFER_SIZE LARGE_BB_MAX_SIZE
+
+
+#define SPDK_NVMF_BUILD_ETC "/usr/local/etc/nvmf"
+#define SPDK_NVMF_DEFAULT_NUM_SESSIONS_PER_LCORE 1
+#define SPDK_NVMF_DEFAULT_AUTHFILE SPDK_NVMF_BUILD_ETC "/auth.conf"
+#define SPDK_NVMF_DEFAULT_NODEBASE "iqn.2013-10.com.intel.spdk"
+#define SPDK_NVMF_DEFAULT_IN_CAPSULE_DATA_SIZE 1024
+#define SPDK_NVMF_DEFAULT_MAX_SESSIONS_PER_SUBSYSTEM 1
+#define SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH 128
+#define SPDK_NVMF_DEFAULT_MAX_CONNECTIONS_PER_SESSION 4
+#define SPDK_NVMF_DEFAULT_SIN_PORT ((uint16_t)7174)
+
+#define OBJECT_NVMF_IO 0x30
+
+#define TRACE_GROUP_NVMF 0x3
+#define TRACE_NVMF_IO_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x0)
+#define TRACE_RDMA_READ_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x1)
+#define TRACE_RDMA_WRITE_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x2)
+#define TRACE_RDMA_READ_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x3)
+#define TRACE_RDMA_WRITE_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x4)
+#define TRACE_NVMF_LIB_READ_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x5)
+#define TRACE_NVMF_LIB_WRITE_START SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x6)
+#define TRACE_NVMF_LIB_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x7)
+#define TRACE_NVMF_IO_COMPLETE SPDK_TPOINT_ID(TRACE_GROUP_NVMF, 0x8)
+
+struct spdk_nvmf_globals {
+ char *authfile;
+
+ char *nodebase;
+
+ pthread_mutex_t mutex;
+
+ int MaxInCapsuleData;
+ int MaxSessionsPerSubsystem;
+ int MaxQueueDepth;
+ int MaxConnectionsPerSession;
+ int MaxRecvDataSegmentLength;
+
+ struct rte_mempool *rx_desc_pool;
+ struct rte_mempool *tx_desc_pool;
+ struct rte_mempool *nvme_request_pool;
+ struct rte_mempool *bb_small_pool;
+ struct rte_mempool *bb_large_pool;
+ uint16_t sin_port;
+};
+
+extern struct spdk_nvmf_globals g_nvmf_tgt;
+
+int
+nvmf_tgt_init(char *authfile, char *nodebase, int max_in_capsule_data, int max_sessions,
+ int max_queue_depth, int max_conn_per_sess, int max_recv_seg_len, int listen_port);
+
+struct nvmf_request;
+
+int
+spdk_nvmf_check_pools(struct spdk_nvmf_globals *spdk_nvmf);
+
+int
+nvmf_initialize(void);
+
+void
+nvmf_shutdown(void);
+
+#endif /* __NVMF_H__ */
diff --git a/lib/nvmf/nvmf_admin_cmd.c b/lib/nvmf/nvmf_admin_cmd.c
new file mode 100644
index 0000000000..6f9791c47d
--- /dev/null
+++ b/lib/nvmf/nvmf_admin_cmd.c
@@ -0,0 +1,338 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "nvmf.h" +#include "nvmf_internal.h" +#include "session.h" +#include "subsystem_grp.h" +#include "spdk/log.h" +#include "spdk/nvme.h" +#include "spdk/nvme_spec.h" +#include "spdk/pci.h" +#include "spdk/trace.h" + +extern struct rte_mempool *request_mempool; + +int +nvmf_process_admin_cmd(struct nvmf_session *session, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + struct nvmf_request *req_state) +{ + struct spdk_nvme_cpl *response; + struct spdk_nvmf_subsystem *subsystem = session->subsys; + struct spdk_nvme_ctrlr *ctrlr = NULL; + uint32_t nsid = 0; + int rc = 0; + uint8_t feature; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_admin_cmd: req_state %p\n", + req_state); + + /* pre-set response details for this command */ + response = &req_state->rsp->nvme_cpl; + response->status.sc = SPDK_NVME_SC_SUCCESS; + response->cid = cmd->cid; + + /* verify subsystem */ + if (subsystem == NULL) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_admin_cmd: Subsystem Not Initialized!\n"); + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + return -1; + } + + if (cmd->nsid == 0) { + /* may be valid for the requested command. but need + to at least map to a known valid controller. + Note: Issue when in multi-controller subsystem + mode, commands that do not provide ns_id can not + be mapped to valid HW ctrlr! 
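+ (For example, Identify Controller is legitimately issued with
+ nsid 0; the stop-gap below simply maps such commands to
+ ns_list_map[0].ctrlr, which assumes the subsystem has at least
+ one namespace provisioned.)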
This is where + definition of a virtual controller is required */ + ctrlr = subsystem->ns_list_map[0].ctrlr; + nsid = 0; + } else { + /* verify namespace id */ + if (cmd->nsid > MAX_PER_SUBSYSTEM_NAMESPACES) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_admin_cmd: Invalid NS_ID %x\n", + cmd->nsid); + response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + return -1; + } + + ctrlr = subsystem->ns_list_map[cmd->nsid - 1].ctrlr; + nsid = subsystem->ns_list_map[cmd->nsid - 1].nvme_ns_id; + } + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_admin_cmd: ctrlr %p nvme ns_id %d\n", ctrlr, nsid); + + switch (cmd->opc) { + case SPDK_NVME_OPC_IDENTIFY: + if (buf == NULL) { + SPDK_ERRLOG("identify command with no buffer\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + break; + } + if (cmd->cdw10 == 0) { + /* identify namespace */ + struct spdk_nvme_ns *ns; + const struct spdk_nvme_ns_data *nsdata; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Identify Namespace\n"); + if (nsid == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_admin_cmd: Invalid NS_ID = 0\n"); + response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT; + rc = -1; + break; + } + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Unsuccessful query for Namespace reference\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + break; + } + nsdata = spdk_nvme_ns_get_data(ns); + memcpy((char *)buf, (char *)nsdata, sizeof(struct spdk_nvme_ns_data)); + req_state->cb_fn(req_state); + } else if (cmd->cdw10 == 1) { + /* identify controller */ + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Identify Controller\n"); + /* pull from virtual controller context */ + memcpy(buf, (char *)&session->vcdata, sizeof(struct spdk_nvme_ctrlr_data)); + req_state->cb_fn(req_state); + } else { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Identify Namespace List\n"); + response->status.sc = SPDK_NVME_SC_INVALID_OPCODE; + rc = -1; + } + break; + case SPDK_NVME_OPC_DELETE_IO_SQ: { + uint16_t qid = cmd->cdw10 & 0xffff; + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Delete IO SQ, QID %x\n", qid); + + if (qid >= MAX_SESSION_IO_QUEUES) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " Exceeded Session QP Index Limit\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + } else if (session->qps[qid].sq_active == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " Session SQ QP Index %x was not active!\n", qid); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + } else { + session->qps[qid].sq_size = 0; + session->qps[qid].sq_active = 0; + if (session->qps[qid].cq_active) + session->active_queues--; + rc = 1; + } + } + break; + case SPDK_NVME_OPC_DELETE_IO_CQ: { + uint16_t qid = cmd->cdw10 & 0xffff; + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Delete IO CQ, QID %x\n", qid); + + if (qid >= MAX_SESSION_IO_QUEUES) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " Exceeded Session QP Index Limit\n"); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + } else if (session->qps[qid].cq_active == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, " Session CQ QP Index %x was not active!\n", qid); + response->status.sc = SPDK_NVME_SC_INVALID_FIELD; + rc = -1; + } else { + session->qps[qid].cq_size = 0; + session->qps[qid].cq_active = 0; + if (session->qps[qid].sq_active) + session->active_queues--; + rc = 1; + } + } + break; + case SPDK_NVME_OPC_CREATE_IO_SQ: + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Create IO SQ\n"); + /* queues have already been initialized for this session. 
+ So for now, save details in the session recording which QPs
+ the remote host attempts to enable.
+ */
+ {
+ uint16_t qid = cmd->cdw10 & 0xffff;
+ uint16_t qsize = cmd->cdw10 >> 16;
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " QID %x, Queue Size %x, CDW11 %x\n",
+ qid, qsize, cmd->cdw11);
+
+ if (qid >= MAX_SESSION_IO_QUEUES) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " Exceeded Session QP Index Limit\n");
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ rc = -1;
+ } else if (session->qps[qid].sq_active > 0) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " Session SQ QP Index %x Already active!\n", qid);
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ rc = -1;
+ } else {
+ session->qps[qid].sq_size = qsize;
+ session->qps[qid].sq_active = 1;
+ if (session->qps[qid].cq_active)
+ session->active_queues++;
+ rc = 1;
+ }
+ }
+ break;
+ case SPDK_NVME_OPC_CREATE_IO_CQ:
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Create IO CQ\n");
+ /* Queues have already been initialized for this session,
+ so for now save details in the session recording which QPs
+ the remote host attempts to enable.
+ */
+ {
+ uint16_t qid = cmd->cdw10 & 0xffff;
+ uint16_t qsize = cmd->cdw10 >> 16;
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " QID %x, Queue Size %x, CDW11 %x\n",
+ qid, qsize, cmd->cdw11);
+
+ if (qid >= MAX_SESSION_IO_QUEUES) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " Exceeded Session QP Index Limit\n");
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ rc = -1;
+ } else if (session->qps[qid].cq_active > 0) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, " Session CQ QP Index %x Already active!\n", qid);
+ response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
+ rc = -1;
+ } else {
+ session->qps[qid].cq_size = qsize;
+ session->qps[qid].cq_active = 1;
+ if (session->qps[qid].sq_active)
+ session->active_queues++;
+ rc = 1;
+ }
+ }
+ break;
+ case SPDK_NVME_OPC_GET_FEATURES:
+ feature = cmd->cdw10 & 0xff; /* isolate the FID value */
+ switch (feature) {
+ case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Get Features - Number of Queues\n");
+ response->cdw0 = ((session->max_io_queues - 1) << 16) | (session->max_io_queues - 1);
+ rc = 1; /* immediate completion */
+ break;
+ case SPDK_NVME_FEAT_LBA_RANGE_TYPE:
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Get Features - LBA Range Type\n");
+ cmd->nsid = nsid;
+ goto passthrough;
+ break;
+ default:
+ goto passthrough;
+ break;
+ }
+ break;
+ case SPDK_NVME_OPC_SET_FEATURES:
+ feature = cmd->cdw10 & 0xff; /* isolate the FID value */
+ switch (feature) {
+ case SPDK_NVME_FEAT_NUMBER_OF_QUEUES:
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Set Features - Number of Queues, cdw11 %x\n", cmd->cdw11);
+
+ /* verify that the controller is ready to process commands */
+ if (session->active_queues != 0) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Queue pairs already active!\n");
+ response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+ } else {
+ response->cdw0 = ((session->max_io_queues - 1) << 16) | (session->max_io_queues - 1);
+ }
+ rc = 1; /* immediate completion */
+ break;
+ default:
+ goto passthrough;
+ break;
+ }
+ break;
+ case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "Async Event Request\n");
+ /*
+ Trap the request here and save it in the session context
+ until the NVMe library indicates some event.
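+ As a purely illustrative sketch (hypothetical names, not part
+ of this patch), event delivery could later complete the trapped
+ request along these lines:
+ aer_req = session->aer_req_state;
+ aer_req->rsp->nvme_cpl.cdw0 = event_info; /* assumed encoding */
+ aer_req->cb_fn(aer_req);
+ session->aer_req_state = NULL;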
+ */ + if (session->aer_req_state == NULL) { + session->aer_req_state = req_state; + } else { + /* AER already recorded, send error response */ + SPDK_TRACELOG(SPDK_TRACE_NVMF, "AER already active!\n"); + response->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED; + rc = 1; /* immediate completion */ + } + break; + case SPDK_NVME_OPC_KEEP_ALIVE: + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Keep Alive\n"); + /* + To handle keep alive just clear or reset the + session based keep alive duration counter. + When added, a separate timer based process + will monitor if the time since last recorded + keep alive has exceeded the max duration and + take appropriate action. + */ + //session->keep_alive_timestamp = ; + rc = 1; /* immediate completion */ + break; + default: +passthrough: + SPDK_TRACELOG(SPDK_TRACE_NVMF, "RAW Passthrough: Admin Opcode %x for ctrlr %p\n", + cmd->opc, ctrlr); + cmd->nsid = nsid; + spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, + cmd, + buf, len, + nvmf_complete_cmd, + (void *)req_state); + break; + } + + return rc; +} + +void +nvmf_check_admin_completions(struct nvmf_session *session) +{ + struct spdk_nvmf_subsystem *subsystem = session->subsys; + struct spdk_nvme_ctrlr *ctrlr, *prev_ctrlr = NULL; + int i; + + for (i = 0; i < MAX_PER_SUBSYSTEM_NAMESPACES; i++) { + ctrlr = subsystem->ns_list_map[i].ctrlr; + if (ctrlr == NULL) + continue; + if (ctrlr != NULL && ctrlr != prev_ctrlr) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + prev_ctrlr = ctrlr; + } + } +} + diff --git a/lib/nvmf/nvmf_internal.h b/lib/nvmf/nvmf_internal.h new file mode 100644 index 0000000000..b3be2d23aa --- /dev/null +++ b/lib/nvmf/nvmf_internal.h @@ -0,0 +1,131 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __NVMF_INTERNAL_H__ +#define __NVMF_INTERNAL_H__ + +#include +#include +#include +#include +#include + +#include "spdk/nvmf_spec.h" +#include "spdk/assert.h" +#include "spdk/queue.h" + +#define nvmf_min(a,b) (((a)<(b))?(a):(b)) + +struct nvmf_request; + +typedef void (*nvmf_cb_fn_t)(struct nvmf_request *); + +union sgl_shift { + struct spdk_nvmf_keyed_sgl_descriptor nvmf_sgl; + struct spdk_nvme_sgl_descriptor nvme_sgl; +}; +SPDK_STATIC_ASSERT(sizeof(union sgl_shift) == 16, "Incorrect size"); + +union nvmf_h2c_msg { + struct spdk_nvmf_capsule_cmd nvmf_cmd; + struct spdk_nvme_cmd nvme_cmd; + struct spdk_nvmf_fabric_prop_set_cmd prop_set_cmd; + struct spdk_nvmf_fabric_prop_get_cmd prop_get_cmd; + struct spdk_nvmf_fabric_connect_cmd connect_cmd; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size"); + +union nvmf_c2h_msg { + struct spdk_nvmf_capsule_rsp nvmf_rsp; + struct spdk_nvme_cpl nvme_cpl; + struct spdk_nvmf_fabric_prop_set_rsp prop_set_rsp; + struct spdk_nvmf_fabric_prop_get_rsp prop_get_rsp; + struct spdk_nvmf_fabric_connect_rsp connect_rsp; +}; +SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size"); + +#define NVMF_H2C_MAX_MSG (sizeof(union nvmf_h2c_msg)) +#define NVMF_C2H_MAX_MSG (sizeof(union nvmf_c2h_msg)) + +#define NVMF_CNTLID_SUBS_SHIFT 8 + +enum pending_rdma_action { + NVMF_PENDING_NONE = 0, + NVMF_PENDING_CONNECT, + NVMF_PENDING_READ, + NVMF_PENDING_WRITE, + NVMF_PENDING_ADMIN, +}; + +struct nvmf_request { + struct nvmf_session *session; + void *fabric_tx_ctx; + void *fabric_rx_ctx; + uint16_t cid; /* command identifier */ + uint64_t remote_addr; + uint32_t rkey; + uint32_t length; + union nvmf_h2c_msg *cmd; + union nvmf_c2h_msg *rsp; + enum pending_rdma_action pending; + nvmf_cb_fn_t cb_fn; + + TAILQ_ENTRY(nvmf_request) entries; +}; + +/* + * Some NVMe command definitions not provided in the nvme_spec.h file + */ + +/* read command dword 12 */ +struct __attribute__((packed)) nvme_read_cdw12 { + uint16_t nlb; /* number of logical blocks */ + uint16_t rsvd : 10; + uint8_t prinfo : 4; /* protection information field */ + uint8_t fua : 1; /* force unit access */ + uint8_t lr : 1; /* limited retry */ +}; + +/* read command dword 13 */ +struct __attribute__((packed)) nvme_read_cdw13 { + uint8_t dsm_af : 4; /* access frequency */ + uint8_t dsm_lat : 2; /* access latency */ + uint8_t dsm_seq : 1; /* sequential request */ + uint8_t dsm_inc : 1; /* incompressible */ + uint8_t rsvd[3]; +}; + +void +nvmf_complete_cmd(void *rsp, const struct spdk_nvme_cpl *cmp); + +#endif /* __NVMF_INTERNAL_H__ */ diff --git a/lib/nvmf/nvmf_io_cmd.c b/lib/nvmf/nvmf_io_cmd.c new file mode 100644 index 0000000000..785b2ff846 --- /dev/null +++ b/lib/nvmf/nvmf_io_cmd.c @@ -0,0 +1,161 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nvmf.h"
+#include "nvmf_internal.h"
+#include "session.h"
+#include "subsystem_grp.h"
+#include "spdk/log.h"
+#include "spdk/nvme.h"
+#include "spdk/nvme_spec.h"
+#include "spdk/pci.h"
+#include "spdk/trace.h"
+
+extern struct rte_mempool *request_mempool;
+
+int
+nvmf_process_io_cmd(struct nvmf_session *session,
+ struct spdk_nvme_cmd *cmd,
+ void *buf, uint32_t len,
+ struct nvmf_request *req_state)
+{
+ struct spdk_nvme_cpl *response;
+ struct spdk_nvmf_subsystem *subsystem = session->subsys;
+ struct spdk_nvmf_namespace *nvmf_ns;
+ struct spdk_nvme_ctrlr *ctrlr = NULL;
+ struct spdk_nvme_ns *ns = NULL;
+ struct spdk_nvme_qpair *qpair;
+ uint32_t nsid = 0;
+ struct nvme_read_cdw12 *cdw12;
+ uint64_t lba_address;
+ uint32_t lba_count;
+ uint32_t io_flags;
+ int rc = 0;
+
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: req_state %p\n", req_state);
+
+ /* pre-set response details for this command */
+ response = &req_state->rsp->nvme_cpl;
+ response->status.sc = SPDK_NVME_SC_SUCCESS;
+ response->cid = cmd->cid;
+
+ /* verify subsystem */
+ if (subsystem == NULL) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: Subsystem Not Initialized!\n");
+ response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
+ return -1;
+ }
+
+ /* verify that the controller is ready to process commands */
+ if (session->vcprop.csts.bits.rdy == 0) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: Subsystem Controller Not Ready!\n");
+ response->status.sc = SPDK_NVME_SC_NAMESPACE_NOT_READY;
+ return -1;
+ }
+
+ /* verify namespace id */
+ if (cmd->nsid == 0 || cmd->nsid > MAX_PER_SUBSYSTEM_NAMESPACES) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: Invalid NS_ID %x\n",
+ cmd->nsid);
+ response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
+ return -1;
+ }
+
+ nvmf_ns = &subsystem->ns_list_map[cmd->nsid - 1];
+ ctrlr = nvmf_ns->ctrlr;
+ nsid = nvmf_ns->nvme_ns_id;
+ ns = nvmf_ns->ns;
+ qpair = nvmf_ns->qpair;
+
+ switch (cmd->opc) {
+ case SPDK_NVME_OPC_READ:
+ case SPDK_NVME_OPC_WRITE:
+ cdw12 = (struct nvme_read_cdw12 *)&cmd->cdw12;
+ /* the NVMe library read/write interface expects a 1-based lba_count,
+ so convert the command's 0-based NLB field */
+ lba_count = cdw12->nlb + 1;
+ lba_address = cmd->cdw11;
+ lba_address = (lba_address << 32) + cmd->cdw10;
+ io_flags = cmd->cdw12 & 0xFFFF0000U;
+
+ if (cmd->opc == SPDK_NVME_OPC_READ) {
+ SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: Read; lba address %lx, lba count %x\n",
+ lba_address, lba_count);
+ spdk_trace_record(TRACE_NVMF_LIB_READ_START, 0, 0,
(uint64_t)req_state->fabric_rx_ctx, 0); + spdk_nvme_ns_cmd_read(ns, qpair, + buf, lba_address, lba_count, + nvmf_complete_cmd, + (void *)req_state, io_flags); + } else { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_process_io_cmd: Write; lba address %lx, lba count %x\n", + lba_address, lba_count); + spdk_trace_record(TRACE_NVMF_LIB_WRITE_START, 0, 0, + (uint64_t)req_state->fabric_rx_ctx, 0); + spdk_nvme_ns_cmd_write(ns, qpair, + buf, lba_address, lba_count, + nvmf_complete_cmd, + (void *)req_state, io_flags); + } + break; + default: + SPDK_TRACELOG(SPDK_TRACE_NVMF, "RAW Passthrough: I/O Opcode %x\n", cmd->opc); + cmd->nsid = nsid; + spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, + cmd, + buf, len, + nvmf_complete_cmd, + (void *)req_state); + break; + } + return rc; +} + +void +nvmf_check_io_completions(struct nvmf_session *session) +{ + struct spdk_nvmf_subsystem *subsystem = session->subsys; + struct spdk_nvme_qpair *qpair, *prev_qpair = NULL; + int i; + + for (i = 0; i < MAX_PER_SUBSYSTEM_NAMESPACES; i++) { + qpair = subsystem->ns_list_map[i].qpair; + if (qpair == NULL) + continue; + if (qpair != NULL && qpair != prev_qpair) { + spdk_nvme_qpair_process_completions(qpair, 0); + prev_qpair = qpair; + } + } +} + + diff --git a/lib/nvmf/port.c b/lib/nvmf/port.c new file mode 100644 index 0000000000..4ae01037a5 --- /dev/null +++ b/lib/nvmf/port.c @@ -0,0 +1,208 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "conn.h" +#include "nvmf.h" +#include "rdma.h" +#include "port.h" +#include "spdk/log.h" +#include "spdk/trace.h" + +#define MAX_FABRIC_INTF_PER_PORT 4 +#define MAX_PORTS 4 + +static TAILQ_HEAD(, spdk_nvmf_port) g_port_head = TAILQ_HEAD_INITIALIZER(g_port_head); + +/* Assumes caller allocated host and port strings on the heap */ +struct spdk_nvmf_fabric_intf * +spdk_nvmf_fabric_intf_create(char *host, char *sin_port) +{ + struct spdk_nvmf_fabric_intf *fabric_intf = NULL; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Creating fabric intf: host address %s, port %s\n", + host, sin_port); + + RTE_VERIFY(host != NULL); + RTE_VERIFY(sin_port != NULL); + + fabric_intf = calloc(1, sizeof(*fabric_intf)); + if (!fabric_intf) { + SPDK_ERRLOG("fabric_intf calloc error\n"); + return NULL; + } + + fabric_intf->host = host; + fabric_intf->sin_port = sin_port; + + return fabric_intf; +} + +void +spdk_nvmf_fabric_intf_destroy(struct spdk_nvmf_fabric_intf *fabric_intf) +{ + RTE_VERIFY(fabric_intf != NULL); + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + free(fabric_intf); +} + + +struct spdk_nvmf_fabric_intf * +spdk_nvmf_port_find_fabric_intf_by_addr(char *addr) +{ + struct spdk_nvmf_port *port; + struct spdk_nvmf_fabric_intf *fabric_intf; + int i; + + if (addr == NULL) + goto find_error; + + for (i = 1; i < MAX_PORTS; i++) { + port = spdk_nvmf_port_find_by_tag(i); + if (port == NULL) { + continue; + } + + TAILQ_FOREACH(fabric_intf, &port->head, tailq) { + if (!strncasecmp(fabric_intf->host, addr, strlen(fabric_intf->host))) { + return fabric_intf; + } + } + } + +find_error: + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "No device addr match for %s\n", addr); + return NULL; +} + +struct spdk_nvmf_port * +spdk_nvmf_port_create(int tag) +{ + struct spdk_nvmf_port *port; + + if (tag <= 0) { + SPDK_ERRLOG("invalid port tag (%d)\n", tag); + return NULL; + } + + /* Make sure there are no duplicate port tags */ + if (spdk_nvmf_port_find_by_tag(tag)) { + SPDK_ERRLOG("port creation failed. 
duplicate port tag (%d)\n", tag); + return NULL; + } + + port = calloc(1, sizeof(*port)); + if (!port) { + SPDK_ERRLOG("port calloc error (%d)\n", tag); + return NULL; + } + + port->state = GROUP_INIT; + port->tag = tag; + port->type = FABRIC_RDMA; + + TAILQ_INIT(&port->head); + + pthread_mutex_lock(&g_nvmf_tgt.mutex); + port->state = GROUP_READY; + TAILQ_INSERT_TAIL(&g_port_head, port, tailq); + pthread_mutex_unlock(&g_nvmf_tgt.mutex); + + return port; +} + +void +spdk_nvmf_port_destroy(struct spdk_nvmf_port *port) +{ +#if 0 // TODO: fix bogus scan-build warning about use-after-free + struct spdk_nvmf_fabric_intf *fabric_intf; + + RTE_VERIFY(port != NULL); + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + while (!TAILQ_EMPTY(&port->head)) { + fabric_intf = TAILQ_FIRST(&port->head); + TAILQ_REMOVE(&port->head, fabric_intf, tailq); + spdk_nvmf_fabric_intf_destroy(fabric_intf); + } + + TAILQ_REMOVE(&g_port_head, port, tailq); + + free(port); +#endif +} + +void +spdk_nvmf_port_add_fabric_intf(struct spdk_nvmf_port *port, + struct spdk_nvmf_fabric_intf *fabric_intf) +{ + RTE_VERIFY(port != NULL); + RTE_VERIFY(fabric_intf != NULL); + + fabric_intf->port = port; + TAILQ_INSERT_TAIL(&port->head, fabric_intf, tailq); +} + +struct spdk_nvmf_port * +spdk_nvmf_port_find_by_tag(int tag) +{ + struct spdk_nvmf_port *port; + + if (tag <= 0) { + SPDK_ERRLOG("invalid port tag (%d)\n", tag); + return NULL; + } + + TAILQ_FOREACH(port, &g_port_head, tailq) { + if (port->tag == tag) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " found port with tag: port %p\n", port); + return port; + } + } + + return NULL; +} + +void +spdk_nvmf_port_destroy_all(void) +{ + struct spdk_nvmf_port *port; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + pthread_mutex_lock(&g_nvmf_tgt.mutex); + while (!TAILQ_EMPTY(&g_port_head)) { + port = TAILQ_FIRST(&g_port_head); + spdk_nvmf_port_destroy(port); + } + pthread_mutex_unlock(&g_nvmf_tgt.mutex); +} diff --git a/lib/nvmf/port.h b/lib/nvmf/port.h new file mode 100644 index 0000000000..fc2aebe263 --- /dev/null +++ b/lib/nvmf/port.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NVMF_PORT_H +#define NVMF_PORT_H + +#include + +#include "spdk/conf.h" +#include "spdk/queue.h" + +/** \file +* An NVMf subsystem port, referred to as simply "port" is defined by the +* specification as follows: +* +* An NVM subsystem port (port) is a collection of one or more physical fabric +* interfaces that together act as a single interface between the NVM subsystem +* and a fabric. When link aggregation (e.g., Ethernet) is used, the physical +* ports for the group of aggregated links constitute a single NVM subsystem port. +*/ + +enum fabric_type { + FABRIC_RDMA = 0x1, + FABRIC_PCI = 0x2, + FABRIC_ETHERNET = 0x3, +}; + +enum group_state { + GROUP_INIT = 0x0, + GROUP_READY = 0x1, + GROUP_DESTROY = 0x2, +}; + +struct spdk_nvmf_fabric_intf { + char *host; + char *sin_port; + struct spdk_nvmf_port *port; + uint32_t num_sessions; + TAILQ_ENTRY(spdk_nvmf_fabric_intf) tailq; +}; + +struct spdk_nvmf_port { + int tag; + enum group_state state; + enum fabric_type type; + TAILQ_HEAD(, spdk_nvmf_fabric_intf) head; + TAILQ_ENTRY(spdk_nvmf_port) tailq; +}; + +struct spdk_nvmf_fabric_intf * +spdk_nvmf_fabric_intf_create(char *host, char *sin_port); + +void +spdk_nvmf_fabric_intf_destroy(struct spdk_nvmf_fabric_intf *fabric_intf); + +struct spdk_nvmf_fabric_intf * +spdk_nvmf_port_find_fabric_intf_by_addr(char *addr); + +struct spdk_nvmf_port * +spdk_nvmf_port_create(int tag); + +void +spdk_nvmf_port_destroy(struct spdk_nvmf_port *port); + +struct spdk_nvmf_port * +spdk_nvmf_port_find_by_tag(int tag); + +void +spdk_nvmf_port_add_fabric_intf(struct spdk_nvmf_port *port, + struct spdk_nvmf_fabric_intf *fabric_intf); + +void +spdk_nvmf_port_destroy_all(void); + +#endif diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c new file mode 100644 index 0000000000..2f67a2d913 --- /dev/null +++ b/lib/nvmf/rdma.c @@ -0,0 +1,1132 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <rte_config.h>
+#include <rte_cycles.h>
+#include <rte_timer.h>
+#include <rte_mempool.h>
+
+#include "conn.h"
+#include "rdma.h"
+#include "nvmf.h"
+#include "port.h"
+#include "init_grp.h"
+#include "spdk/log.h"
+#include "spdk/trace.h"
+
+#define ACCEPT_TIMEOUT (rte_get_timer_hz() >> 10) /* ~1ms */
+#define MAX_RDMA_DEVICES 4
+#define MAX_SESSIONS_PER_DEVICE 1 /* for now accept only single session per device */
+
+static int alloc_qp_rx_desc(struct spdk_nvmf_conn *conn);
+static int alloc_qp_tx_desc(struct spdk_nvmf_conn *conn);
+
+static struct rte_timer g_acceptor_timer;
+
+static struct rdma_event_channel *g_cm_event_ch = NULL;
+static struct rdma_cm_id *g_cm_id = NULL;
+
+/*! \file
+ NVMf RDMA transport: connection setup, work request posting, and the
+ rdmacm connection acceptor.
+*/
+
+static int
+nvmf_rdma_conn_init(struct spdk_nvmf_conn *conn,
+ struct ibv_context *verbs)
+{
+ int rc;
+
+ if (conn->ctx) {
+ SPDK_ERRLOG("rdma_conn_init: context already set!\n");
+ goto return_error;
+ }
+ conn->ctx = verbs;
+
+ conn->pd = ibv_alloc_pd(verbs);
+ if (!conn->pd) {
+ SPDK_ERRLOG("rdma_conn_init: alloc pd error!\n");
+ goto return_error;
+ }
+
+ conn->comp_channel = ibv_create_comp_channel(verbs);
+ if (!conn->comp_channel) {
+ SPDK_ERRLOG("rdma_conn_init: create completion channel error!\n");
+ goto comp_ch_error;
+ }
+ rc = fcntl(conn->comp_channel->fd, F_SETFL, O_NONBLOCK);
+ if (rc < 0) {
+ SPDK_ERRLOG("rdma_conn_init: fcntl to set comp channel to non-blocking failed\n");
+ goto cq_error;
+ }
+
+ /*
+ * Size the CQ to handle Rx completions + Tx completions + rdma_read or write
+ * completions. Three times the target connection SQ depth should be more
+ * than enough.
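+ *
+ * For example, with the default SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH of
+ * 128, the CQ below is created with 3 * 128 = 384 entries; note that
+ * ibv_create_cq() may round this up to a size the device supports.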
+ */ + conn->cq = ibv_create_cq(verbs, (conn->sq_depth * 3), conn, conn->comp_channel, 0); + if (!conn->cq) { + SPDK_ERRLOG("rdma_conn_init: create cq error!\n"); + goto cq_error; + } + + return 0; + +cq_error: + ibv_destroy_comp_channel(conn->comp_channel); +comp_ch_error: + ibv_dealloc_pd(conn->pd); +return_error: + return -1; +} + +static void +free_qp_desc(struct spdk_nvmf_conn *conn) +{ + struct nvme_qp_rx_desc *tmp_rx; + struct nvme_qp_tx_desc *tmp_tx; + int rc; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + + STAILQ_FOREACH(tmp_rx, &conn->qp_rx_desc, link) { + STAILQ_REMOVE(&conn->qp_rx_desc, tmp_rx, nvme_qp_rx_desc, link); + + rc = ibv_dereg_mr(tmp_rx->bb_mr); + if (rc) { + SPDK_ERRLOG("Unable to de-register rx bb mr\n"); + } + + if (conn->type == CONN_TYPE_AQ) { + rte_mempool_put(g_nvmf_tgt.bb_small_pool, (void *)tmp_rx->bb); + } else { + rte_mempool_put(g_nvmf_tgt.bb_large_pool, (void *)tmp_rx->bb); + } + + rc = ibv_dereg_mr(tmp_rx->msg_buf_mr); + if (rc) { + SPDK_ERRLOG("Unable to de-register rx mr\n"); + } + + rte_mempool_put(g_nvmf_tgt.rx_desc_pool, (void *)tmp_rx); + } + + STAILQ_FOREACH(tmp_tx, &conn->qp_tx_desc, link) { + STAILQ_REMOVE(&conn->qp_tx_desc, tmp_tx, nvme_qp_tx_desc, link); + + rc = ibv_dereg_mr(tmp_tx->msg_buf_mr); + if (rc) { + SPDK_ERRLOG("Unable to de-register tx mr\n"); + } + + rte_mempool_put(g_nvmf_tgt.tx_desc_pool, (void *)tmp_tx); + } +} + +static void +nvmf_drain_cq(struct spdk_nvmf_conn *conn) +{ + struct ibv_wc wc; + + /* drain the cq before destruction */ + while (ibv_poll_cq(conn->cq, 1, &wc) > 0) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "drain cq event\n"); + //ibv_ack_cq_events(conn->cq, 1); + } + +} + +void +nvmf_rdma_conn_cleanup(struct spdk_nvmf_conn *conn) +{ + struct nvme_qp_tx_desc *pending_desc, *active_desc; + int rc; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Enter\n"); + + rdma_destroy_qp(conn->cm_id); + + while (!STAILQ_EMPTY(&conn->qp_pending_desc)) { + pending_desc = STAILQ_FIRST(&conn->qp_pending_desc); + STAILQ_REMOVE_HEAD(&conn->qp_pending_desc, link); + STAILQ_INSERT_TAIL(&conn->qp_tx_desc, pending_desc, link); + } + + /* Remove tx_desc from qp_tx_active_desc list to qp_tx_desc list */ + while (!STAILQ_EMPTY(&conn->qp_tx_active_desc)) { + active_desc = STAILQ_FIRST(&conn->qp_tx_active_desc); + STAILQ_REMOVE_HEAD(&conn->qp_tx_active_desc, link); + STAILQ_INSERT_TAIL(&conn->qp_tx_desc, active_desc, link); + } + + free_qp_desc(conn); + + nvmf_drain_cq(conn); + rc = ibv_destroy_cq(conn->cq); + if (rc) { + SPDK_ERRLOG("ibv_destroy_cq error\n"); + } + + ibv_destroy_comp_channel(conn->comp_channel); + ibv_dealloc_pd(conn->pd); + rdma_destroy_id(conn->cm_id); +} + +static int +nvmf_rdma_qp_init(struct spdk_nvmf_conn *conn) +{ + struct ibv_qp_init_attr attr; + int ret; + + memset(&attr, 0, sizeof(struct ibv_qp_init_attr)); + + attr.qp_type = IBV_QPT_RC; + attr.send_cq = conn->cq; + attr.recv_cq = conn->cq; + attr.cap.max_send_wr = conn->cq_depth; + attr.cap.max_recv_wr = conn->sq_depth; + attr.cap.max_send_sge = NVMF_DEFAULT_TX_SGE; + attr.cap.max_recv_sge = NVMF_DEFAULT_RX_SGE; + + ret = rdma_create_qp(conn->cm_id, conn->pd, &attr); + if (ret) { + SPDK_ERRLOG("rdma_create_qp failed\n"); + return -1; + } + conn->qp = conn->cm_id->qp; + + return 0; +} + +int +nvmf_post_rdma_read(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct ibv_send_wr wr, *bad_wr = NULL; + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + struct nvmf_request *req = &tx_desc->req_state; + int rc; + + if (rx_desc == NULL) { + SPDK_ERRLOG("Rx 
descriptor does not exist at rdma read!\n"); + return -1; + } + + /* + * Queue the rdma read if it would exceed max outstanding + * RDMA read limit. + */ + if (conn->pending_rdma_read_count == conn->initiator_depth) { + SPDK_TRACELOG(SPDK_TRACE_RDMA, "Insert rdma read into pending queue: tx_desc %p\n", + tx_desc); + STAILQ_REMOVE(&conn->qp_tx_active_desc, tx_desc, nvme_qp_tx_desc, link); + STAILQ_INSERT_TAIL(&conn->qp_pending_desc, tx_desc, link); + return 0; + } + conn->pending_rdma_read_count++; + + memset(&wr, 0, sizeof(wr)); + + wr.wr_id = (uintptr_t)tx_desc; + wr.next = NULL; + wr.opcode = IBV_WR_RDMA_READ; + wr.send_flags = IBV_SEND_SIGNALED; + wr.wr.rdma.rkey = req->rkey; + wr.wr.rdma.remote_addr = req->remote_addr; + wr.sg_list = &rx_desc->bb_sgl; /* sender sets correct length */ + wr.num_sge = 1; + + SPDK_TRACELOG(SPDK_TRACE_RDMA, "rkey %x\n", wr.wr.rdma.rkey); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "remote addr %p\n", + (void *)wr.wr.rdma.remote_addr); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "local addr %p\n", (void *)wr.sg_list->addr); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "length %x\n", wr.sg_list->length); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "lkey %x\n", wr.sg_list->lkey); + + + spdk_trace_record(TRACE_RDMA_READ_START, 0, 0, (uint64_t)rx_desc, 0); + rc = ibv_post_send(conn->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma read send, rc = 0x%x\n", rc); + } + return (rc); +} + +int +nvmf_post_rdma_write(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct ibv_send_wr wr, *bad_wr = NULL; + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + struct nvmf_request *req = &tx_desc->req_state; + int rc; + + if (rx_desc == NULL) { + SPDK_ERRLOG("Rx descriptor does not exist at rdma write!\n"); + return -1; + } + + memset(&wr, 0, sizeof(wr)); + + wr.wr_id = (uintptr_t)tx_desc; + wr.next = NULL; + wr.opcode = IBV_WR_RDMA_WRITE; + //wr.send_flags = IBV_SEND_SIGNALED; /* set if we want to get completion event */ + wr.wr.rdma.rkey = req->rkey; + wr.wr.rdma.remote_addr = req->remote_addr; + wr.sg_list = &rx_desc->bb_sgl; /* sender sets correct length */ + wr.num_sge = 1; + + SPDK_TRACELOG(SPDK_TRACE_RDMA, "rkey %x\n", wr.wr.rdma.rkey); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "remote addr %p\n", + (void *)wr.wr.rdma.remote_addr); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "local addr %p\n", (void *)wr.sg_list->addr); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "length %x\n", wr.sg_list->length); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "lkey %x\n", wr.sg_list->lkey); + + + spdk_trace_record(TRACE_RDMA_WRITE_START, 0, 0, (uint64_t)rx_desc, 0); + rc = ibv_post_send(conn->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma write send, rc = 0x%x\n", rc); + } + return (rc); +} + +int +nvmf_post_rdma_send(struct spdk_nvmf_conn *conn, + struct nvme_qp_tx_desc *tx_desc) +{ + struct ibv_send_wr wr, *bad_wr = NULL; + struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc; + int rc; + + RTE_VERIFY(rx_desc != NULL); + + /* restore the SGL length that may have been modified */ + rx_desc->bb_sgl.length = rx_desc->bb_len; + + /* Re-post recv */ + if (nvmf_post_rdma_recv(conn, rx_desc)) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + return -1; + } + tx_desc->rx_desc = NULL; + + memset(&wr, 0, sizeof(wr)); + + wr.wr_id = (uintptr_t)tx_desc; + wr.next = NULL; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + wr.sg_list = &tx_desc->send_sgl; + wr.num_sge = 1; + + /* caller responsible in setting up SGE */ + SPDK_TRACELOG(SPDK_TRACE_RDMA, "local addr %p\n", (void *)wr.sg_list->addr); + 
SPDK_TRACELOG(SPDK_TRACE_RDMA, "length %x\n", wr.sg_list->length); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "lkey %x\n", wr.sg_list->lkey); +#ifdef DEBUG + { + struct nvmf_request *req = &tx_desc->req_state; + SPDK_TRACELOG(SPDK_TRACE_RDMA, + "tx_desc %p: req_state %p, rsp %p\n", + tx_desc, req, (void *)req->rsp); + } +#endif + + spdk_trace_record(TRACE_NVMF_IO_COMPLETE, 0, 0, (uint64_t)rx_desc, 0); + rc = ibv_post_send(conn->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma send for NVMf completion, rc = 0x%x\n", rc); + } + return (rc); +} + +int +nvmf_post_rdma_recv(struct spdk_nvmf_conn *conn, + struct nvme_qp_rx_desc *rx_desc) +{ + struct ibv_recv_wr wr, *bad_wr = NULL; + int rc; + + /* Update Connection SQ Tracking, increment + the SQ head counter opening up another + RX recv slot. + */ + conn->sq_head < (conn->sq_depth - 1) ? (conn->sq_head++) : (conn->sq_head = 0); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sq_head %x, sq_depth %x\n", conn->sq_head, conn->sq_depth); + + rx_desc->recv_bc = 0; /* clear previous recv byte count */ + + wr.wr_id = (uintptr_t)rx_desc; + wr.next = NULL; + wr.sg_list = &rx_desc->recv_sgl; + wr.num_sge = 1; + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "local addr %p\n", (void *)wr.sg_list->addr); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "length %x\n", wr.sg_list->length); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "lkey %x\n", wr.sg_list->lkey); + + /* for I/O queues we add bb sgl for in-capsule data use */ + if (conn->type == CONN_TYPE_IOQ) { + wr.num_sge = 2; + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 local addr %p\n", + (void *)rx_desc->bb_sgl.addr); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 length %x\n", rx_desc->bb_sgl.length); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "sgl2 lkey %x\n", rx_desc->bb_sgl.lkey); + } + + rc = ibv_post_recv(conn->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc); + } + return (rc); +} + +static int +nvmf_rdma_cm_connect(struct rdma_cm_event *event) +{ + struct spdk_nvmf_init_grp *ig; + struct spdk_nvmf_fabric_intf *fabric_intf; + struct rdma_cm_id *conn_id; + struct spdk_nvmf_conn *conn; + struct nvme_qp_rx_desc *rx_desc; + struct ibv_device_attr ibdev_attr; + struct sockaddr_in *addr; + struct rdma_conn_param *param = NULL; + const union spdk_nvmf_rdma_private_data *pdata = NULL; + union spdk_nvmf_rdma_private_data acc_rej_pdata; + uint16_t sts = 0; + char addr_str[INET_ADDRSTRLEN]; + int rc; + + + /* Check to make sure we know about this rdma device */ + if (event->id == NULL) { + SPDK_ERRLOG("connect request: missing cm_id\n"); + goto err0; + } + conn_id = event->id; + + if (conn_id->verbs == NULL) { + SPDK_ERRLOG("connect request: missing cm_id ibv_context\n"); + goto err0; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Connect Recv on fabric intf name %s, dev_name %s\n", + conn_id->verbs->device->name, conn_id->verbs->device->dev_name); + addr = (struct sockaddr_in *)rdma_get_local_addr(conn_id); + inet_ntop(AF_INET, &(addr->sin_addr), addr_str, INET_ADDRSTRLEN); + SPDK_TRACELOG(SPDK_TRACE_RDMA, "Connect Route: local addr %s\n", + addr_str); + + fabric_intf = spdk_nvmf_port_find_fabric_intf_by_addr(addr_str); + if (fabric_intf == NULL) { + SPDK_ERRLOG("connect request: rdma device does not exist!\n"); + goto err1; + } + SPDK_TRACELOG(SPDK_TRACE_RDMA, "Found existing RDMA Device %p\n", fabric_intf); + + + /* validate remote address is within a provisioned initiator group */ + addr = (struct sockaddr_in *)rdma_get_peer_addr(conn_id); + inet_ntop(AF_INET, &(addr->sin_addr), addr_str, INET_ADDRSTRLEN); + 
SPDK_TRACELOG(SPDK_TRACE_RDMA, "Connect Route: peer addr %s\n", + addr_str); + + ig = nvmf_initiator_group_find_by_addr(addr_str); + if (ig == NULL) { + SPDK_ERRLOG("connect request: remote host addr not provisioned!\n"); + goto err1; + } + SPDK_TRACELOG(SPDK_TRACE_RDMA, "Found approved remote host initiator group %p\n", ig); + + /* Init the NVMf rdma transport connection */ + conn = spdk_nvmf_allocate_conn(); + if (conn == NULL) { + SPDK_ERRLOG("Error on nvmf connection creation\n"); + goto err1; + } + + /* + * Save the rdma_cm context id in our fabric connection context. This + * ptr can be used to get indirect access to ibv_context (cm_id->verbs) + * and also to ibv_device (cm_id->verbs->device) + */ + conn->cm_id = conn_id; + + /* check for private data */ + if (event->param.conn.private_data_len < sizeof(union spdk_nvmf_rdma_private_data)) { + SPDK_TRACELOG(SPDK_TRACE_RDMA, "No private nvmf connection setup data\n"); + conn->sq_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH; /* assume max default */ + conn->cq_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH; /* assume max default */ + } else { + pdata = event->param.conn.private_data; + if (pdata == NULL) { + SPDK_ERRLOG("Invalid private nvmf connection setup data pointer\n"); + sts = SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT; + goto err2; + } + + /* Save private details for later validation and use */ + conn->sq_depth = pdata->pd_request.hsqsize; + conn->cq_depth = pdata->pd_request.hrqsize; + conn->qid = pdata->pd_request.qid; + /* double send queue size for R/W commands */ + conn->cq_depth *= 2; + if (conn->qid > 0) { + conn->type = CONN_TYPE_IOQ; + } + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Private Data: QID %x\n", conn->qid); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Private Data: CQ Depth %x\n", conn->cq_depth); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Private Data: SQ Depth %x\n", conn->sq_depth); + } + + /* adjust conn settings to device limits */ + rc = ibv_query_device(conn_id->verbs, &ibdev_attr); + if (rc) { + SPDK_ERRLOG(" Failed on query for device attributes\n"); + goto err2; + } + + if (conn->cq_depth > ibdev_attr.max_cqe) { + conn->cq_depth = ibdev_attr.max_cqe; + } + if (conn->sq_depth > ibdev_attr.max_qp_wr) { + conn->sq_depth = ibdev_attr.max_qp_wr; + } + conn->sq_depth = nvmf_min(conn->sq_depth, conn->cq_depth); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Adjusted CQ Depth %x\n", conn->cq_depth); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Adjusted SQ Depth %x\n", conn->sq_depth); + + if (conn_id->ps == RDMA_PS_TCP) { + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect flow control: %x\n", event->param.conn.flow_control); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect retry count: %x\n", event->param.conn.retry_count); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect rnr retry count: %x\n", + event->param.conn.rnr_retry_count); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Responder Resources %x\n", + event->param.conn.responder_resources); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Initiator Depth %x\n", + event->param.conn.initiator_depth); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect SRQ %x\n", event->param.conn.srq); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect qp_num %x\n", event->param.conn.qp_num); + + conn->responder_resources = nvmf_min(event->param.conn.responder_resources, + ibdev_attr.max_qp_rd_atom); + conn->initiator_depth = nvmf_min(event->param.conn.initiator_depth, + ibdev_attr.max_qp_init_rd_atom); + if (event->param.conn.responder_resources != conn->responder_resources || + event->param.conn.initiator_depth != conn->initiator_depth) { + 
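+ /* the host requested more outstanding RDMA read/atomic operations
+ * than this device supports (max_qp_rd_atom/max_qp_init_rd_atom);
+ * the clamped values traced here are echoed back to the host via
+ * the rdma_accept() connection parameters below
+ */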
SPDK_TRACELOG(SPDK_TRACE_RDMA, " Adjusted Responder Resources %x\n", + conn->responder_resources); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Adjusted Initiator Depth %x\n", + conn->initiator_depth); + } + } + + rc = nvmf_rdma_conn_init(conn, conn_id->verbs); + if (rc) { + SPDK_ERRLOG("connect request: rdma conn init failure!\n"); + goto err2; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "NVMf fabric connection initialized\n"); + + /* Allocate the AQ QP Channel */ + rc = nvmf_rdma_qp_init(conn); + if (rc) { + SPDK_ERRLOG("Unable to allocate connection qp\n"); + goto err2; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "QPs allocated\n"); + + STAILQ_INIT(&conn->qp_pending_desc); + STAILQ_INIT(&conn->qp_rx_desc); + STAILQ_INIT(&conn->qp_tx_desc); + STAILQ_INIT(&conn->qp_tx_active_desc); + + /* Allocate AQ QP RX Buffers */ + rc = alloc_qp_rx_desc(conn); + if (rc) { + SPDK_ERRLOG("Unable to allocate connection rx desc\n"); + goto err2; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Rx buffers allocated\n"); + + /* Allocate AQ QP TX Buffers */ + rc = alloc_qp_tx_desc(conn); + if (rc) { + SPDK_ERRLOG("Unable to allocate connection tx desc\n"); + goto err2; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Tx buffers allocated\n"); + + /* Post all the RX descriptors */ + STAILQ_FOREACH(rx_desc, &conn->qp_rx_desc, link) { + if (nvmf_post_rdma_recv(conn, rx_desc)) { + SPDK_ERRLOG("Unable to post connection rx desc\n"); + goto err2; + } + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RX buffers posted\n"); + + rc = spdk_nvmf_startup_conn(conn); + if (rc) { + SPDK_ERRLOG("Error on startup connection\n"); + goto err2; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "New Connection Scheduled\n"); + + param = &event->param.conn; + if (conn_id->ps == RDMA_PS_TCP) { + event->param.conn.responder_resources = conn->responder_resources; + event->param.conn.initiator_depth = conn->initiator_depth; + } + if (pdata != NULL) { + event->param.conn.private_data = &acc_rej_pdata; + event->param.conn.private_data_len = sizeof(acc_rej_pdata); + memset((uint8_t *)&acc_rej_pdata, 0, sizeof(acc_rej_pdata)); + acc_rej_pdata.pd_accept.crqsize = conn->sq_depth; + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept Private Data Length %x\n", + param->private_data_len); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept Private Data: recfmt %x\n", + pdata->pd_accept.recfmt); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept Private Data: crqsize %x\n", + pdata->pd_accept.crqsize); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept flow control: %x\n", param->flow_control); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept retry count: %x\n", param->retry_count); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept rnr retry count: %x\n", param->rnr_retry_count); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept Responder Resources %x\n", + param->responder_resources); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept Initiator Depth %x\n", param->initiator_depth); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept SRQ %x\n", param->srq); + SPDK_TRACELOG(SPDK_TRACE_RDMA, " Connect Accept qp_num %x\n", param->qp_num); + } + + rc = rdma_accept(event->id, param); + if (rc) { + SPDK_ERRLOG("Error on rdma_accept\n"); + goto err3; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Sent back the accept\n"); + + return 0; + +err3: + /* halt the connection thread */ +err2: + /* free the connection and all it's resources */ +err1: + if (pdata != NULL) { + memset((uint8_t *)&acc_rej_pdata, 0, sizeof(acc_rej_pdata)); + acc_rej_pdata.pd_reject.status.sc = sts; + rc = rdma_reject(conn_id, &acc_rej_pdata, 
sizeof(acc_rej_pdata)); + } else { + rc = rdma_reject(conn_id, NULL, 0); + } + if (rc) + SPDK_ERRLOG("Error on rdma_reject\n"); +err0: + return -1; +} + +static int +nvmf_rdma_cm_disconnect(struct rdma_cm_event *event) +{ + struct rdma_cm_id *conn_id; + struct spdk_nvmf_conn *conn; + + /* Check to make sure we know about this rdma device */ + if (event->id == NULL) { + SPDK_ERRLOG("disconnect request: missing cm_id\n"); + goto err0; + } + conn_id = event->id; + + conn = spdk_find_nvmf_conn_by_cm_id(conn_id); + if (conn == NULL) { + SPDK_ERRLOG("disconnect request: no active connection\n"); + goto err0; + } + + /* + * Modify connection state to trigger async termination + * next time the connection poller executes + */ + conn->state = CONN_STATE_FABRIC_DISCONNECT; + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rdma connection %p state set to CONN_STATE_FABRIC_DISCONNECT\n", + conn); + return 0; +err0: + return -1; +} + +const char *CM_EVENT_STR[] = { + "RDMA_CM_EVENT_ADDR_RESOLVED", + "RDMA_CM_EVENT_ADDR_ERROR", + "RDMA_CM_EVENT_ROUTE_RESOLVED", + "RDMA_CM_EVENT_ROUTE_ERROR", + "RDMA_CM_EVENT_CONNECT_REQUEST", + "RDMA_CM_EVENT_CONNECT_RESPONSE", + "RDMA_CM_EVENT_CONNECT_ERROR", + "RDMA_CM_EVENT_UNREACHABLE", + "RDMA_CM_EVENT_REJECTED", + "RDMA_CM_EVENT_ESTABLISHED", + "RDMA_CM_EVENT_DISCONNECTED", + "RDMA_CM_EVENT_DEVICE_REMOVAL", + "RDMA_CM_EVENT_MULTICAST_JOIN", + "RDMA_CM_EVENT_MULTICAST_ERROR", + "RDMA_CM_EVENT_ADDR_CHANGE", + "RDMA_CM_EVENT_TIMEWAIT_EXIT" +}; + +static int +nvmf_rdma_process_event(struct rdma_cm_event *event) +{ + int rc = 0; + + SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCM event - %s\n", CM_EVENT_STR[event->event]); + + switch (event->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + rc = nvmf_rdma_cm_connect(event); + break; + case RDMA_CM_EVENT_ESTABLISHED: + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_DISCONNECTED: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + rc = nvmf_rdma_cm_disconnect(event); + break; + default: + SPDK_ERRLOG("Unexpected CM event [%d]\n", event->event); + goto event_error; + } + return rc; + +event_error: + return -1; +} + + +/*! + +\brief This is the main routine for the NVMf rdma acceptor work item. + +*/ +static void +nvmf_rdma_acceptor(struct rte_timer *timer, void *arg) +{ + struct rdma_cm_event *event; + int rc; + + if (g_cm_event_ch == NULL) { + return; + } + + while (1) { + rc = rdma_get_cm_event(g_cm_event_ch, &event); + if (!rc) { + /* + A memcopy is required if we ack the rdma event. + But it may be possible to hold off and not ack + until the event is processed. OFED documentation + only states that every event must be acked else + any attempt to destroy the cm_id associated with + that event shall block. 
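+ An ack-early variant (copying the event so it remains valid
+ after the ack) would look roughly like: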
+ memcpy(&event_copy, event, sizeof(*event)); + rdma_ack_cm_event(event); + */ + + rc = nvmf_rdma_process_event(event); + rdma_ack_cm_event(event); + if (rc < 0) { + SPDK_ERRLOG("nvmf_rdma_process_event() failed\n"); + break; + } + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) { + SPDK_ERRLOG("get rdma event error(%d): %s\n", + errno, strerror(errno)); + } + break; + } + } +} + +int nvmf_acceptor_start(void) +{ + struct sockaddr_in addr; + uint16_t sin_port; + int rc; + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = g_nvmf_tgt.sin_port; + + /* create an event channel with rdmacm to receive + connection oriented requests and notifications */ + g_cm_event_ch = rdma_create_event_channel(); + if (g_cm_event_ch == NULL) { + SPDK_ERRLOG("rdma_create_event_channel() failed\n"); + return -1; + } + rc = fcntl(g_cm_event_ch->fd, F_SETFL, O_NONBLOCK); + if (rc < 0) { + SPDK_ERRLOG("fcntl to set fd to non-blocking failed\n"); + goto create_id_error; + } + + rc = rdma_create_id(g_cm_event_ch, &g_cm_id, NULL, RDMA_PS_TCP); + if (rc < 0) { + SPDK_ERRLOG("rdma_create_id() failed\n"); + goto create_id_error; + } + + rc = rdma_bind_addr(g_cm_id, (struct sockaddr *)&addr); + if (rc < 0) { + SPDK_ERRLOG("rdma_bind_addr() failed\n"); + goto listen_error; + } + + rc = rdma_listen(g_cm_id, 10); /* 10 = backlog */ + if (rc < 0) { + SPDK_ERRLOG("rdma_listen() failed\n"); + goto listen_error; + } + sin_port = ntohs(rdma_get_src_port(g_cm_id)); + SPDK_NOTICELOG("\n*** NVMf Target Listening on port %d ***\n", sin_port); + + rte_timer_init(&g_acceptor_timer); + rte_timer_reset(&g_acceptor_timer, ACCEPT_TIMEOUT, PERIODICAL, + rte_lcore_id(), nvmf_rdma_acceptor, NULL); + return (rc); + +listen_error: + rdma_destroy_id(g_cm_id); +create_id_error: + rdma_destroy_event_channel(g_cm_event_ch); + return -1; +} + +void nvmf_acceptor_stop(void) +{ + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "nvmf_acceptor_stop: shutdown\n"); + rte_timer_stop_sync(&g_acceptor_timer); +} + +/* + +Initialize with RDMA transport. Query OFED for device list. 
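+
+Returns the number of usable verbs devices discovered, which is logged
+as the count of active fabric interfaces, or -1 if the verbs device
+list cannot be obtained at all.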
+
+*/
+int
+nvmf_rdma_init(void)
+{
+	struct ibv_device **dev_list;
+	struct ibv_context *ibdev_ctx = NULL;
+	struct ibv_device_attr ibdev_attr;
+	int num_of_rdma_devices;
+	int num_devices_found = 0;
+	int i, ret;
+
+	SPDK_NOTICELOG("\n*** RDMA Transport Init ***\n");
+
+	dev_list = ibv_get_device_list(&num_of_rdma_devices);
+	if (!dev_list) {
+		SPDK_ERRLOG(" No RDMA verbs devices found\n");
+		return -1;
+	}
+	SPDK_TRACELOG(SPDK_TRACE_RDMA, " %d RDMA verbs device(s) discovered\n", num_of_rdma_devices);
+
+	/* Look through the list of devices for one we support */
+	for (i = 0; dev_list[i] && num_devices_found < MAX_RDMA_DEVICES; i++, ibdev_ctx = NULL) {
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, " RDMA Device %d:\n", i);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "   Node type: %d\n", (int)dev_list[i]->node_type);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "   Transport type: %d\n", (int)dev_list[i]->transport_type);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "   Name: %s\n", dev_list[i]->name);
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "   Device Name: %s\n", dev_list[i]->dev_name);
+
+		ibdev_ctx = ibv_open_device(dev_list[i]);
+		if (!ibdev_ctx) {
+			SPDK_ERRLOG(" No rdma context returned for device %d\n", i);
+			continue;
+		}
+
+		ret = ibv_query_device(ibdev_ctx, &ibdev_attr);
+		if (ret) {
+			SPDK_ERRLOG(" Failed on query for device %d\n", i);
+			ibv_close_device(ibdev_ctx);
+			continue;
+		}
+
+		/* display device specific attributes */
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, " RDMA Device Attributes:\n");
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max MR Size: 0x%llx\n", (long long int)ibdev_attr.max_mr_size);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Page Size Cap: 0x%llx\n",
+			      (long long int)ibdev_attr.page_size_cap);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max QPs: 0x%x\n", (int)ibdev_attr.max_qp);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max QP WRs: 0x%x\n", (int)ibdev_attr.max_qp_wr);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max SGE: 0x%x\n", (int)ibdev_attr.max_sge);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max CQs: 0x%x\n", (int)ibdev_attr.max_cq);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max CQE per CQ: 0x%x\n", (int)ibdev_attr.max_cqe);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max MR: 0x%x\n", (int)ibdev_attr.max_mr);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max PD: 0x%x\n", (int)ibdev_attr.max_pd);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max QP RD Atom: 0x%x\n", (int)ibdev_attr.max_qp_rd_atom);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max QP Init RD Atom: 0x%x\n",
+			      (int)ibdev_attr.max_qp_init_rd_atom);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max Res RD Atom: 0x%x\n", (int)ibdev_attr.max_res_rd_atom);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max EE: 0x%x\n", (int)ibdev_attr.max_ee);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max SRQ: 0x%x\n", (int)ibdev_attr.max_srq);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max SRQ WR: 0x%x\n", (int)ibdev_attr.max_srq_wr);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max SRQ SGE: 0x%x\n", (int)ibdev_attr.max_srq_sge);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Max PKeys: 0x%x\n", (int)ibdev_attr.max_pkeys);
+		SPDK_TRACELOG(SPDK_TRACE_RDMA, "   Phys Port Cnt: %d\n", (int)ibdev_attr.phys_port_cnt);
+
+		/* the context was only opened to query the device attributes;
+		   close it here so the context is not leaked */
+		ibv_close_device(ibdev_ctx);
+
+		num_devices_found++;
+	}
+
+	ibv_free_device_list(dev_list);
+	SPDK_TRACELOG(SPDK_TRACE_RDMA, " %d Fabric Intf(s) active\n", num_devices_found);
+	return num_devices_found;
+}
+
+/* Populate the AQ QP Rx Buffer Resources */
+static int
+alloc_qp_rx_desc(struct spdk_nvmf_conn *conn)
+{
+	struct nvme_qp_rx_desc *rx_desc, *tmp;
+	int i;
+	int rc;
+
+	/* Allocate buffer for rx descriptors (RX WQE + Msg Buffer) */
+	for (i = 0; i < conn->sq_depth; i++) {
+		rx_desc = NULL;
+		rc = rte_mempool_get(g_nvmf_tgt.rx_desc_pool,
(void **)&rx_desc);
+		if ((rc < 0) || !rx_desc) {
+			SPDK_ERRLOG("Unable to get rx desc object\n");
+			goto fail;
+		}
+
+		rx_desc->msg_buf_mr = ibv_reg_mr(conn->pd,
+						 (void *)&rx_desc->msg_buf,
+						 sizeof(rx_desc->msg_buf),
+						 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+		if (rx_desc->msg_buf_mr == NULL) {
+			SPDK_ERRLOG("Unable to register rx desc buffer mr\n");
+			goto fail;
+		}
+
+		rx_desc->conn = conn;
+
+		/* initialize recv_sgl of rx_desc */
+		rx_desc->recv_sgl.addr = (uint64_t)&rx_desc->msg_buf;
+		rx_desc->recv_sgl.length = sizeof(rx_desc->msg_buf);
+		rx_desc->recv_sgl.lkey = rx_desc->msg_buf_mr->lkey;
+
+		/* pre-assign a data bb (bounce buffer) with each RX descriptor */
+		/*
+		  For the admin queue, assign the smaller BB size to support the maximum
+		  data that would be exchanged for admin commands.  For an IO queue,
+		  assign the large BB size that is equal to the maximum I/O transfer
+		  supported by the NVMe device.  This large BB is also used for
+		  in-capsule receive data.
+		*/
+		if (conn->type == CONN_TYPE_AQ) {
+			rc = rte_mempool_get(g_nvmf_tgt.bb_small_pool, (void **)&rx_desc->bb);
+			if ((rc < 0) || !rx_desc->bb) {
+				SPDK_ERRLOG("Unable to get small bb object\n");
+				goto fail;
+			}
+			rx_desc->bb_len = SMALL_BB_MAX_SIZE;
+		} else { /* for IO queues */
+			rc = rte_mempool_get(g_nvmf_tgt.bb_large_pool, (void **)&rx_desc->bb);
+			if ((rc < 0) || !rx_desc->bb) {
+				SPDK_ERRLOG("Unable to get large bb object\n");
+				goto fail;
+			}
+			rx_desc->bb_len = LARGE_BB_MAX_SIZE;
+		}
+		rx_desc->bb_mr = ibv_reg_mr(conn->pd,
+					    (void *)rx_desc->bb,
+					    rx_desc->bb_len,
+					    IBV_ACCESS_LOCAL_WRITE |
+					    IBV_ACCESS_REMOTE_READ |
+					    IBV_ACCESS_REMOTE_WRITE);
+		if (rx_desc->bb_mr == NULL) {
+			SPDK_ERRLOG("Unable to register rx bb mr\n");
+			goto fail;
+		}
+
+		/* initialize bb_sgl of rx_desc */
+		rx_desc->bb_sgl.addr = (uint64_t)rx_desc->bb;
+		rx_desc->bb_sgl.length = rx_desc->bb_len;
+		rx_desc->bb_sgl.lkey = rx_desc->bb_mr->lkey;
+
+		STAILQ_INSERT_TAIL(&conn->qp_rx_desc, rx_desc, link);
+	}
+
+	return 0;
+
+fail:
+	/* cleanup any partial descriptor that failed during init loop */
+	if (rx_desc != NULL) {
+		if (rx_desc->bb_mr) {
+			rc = ibv_dereg_mr(rx_desc->bb_mr);
+			if (rc) {
+				SPDK_ERRLOG("Unable to de-register rx bb mr\n");
+			}
+		}
+
+		if (rx_desc->bb) {
+			if (conn->type == CONN_TYPE_AQ) {
+				rte_mempool_put(g_nvmf_tgt.bb_small_pool, (void *)rx_desc->bb);
+			} else {
+				rte_mempool_put(g_nvmf_tgt.bb_large_pool, (void *)rx_desc->bb);
+			}
+		}
+
+		if (rx_desc->msg_buf_mr) {
+			rc = ibv_dereg_mr(rx_desc->msg_buf_mr);
+			if (rc) {
+				SPDK_ERRLOG("Unable to de-register rx mr\n");
+			}
+		}
+
+		rte_mempool_put(g_nvmf_tgt.rx_desc_pool, (void *)rx_desc);
+	}
+
+	/* pop and free each fully initialized descriptor; iterating with a plain
+	   STAILQ_FOREACH would dereference the link field of an entry already
+	   returned to the mempool */
+	while (!STAILQ_EMPTY(&conn->qp_rx_desc)) {
+		tmp = STAILQ_FIRST(&conn->qp_rx_desc);
+		STAILQ_REMOVE_HEAD(&conn->qp_rx_desc, link);
+
+		rc = ibv_dereg_mr(tmp->bb_mr);
+		if (rc) {
+			SPDK_ERRLOG("Unable to de-register rx bb mr\n");
+		}
+
+		if (conn->type == CONN_TYPE_AQ) {
+			rte_mempool_put(g_nvmf_tgt.bb_small_pool, (void *)tmp->bb);
+		} else {
+			rte_mempool_put(g_nvmf_tgt.bb_large_pool, (void *)tmp->bb);
+		}
+
+		rc = ibv_dereg_mr(tmp->msg_buf_mr);
+		if (rc) {
+			SPDK_ERRLOG("Unable to de-register rx mr\n");
+		}
+
+		rte_mempool_put(g_nvmf_tgt.rx_desc_pool, (void *)tmp);
+	}
+
+	return -ENOMEM;
+}
+
+/* Allocate the AQ QP Tx Buffer Resources */
+static int
+alloc_qp_tx_desc(struct spdk_nvmf_conn *conn)
+{
+	struct nvme_qp_tx_desc *tx_desc, *tmp;
+	int i;
+	int rc;
+
+	/* Initialize the tx descriptors */
+	for (i = 0; i < conn->cq_depth; i++) {
+		tx_desc = NULL;
+		rc = rte_mempool_get(g_nvmf_tgt.tx_desc_pool,
(void **)&tx_desc);
+		if ((rc < 0) || !tx_desc) {
+			SPDK_ERRLOG("Unable to get tx desc object\n");
+			goto fail;
+		}
+
+		tx_desc->msg_buf_mr = ibv_reg_mr(conn->pd,
+						 (void *)&tx_desc->msg_buf,
+						 sizeof(tx_desc->msg_buf),
+						 IBV_ACCESS_LOCAL_WRITE |
+						 IBV_ACCESS_REMOTE_WRITE);
+		if (tx_desc->msg_buf_mr == NULL) {
+			SPDK_ERRLOG("Unable to register tx desc buffer mr\n");
+			goto fail;
+		}
+
+		tx_desc->conn = conn;
+
+		/* initialize send_sgl of tx_desc */
+		tx_desc->send_sgl.addr = (uint64_t)&tx_desc->msg_buf;
+		tx_desc->send_sgl.length = sizeof(tx_desc->msg_buf);
+		tx_desc->send_sgl.lkey = tx_desc->msg_buf_mr->lkey;
+
+		/* init request state associated with each tx_desc */
+		tx_desc->req_state.rsp = &tx_desc->msg_buf;
+		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "tx_desc %p: req_state %p, rsp %p\n",
+			      tx_desc, &tx_desc->req_state,
+			      tx_desc->req_state.rsp);
+
+		STAILQ_INSERT_TAIL(&conn->qp_tx_desc, tx_desc, link);
+	}
+
+	return 0;
+fail:
+	/* cleanup any partial descriptor that failed during init loop */
+	if (tx_desc != NULL) {
+
+		if (tx_desc->msg_buf_mr) {
+			rc = ibv_dereg_mr(tx_desc->msg_buf_mr);
+			if (rc) {
+				SPDK_ERRLOG("Unable to de-register tx mr\n");
+			}
+		}
+
+		rte_mempool_put(g_nvmf_tgt.tx_desc_pool, (void *)tx_desc);
+	}
+
+	/* pop and free each fully initialized descriptor, as in alloc_qp_rx_desc() */
+	while (!STAILQ_EMPTY(&conn->qp_tx_desc)) {
+		tmp = STAILQ_FIRST(&conn->qp_tx_desc);
+		STAILQ_REMOVE_HEAD(&conn->qp_tx_desc, link);
+
+		rc = ibv_dereg_mr(tmp->msg_buf_mr);
+		if (rc) {
+			SPDK_ERRLOG("Unable to de-register tx mr\n");
+		}
+
+		rte_mempool_put(g_nvmf_tgt.tx_desc_pool, (void *)tmp);
+	}
+
+	return -ENOMEM;
+}
+
+SPDK_LOG_REGISTER_TRACE_FLAG("rdma", SPDK_TRACE_RDMA)
diff --git a/lib/nvmf/rdma.h b/lib/nvmf/rdma.h
new file mode 100644
index 0000000000..a8f3ab1f4e
--- /dev/null
+++ b/lib/nvmf/rdma.h
@@ -0,0 +1,81 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NVMF_RDMA_H_
+#define _NVMF_RDMA_H_
+
+#include <infiniband/verbs.h>
+
+#include "nvmf_internal.h"
+#include "spdk/nvmf_spec.h"
+
+/* Define the Admin Queue Rx/Tx Descriptors */
+
+struct nvme_qp_rx_desc {
+	union nvmf_h2c_msg msg_buf;
+	struct spdk_nvmf_conn *conn;
+	struct ibv_mr *msg_buf_mr;
+	struct ibv_sge recv_sgl;
+	struct ibv_sge bb_sgl; /* must follow recv_sgl */
+	struct ibv_mr *bb_mr;
+	uint8_t *bb;
+	uint32_t bb_len;
+	uint32_t recv_bc;
+	STAILQ_ENTRY(nvme_qp_rx_desc) link;
+};
+
+struct nvme_qp_tx_desc {
+	union nvmf_c2h_msg msg_buf;
+	struct spdk_nvmf_conn *conn;
+	struct nvmf_request req_state;
+	struct ibv_mr *msg_buf_mr;
+	struct ibv_sge send_sgl;
+	struct nvme_qp_rx_desc *rx_desc;
+	STAILQ_ENTRY(nvme_qp_tx_desc) link;
+};
+
+int nvmf_post_rdma_read(struct spdk_nvmf_conn *conn,
+			struct nvme_qp_tx_desc *tx_desc);
+int nvmf_post_rdma_write(struct spdk_nvmf_conn *conn,
+			 struct nvme_qp_tx_desc *tx_desc);
+int nvmf_post_rdma_recv(struct spdk_nvmf_conn *conn,
+			struct nvme_qp_rx_desc *rx_desc);
+int nvmf_post_rdma_send(struct spdk_nvmf_conn *conn,
+			struct nvme_qp_tx_desc *tx_desc);
+int nvmf_rdma_init(void);
+void nvmf_rdma_conn_cleanup(struct spdk_nvmf_conn *conn);
+
+int nvmf_acceptor_start(void);
+void nvmf_acceptor_stop(void);
+
+#endif /* _NVMF_RDMA_H_ */
diff --git a/lib/nvmf/session.c b/lib/nvmf/session.c
new file mode 100644
index 0000000000..37f68619e2
--- /dev/null
+++ b/lib/nvmf/session.c
@@ -0,0 +1,517 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "session.h"
+#include "nvmf.h"
+#include "nvmf_internal.h"
+#include "subsystem_grp.h"
+#include "spdk/log.h"
+#include "spdk/trace.h"
+
+static struct nvmf_session *
+nvmf_create_session(const char *subnqn)
+{
+	struct nvmf_session *session;
+	struct spdk_nvmf_subsystem *subsystem;
+
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_create_session:\n");
+
+	/* locate the previously provisioned subsystem */
+	subsystem = nvmf_find_subsystem(subnqn);
+	if (subsystem == NULL)
+		return NULL;
+
+	session = malloc(sizeof(struct nvmf_session));
+	if (session == NULL)
+		goto exit;
+	memset(session, 0, sizeof(struct nvmf_session));
+
+	subsystem->num_sessions++;
+	/* define cntlid that is unique across all subsystems */
+	session->cntlid = (subsystem->num << NVMF_CNTLID_SUBS_SHIFT) + subsystem->num_sessions;
+	TAILQ_INSERT_HEAD(&subsystem->sessions, session, entries);
+
+	SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_create_session: allocated session cntlid %d\n",
+		      session->cntlid);
+	TAILQ_INIT(&session->connections);
+	session->num_connections = 0;
+	session->is_valid = 1;
+	session->subsys = subsystem;
+
+exit:
+	return session;
+}
+
+static void
+nvmf_delete_session(struct nvmf_session *session)
+{
+	session->subsys->num_sessions--;
+	TAILQ_REMOVE(&session->subsys->sessions, session, entries);
+
+	free(session);
+}
+
+void
+nvmf_init_session_properties(struct nvmf_session *session, int aq_depth)
+{
+	/* for now base virtual controller properties on first namespace controller */
+	struct spdk_nvme_ctrlr *ctrlr = session->subsys->ns_list_map[0].ctrlr;
+	const struct spdk_nvme_ctrlr_data *cdata;
+	struct spdk_nvmf_ctrlr_maxcmd *maxcmd;
+	struct spdk_nvmf_ctrlr_kas *kas;
+	struct spdk_nvmf_extended_identify_ctrlr_data *nvmfdata;
+	struct spdk_nvmf_sgl_support *nvmfsgl;
+	uint8_t *vc_data;
+	uint32_t io_depth;
+
+	/*
+	  Here we are going to initialize the features, properties, and
+	  identify controller details for the virtual controller associated
+	  with a specific subsystem session.
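+
+	  The NVMf-specific identify fields (KAS, MAXCMD, and the extended
+	  identify data) are then written into that copy at the fixed byte
+	  offsets given by the SPDK_NVMF_*_OFFSET constants used below.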
+ */ + + /* Init the virtual controller details using actual HW details */ + cdata = spdk_nvme_ctrlr_get_data(ctrlr); + memcpy((char *)&session->vcdata, (char *)cdata, sizeof(struct spdk_nvme_ctrlr_data)); + + /* update virtual controller data to represent merge of + controllers for all namespaces + */ + session->vcdata.nn = session->subsys->ns_count; + + /* indicate support for only a single AER */ + session->vcdata.aerl = 0; + + /* reset cntlid in vcdata to match the logical cntlid known to NVMf */ + session->vcdata.cntlid = session->cntlid; + + /* initialize the nvmf new and extension details in controller data */ + vc_data = (uint8_t *)&session->vcdata; + kas = (struct spdk_nvmf_ctrlr_kas *)&vc_data[SPDK_NVMF_CTRLR_KAS_OFFSET]; + kas->kas = 10; /* for keep alive granularity in seconds (10 * 100ms) */ + maxcmd = (struct spdk_nvmf_ctrlr_maxcmd *)&vc_data[SPDK_NVMF_CTRLR_MAXCMD_OFFSET]; + io_depth = SPDK_NVMF_DEFAULT_MAX_QUEUE_DEPTH; + maxcmd->maxcmd = io_depth; + nvmfdata = (struct spdk_nvmf_extended_identify_ctrlr_data *) + &vc_data[SPDK_NVMF_EXTENDED_CTRLR_DATA_OFFSET]; + nvmfdata->ioccsz = (NVMF_H2C_MAX_MSG / 16); + nvmfdata->iorcsz = (NVMF_C2H_MAX_MSG / 16); + nvmfdata->icdoff = 0; /* offset starts directly after SQE */ + nvmfdata->ctrattr = 0; /* dynamic controller model */ + nvmfdata->msdbd = 1; /* target supports single SGL in capsule */ + nvmfsgl = (struct spdk_nvmf_sgl_support *)&session->vcdata.sgls; + nvmfsgl->keyed_sgls = 1; + nvmfsgl->address_as_offset_sgl_supported = 1; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ctrlr data: maxcmd %x\n", + maxcmd->maxcmd); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ext ctrlr data: ioccsz %x\n", + nvmfdata->ioccsz); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ext ctrlr data: iorcsz %x\n", + nvmfdata->iorcsz); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ext ctrlr data: icdoff %x\n", + nvmfdata->icdoff); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ext ctrlr data: ctrattr %x\n", + nvmfdata->ctrattr); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: ext ctrlr data: msdbd %x\n", + nvmfdata->msdbd); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: sgls data: 0x%x\n", + *(uint32_t *)nvmfsgl); + + /* feature: Number Of Queues. */ + /* Init to zero. 
Host shall set before enabling the controller */ + session->max_io_queues = MAX_SESSION_IO_QUEUES; + session->vcfeat.noq = 0; + + session->vcprop.cap_lo.raw = 0; + session->vcprop.cap_lo.bits.cqr = 0; /* queues not contiguous */ + session->vcprop.cap_lo.bits.mqes = (io_depth - 1); /* max queue depth */ + session->vcprop.cap_lo.bits.ams = 0; /* optional arb mechanisms */ + session->vcprop.cap_lo.bits.to = 1; /* ready timeout - 500 msec units */ + + session->vcprop.cap_hi.raw = 0; + session->vcprop.cap_hi.bits.dstrd = 0; /* fixed to 0 for NVMf */ + session->vcprop.cap_hi.bits.css_nvm = 1; /* NVM command set */ + session->vcprop.cap_hi.bits.mpsmin = 0; /* 2 ^ 12 + mpsmin == 4k */ + session->vcprop.cap_hi.bits.mpsmax = 0; /* 2 ^ 12 + mpsmax == 4k */ + + session->vcprop.vs = 0x10000; /* Version Supported: Major 1, Minor 0 */ + + session->vcprop.cc.raw = 0; + session->vcprop.cc.bits.en = 0; /* Init controller disabled */ + + session->vcprop.csts.raw = 0; + session->vcprop.csts.bits.rdy = 0; /* Init controller as not ready */ + + /* nssr not defined for v1.0 */ + + /* Set AQA details to reflect the virtual connection SQ/CQ depth */ + session->vcprop.aqa.bits.asqs = (aq_depth & 0xFFF); + session->vcprop.aqa.bits.acqs = (aq_depth & 0xFFF); + + session->vcprop.propsz.bits.size = sizeof(struct spdk_nvmf_ctrlr_properties) / 64; + session->vcprop.capattr_hi.raw = 0; + session->vcprop.capattr_lo.bits.rspsz = sizeof(union nvmf_c2h_msg) / 16; + session->vcprop.capattr_lo.bits.cmdsz = sizeof(union nvmf_h2c_msg) / 16; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: max io queues %x\n", + session->max_io_queues); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: cap_lo %x\n", + session->vcprop.cap_lo.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: cap_hi %x\n", + session->vcprop.cap_hi.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: vs %x\n", session->vcprop.vs); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: cc %x\n", session->vcprop.cc.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: csts %x\n", + session->vcprop.csts.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: nssr %x\n", session->vcprop.nssr); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: aqa %x\n", session->vcprop.aqa.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: propsz %x\n", + session->vcprop.propsz.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: capattr_lo %x\n", + session->vcprop.capattr_lo.raw); + SPDK_TRACELOG(SPDK_TRACE_NVMF, " nvmf_init_session_properties: capattr_hi %x\n", + session->vcprop.capattr_hi.raw); +} + +static struct nvmf_session * +nvmf_find_session_by_id(const char *subnqn, uint16_t session_id) +{ + struct spdk_nvmf_subsystem *subsystem; + struct nvmf_session *sess, *tsess; + + subsystem = nvmf_find_subsystem(subnqn); + if (subsystem == NULL) + return NULL; + + TAILQ_FOREACH_SAFE(sess, &subsystem->sessions, entries, tsess) { + if (sess->cntlid == session_id) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Session Match cntlid %d, sess %p\n", session_id, sess); + return sess; + } + } + + return NULL; +} + +struct nvmf_session * +nvmf_connect(void *fabric_conn, + struct spdk_nvmf_fabric_connect_cmd *connect, + struct spdk_nvmf_fabric_connect_data *connect_data, + struct spdk_nvmf_fabric_connect_rsp *response) +{ + struct nvmf_session *session; + struct nvmf_connection_entry *connection = NULL; + + connection = malloc(sizeof(struct 
nvmf_connection_entry));
+	if (connection == NULL)
+		goto connect_fail;
+
+	/* Figure out if this is the first connect and we
+	 * need to allocate an nvmf_session or if this is
+	 * a subsequent connect for an I/O queue and we need
+	 * to return an existing session
+	 */
+	if (connect->qid == 0) {
+		/* first connect for AQ connection */
+		SPDK_TRACELOG(SPDK_TRACE_NVMF, "AQ connect capsule\n");
+		if (connect_data->cntlid == 0xffff) {
+			/* no nvmf session/controller association, allocate one */
+			session = nvmf_create_session(connect_data->subnqn);
+			if (session == NULL) {
+				SPDK_ERRLOG("create session failed\n");
+				response->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
+				goto connect_fail;
+			}
+		} else {
+			SPDK_ERRLOG("nvmf AQ connection attempt to cntlid %d\n", connect_data->cntlid);
+			response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
+			goto connect_fail;
+		}
+		connection->is_aq_conn = 1;
+	} else {
+		SPDK_TRACELOG(SPDK_TRACE_NVMF, "IOQ connect capsule\n");
+		/* locate the existing session */
+		session = nvmf_find_session_by_id(connect_data->subnqn, connect_data->cntlid);
+		if (session == NULL) {
+			SPDK_ERRLOG("invalid nvmf cntlid %d\n", connect_data->cntlid);
+			response->status.sc = SPDK_NVMF_FABRIC_SC_RESTART_DISCOVERY;
+			goto connect_fail;
+		}
+		/* check if we would exceed session connection limit */
+		if (session->num_connections >= session->max_connections_allowed) {
+			SPDK_ERRLOG("connection limit %d\n", session->num_connections);
+			response->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
+			goto connect_fail;
+		}
+
+		if (session->is_valid == 0) {
+			SPDK_ERRLOG("session invalid or at IO connection limit %d\n", session->num_connections);
+			response->status.sc = SPDK_NVMF_FABRIC_SC_RESTART_DISCOVERY;
+			goto connect_fail;
+		}
+		connection->is_aq_conn = 0;
+	}
+
+	connection->fabric_conn = fabric_conn;
+
+	session->num_connections++;
+	TAILQ_INSERT_HEAD(&session->connections, connection, entries);
+
+	response->status_code_specific.success.cntlid = session->cntlid;
+	response->status.sc = 0;
+
+	return session;
+
+connect_fail:
+	if (connection)
+		free(connection);
+	return NULL;
+}
+
+void
+nvmf_disconnect(void *fabric_conn,
+		struct nvmf_session *session)
+{
+	struct nvmf_connection_entry *conn, *tconn, *rconn = NULL;
+
+	/* Indication from the fabric transport that a
+	 * specific connection has gone away. 
If the + * connection is the AQ connection then expect + * that the complete session will go away + */ + if (session == NULL) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_disconnect: session not active!\n"); + return; + } + + TAILQ_FOREACH_SAFE(conn, &session->connections, entries, tconn) { + if (conn->fabric_conn == fabric_conn) { + rconn = conn; + break; + } + } + if (rconn == NULL) { + SPDK_ERRLOG("Session connection did not exist!\n"); + return; + } + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Disconnect NVMf conn %p, sess %p\n", rconn, session); + + session->num_connections--; + TAILQ_REMOVE(&session->connections, rconn, entries); + free(rconn); + + if (session->num_connections == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Session connection count 0, deleting session %p!\n", + session); + nvmf_delete_session(session); + } +} + +void +nvmf_complete_cmd(void *rsp, const struct spdk_nvme_cpl *cmp) +{ + struct nvmf_request *req_state = (struct nvmf_request *)rsp; + struct spdk_nvme_cpl *response; + + spdk_trace_record(TRACE_NVMF_LIB_COMPLETE, 0, 0, (uint64_t)req_state->fabric_rx_ctx, 0); + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_complete_cmd callback: req_state %p\n", req_state); + + response = &req_state->rsp->nvme_cpl; + memcpy(response, cmp, sizeof(*cmp)); + + req_state->cb_fn(req_state); +} + +void +nvmf_property_get(struct nvmf_session *session, + struct spdk_nvmf_fabric_prop_get_cmd *cmd, + struct spdk_nvmf_fabric_prop_get_rsp *response) +{ + response->status.sc = 0; + response->value.u64 = 0; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_property_get: attrib %d, offset %x\n", + cmd->attrib, cmd->ofst); + + if (cmd->ofst > offsetof(struct spdk_nvmf_ctrlr_properties, capattr_hi)) { + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return; + } + + switch (cmd->ofst) { + case (offsetof(struct spdk_nvmf_ctrlr_properties, cap_lo)): + response->value.u32.low = session->vcprop.cap_lo.raw; + if (cmd->attrib == 1) + response->value.u32.high = session->vcprop.cap_hi.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, cap_hi)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.cap_hi.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, vs)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.vs; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, intms)): + case (offsetof(struct spdk_nvmf_ctrlr_properties, intmc)): + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, cc)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.cc.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, csts)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.csts.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, nssr)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.nssr; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, aqa)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.aqa.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, asq)): + case (offsetof(struct 
spdk_nvmf_ctrlr_properties, acq)): + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, propsz)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.propsz.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, capattr_lo)): + response->value.u32.low = session->vcprop.capattr_lo.raw; + if (cmd->attrib == 1) + response->value.u32.high = session->vcprop.capattr_hi.raw; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, capattr_hi)): + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + response->value.u32.low = session->vcprop.capattr_hi.raw; + break; + default: + break; + } +} + +void +nvmf_property_set(struct nvmf_session *session, + struct spdk_nvmf_fabric_prop_set_cmd *cmd, + struct spdk_nvmf_fabric_prop_set_rsp *response, + bool *shutdown) +{ + response->status.sc = 0; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, + "nvmf_property_set: attrib %d, offset %x, value %lx, value low %x, value high %x\n", + cmd->attrib, cmd->ofst, cmd->value.u64, cmd->value.u32.low, cmd->value.u32.high); + + if (cmd->ofst > offsetof(struct spdk_nvmf_ctrlr_properties, capattr_hi)) { + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + return; + } + + /* TBD: determine which values we allow to be changed, deal with spec version + difference. Fields within 32bit value, ex. for reset in csts */ + + switch (cmd->ofst) { + case (offsetof(struct spdk_nvmf_ctrlr_properties, cc)): { + union spdk_nvme_cc_register cc; + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set CC\n"); + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else { + cc.raw = cmd->value.u32.low; + + if (cc.bits.en == 1 && session->vcprop.cc.bits.en == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set CC Enable!\n"); + session->vcprop.csts.bits.rdy = 1; + } + + if (cc.bits.shn && session->vcprop.cc.bits.shn == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set CC Shutdown!\n"); + session->vcprop.cc.bits.en = 0; + *shutdown = true; + } + + session->vcprop.cc.raw = cc.raw; + } + } + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, csts)): + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set CSTS\n"); + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + session->vcprop.csts.raw = cmd->value.u32.low; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, nssr)): + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set NSSR\n"); + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + session->vcprop.nssr = cmd->value.u32.low; + break; + case (offsetof(struct spdk_nvmf_ctrlr_properties, aqa)): + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set AQA\n"); + if (cmd->attrib == 1) + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + else + session->vcprop.aqa.raw = cmd->value.u32.low; + break; + default: + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Property Set Invalid Offset %x\n", cmd->ofst); + response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM; + break; + } +} diff --git a/lib/nvmf/session.h b/lib/nvmf/session.h new file mode 100644 index 0000000000..8f78467cf7 --- /dev/null +++ b/lib/nvmf/session.h @@ -0,0 +1,151 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NVMF_SESSION_H
+#define NVMF_SESSION_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "spdk/nvmf_spec.h"
+#include "spdk/queue.h"
+
+/*
+ * This structure maintains local NVMf library specific connection
+ * state that includes an opaque pointer back to its parent fabric
+ * transport connection context.
+ */
+struct nvmf_connection_entry {
+	void *fabric_conn;
+	int is_aq_conn;
+
+	TAILQ_ENTRY(nvmf_connection_entry) entries;
+};
+
+/* define a virtual controller limit to the number of QPs supported */
+#define MAX_SESSION_IO_QUEUES 64
+
+struct nvmf_io_queue {
+	uint16_t sq_size;
+	uint16_t sq_active;
+	uint16_t cq_size;
+	uint16_t cq_active;
+};
+
+struct nvmf_vc_features {
+	uint32_t arb;	/* arbitration */
+	uint32_t pm;	/* power management */
+	uint32_t temp;	/* temp threshold */
+	uint32_t err;	/* error recovery */
+	uint32_t vwc;	/* volatile write cache */
+	uint32_t noq;	/* number of queues */
+	uint32_t ic;	/* interrupt coalescing */
+	uint32_t ivc;	/* interrupt vector config */
+	uint32_t wan;	/* write atomicity normal */
+	uint32_t aec;	/* async event config */
+	uint32_t apst;	/* autonomous power state transition */
+	uint32_t hmb;	/* host memory buffer */
+	uint32_t spm;	/* sw progress marker */
+	uint32_t hostid;	/* host identifier */
+	uint32_t resnm;	/* reservation notification mask */
+	uint32_t resp;	/* reservation persistence */
+};
+
+/*
+ * This structure maintains the NVMf virtual controller session
+ * state.  Each NVMf session permits some number of connections:
+ * at least one admin queue connection, plus additional I/O queue
+ * connections.
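+ *
+ * A session is created on the first (admin queue) Connect and is
+ * freed by nvmf_disconnect() when its last connection goes away.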
+ */ +struct nvmf_session { + struct spdk_nvmf_subsystem *subsys; + + uint16_t cntlid; + uint32_t max_io_queues; /* maximum supported by backend NVMe library */ + struct nvmf_io_queue qps[MAX_SESSION_IO_QUEUES]; + int active_queues; + int is_valid; + struct spdk_nvmf_ctrlr_properties vcprop; /* virtual controller properties */ + struct nvmf_vc_features vcfeat; /* virtual controller features */ + struct spdk_nvme_ctrlr_data vcdata; /* virtual controller data */ + + TAILQ_HEAD(connection_q, nvmf_connection_entry) connections; + int num_connections; + int max_connections_allowed; + + struct nvmf_request *aer_req_state; + + TAILQ_ENTRY(nvmf_session) entries; +}; + +struct nvmf_session * +nvmf_connect(void *fabric_conn, + struct spdk_nvmf_fabric_connect_cmd *connect, + struct spdk_nvmf_fabric_connect_data *connect_data, + struct spdk_nvmf_fabric_connect_rsp *response); + +void +nvmf_disconnect(void *fabric_conn, struct nvmf_session *session); + +void +nvmf_init_session_properties(struct nvmf_session *session, int aq_depth); + +int +nvmf_process_admin_cmd(struct nvmf_session *session, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + struct nvmf_request *req_state); + +int +nvmf_process_io_cmd(struct nvmf_session *session, + struct spdk_nvme_cmd *cmd, + void *buf, uint32_t len, + struct nvmf_request *req_state); + +void +nvmf_property_get(struct nvmf_session *session, + struct spdk_nvmf_fabric_prop_get_cmd *cmd, + struct spdk_nvmf_fabric_prop_get_rsp *response); + +void +nvmf_property_set(struct nvmf_session *session, + struct spdk_nvmf_fabric_prop_set_cmd *cmd, + struct spdk_nvmf_fabric_prop_set_rsp *response, + bool *shutdown); + +void +nvmf_check_io_completions(struct nvmf_session *session); + +void +nvmf_check_admin_completions(struct nvmf_session *session); + +#endif diff --git a/lib/nvmf/subsystem_grp.c b/lib/nvmf/subsystem_grp.c new file mode 100644 index 0000000000..10dee1c069 --- /dev/null +++ b/lib/nvmf/subsystem_grp.c @@ -0,0 +1,446 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "controller.h" +#include "port.h" +#include "init_grp.h" +#include "nvmf_internal.h" +#include "nvmf.h" +#include "session.h" +#include "subsystem_grp.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/trace.h" + +#define MAX_TMPBUF 1024 +#define SPDK_CN_TAG_MAX 0x0000ffff + +static TAILQ_HEAD(, spdk_nvmf_subsystem_grp) g_ssg_head = TAILQ_HEAD_INITIALIZER(g_ssg_head); +static TAILQ_HEAD(, spdk_nvmf_subsystem) g_subsystems = TAILQ_HEAD_INITIALIZER(g_subsystems); + +struct spdk_nvmf_subsystem * +nvmf_find_subsystem(const char *subnqn) +{ + struct spdk_nvmf_subsystem *subs; + + if (subnqn == NULL) + return NULL; + + TAILQ_FOREACH(subs, &g_subsystems, entries) { + if (strcasecmp(subnqn, subs->subnqn) == 0) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "found subsystem group with name: %s\n", + subnqn); + return subs; + } + } + + fprintf(stderr, "can't find subsystem %s\n", subnqn); + return NULL; +} + +struct spdk_nvmf_subsystem * +nvmf_create_subsystem(int num, char *name) +{ + struct spdk_nvmf_subsystem *subsystem; + + subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem)); + if (subsystem == NULL) { + return NULL; + } + + memset(subsystem, 0, sizeof(struct spdk_nvmf_subsystem)); + SPDK_TRACELOG(SPDK_TRACE_NVMF, "nvmf_create_subsystem: allocated subsystem %p\n", subsystem); + + subsystem->num = num; + snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", name); + TAILQ_INIT(&subsystem->sessions); + + TAILQ_INSERT_HEAD(&g_subsystems, subsystem, entries); + + return subsystem; +} + +int +nvmf_delete_subsystem(struct spdk_nvmf_subsystem *subsystem) +{ + struct nvmf_session *sess, *tsess; + + if (subsystem == NULL) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, + "nvmf_delete_subsystem: there is no subsystem\n"); + return 0; + } + + TAILQ_FOREACH_SAFE(sess, &subsystem->sessions, entries, tsess) { + subsystem->num_sessions--; + TAILQ_REMOVE(&subsystem->sessions, sess, entries); + free(sess); + } + + TAILQ_REMOVE(&g_subsystems, subsystem, entries); + + free(subsystem); + return 0; +} + +int +nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_ctrlr *ctrlr) +{ + int i, count, total_ns; + struct spdk_nvme_qpair *qpair; + struct spdk_nvmf_namespace *nvmf_ns; + + total_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); + + /* Assume that all I/O will be handled on one thread for now */ + qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, 0); + if (qpair == NULL) { + SPDK_ERRLOG("spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); + return -1; + } + + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Adding %d namespaces from ctrlr %p to subsystem %s\n", + total_ns, ctrlr, subsystem->subnqn); + + count = 0; + for (i = 0; i < MAX_PER_SUBSYSTEM_NAMESPACES; i++) { + if (count == total_ns) { + break; + } + nvmf_ns = &subsystem->ns_list_map[i]; + if (nvmf_ns->ctrlr == NULL) { + SPDK_TRACELOG(SPDK_TRACE_NVMF, "Adding namespace %d to subsystem %s\n", count + 1, + subsystem->subnqn); + nvmf_ns->ctrlr = ctrlr; + nvmf_ns->qpair = qpair; + nvmf_ns->nvme_ns_id = count + 1; + 
nvmf_ns->ns = spdk_nvme_ctrlr_get_ns(ctrlr, count + 1);
+			subsystem->ns_count++;
+			count++;
+		}
+	}
+
+	return 0;
+}
+
+/* nvmf uses the iSCSI IQN format to name target subsystems.  We expect that
+   the nvmf subnqn name provided during connect requests will be
+   equivalent to an individual controller name
+*/
+static int
+spdk_check_nvmf_name(const char *name)
+{
+	const unsigned char *up = (const unsigned char *) name;
+	size_t n;
+
+	/* valid iSCSI name? */
+	for (n = 0; up[n] != 0; n++) {
+		if (up[n] > 0x00U && up[n] <= 0x2cU)
+			goto err0;
+		if (up[n] == 0x2fU)
+			goto err0;
+		if (up[n] >= 0x3bU && up[n] <= 0x40U)
+			goto err0;
+		if (up[n] >= 0x5bU && up[n] <= 0x60U)
+			goto err0;
+		if (up[n] >= 0x7bU && up[n] <= 0x7fU)
+			goto err0;
+		if (isspace(up[n]))
+			goto err0;
+	}
+
+	/* valid format? */
+	if (strncasecmp(name, "iqn.", 4) == 0) {
+		/* iqn.YYYY-MM.reversed.domain.name */
+		if (!isdigit(up[4]) || !isdigit(up[5]) || !isdigit(up[6])
+		    || !isdigit(up[7]) || up[8] != '-' || !isdigit(up[9])
+		    || !isdigit(up[10]) || up[11] != '.') {
+			SPDK_ERRLOG("invalid iqn format. "
+				    "expect \"iqn.YYYY-MM.reversed.domain.name\"\n");
+			return -1;
+		}
+	} else if (strncasecmp(name, "eui.", 4) == 0) {
+		/* EUI-64 -> 16bytes */
+		/* XXX */
+	} else if (strncasecmp(name, "naa.", 4) == 0) {
+		/* 64bit -> 16bytes, 128bit -> 32bytes */
+		/* XXX */
+	}
+
+	return 0;
+err0:
+	SPDK_ERRLOG("Invalid iSCSI character [val %x, index %d]\n", up[n], (int)n);
+	return -1;
+}
+
+static void
+spdk_nvmf_subsystem_destruct(struct spdk_nvmf_subsystem_grp *ss_group)
+{
+	int i;
+
+	if (ss_group == NULL) {
+		return;
+	}
+
+	free(ss_group->name);
+
+	for (i = 0; i < ss_group->map_count; i++) {
+		ss_group->map[i].ig->ref--;
+	}
+
+	/* Call NVMf library to free the subsystem */
+	nvmf_delete_subsystem(ss_group->subsystem);
+
+	free(ss_group);
+}
+
+static int
+spdk_nvmf_subsystem_add_map(struct spdk_nvmf_subsystem_grp *ss_group,
+			    int port_tag, int ig_tag)
+{
+	struct spdk_nvmf_access_map *map;
+	struct spdk_nvmf_port *port;
+	struct spdk_nvmf_init_grp *ig;
+
+	port = spdk_nvmf_port_find_by_tag(port_tag);
+	if (port == NULL) {
+		SPDK_ERRLOG("%s: Port%d not found\n", ss_group->name, port_tag);
+		return -1;
+	}
+	if (port->state != GROUP_READY) {
+		SPDK_ERRLOG("%s: Port%d not active\n", ss_group->name, port_tag);
+		return -1;
+	}
+	ig = nvmf_initiator_group_find_by_tag(ig_tag);
+	if (ig == NULL) {
+		SPDK_ERRLOG("%s: InitiatorGroup%d not found\n", ss_group->name, ig_tag);
+		return -1;
+	}
+	if (ig->state != GROUP_READY) {
+		SPDK_ERRLOG("%s: InitiatorGroup%d not active\n", ss_group->name, ig_tag);
+		return -1;
+	}
+	ig->ref++;
+	map = &ss_group->map[ss_group->map_count];
+	map->port = port;
+	map->ig = ig;
+	ss_group->map_count++;
+
+	return 0;
+}
+
+static int
+spdk_cf_add_nvmf_subsystem(struct spdk_conf_section *sp)
+{
+	char buf[MAX_TMPBUF];
+	struct spdk_nvmf_subsystem_grp *ss_group;
+	const char *port_tag, *ig_tag;
+	const char *val, *name;
+	int port_tag_i, ig_tag_i;
+	struct spdk_nvmf_ctrlr *nvmf_ctrlr;
+	int i, ret;
+
+	printf("Provisioning NVMf Subsystem %d:\n", sp->num);
+
+	ss_group = calloc(1, sizeof(*ss_group));
+	if (!ss_group) {
+		SPDK_ERRLOG("could not allocate new subsystem group\n");
+		return -1;
+	}
+
+	ss_group->num = sp->num;
+
+	/* read in and verify the NQN for the subsystem */
+	name = spdk_conf_section_get_val(sp, "SubsystemName");
+	if (name == NULL) {
+		SPDK_ERRLOG("Subsystem Group %d: SubsystemName not found\n", ss_group->num);
+		goto err0;
+	}
+
+	if (strncasecmp(name, "iqn.", 4) != 0
+	    && strncasecmp(name,
"eui.", 4) != 0 + && strncasecmp(name, "naa.", 4) != 0) { + ss_group->name = spdk_sprintf_alloc("%s:%s", g_nvmf_tgt.nodebase, name); + } else { + ss_group->name = strdup(name); + } + + if (!ss_group->name) { + SPDK_ERRLOG("Could not allocate Controller Node name\n"); + goto err0; + } + + if (spdk_check_nvmf_name(ss_group->name) != 0) { + SPDK_ERRLOG("Controller Node name (n=%s) (fn=%s) contains an invalid character or format.\n", + name, ss_group->name); + goto err0; + } + + printf(" NVMf Subsystem: Name: %s\n", ss_group->name); + + /* Setup initiator and port access mapping */ + val = spdk_conf_section_get_val(sp, "Mapping"); + if (val == NULL) { + /* no access map */ + SPDK_ERRLOG("Subsystem Group %d: no access Mapping\n", ss_group->num); + goto err0; + } + + ss_group->map_count = 0; + for (i = 0; i < MAX_PER_SUBSYSTEM_ACCESS_MAP; i++) { + val = spdk_conf_section_get_nmval(sp, "Mapping", i, 0); + if (val == NULL) + break; + port_tag = spdk_conf_section_get_nmval(sp, "Mapping", i, 0); + ig_tag = spdk_conf_section_get_nmval(sp, "Mapping", i, 1); + if (port_tag == NULL || ig_tag == NULL) { + SPDK_ERRLOG("LU%d: mapping error\n", ss_group->num); + goto err0; + } + if (strncasecmp(port_tag, "Port", + strlen("Port")) != 0 + || sscanf(port_tag, "%*[^0-9]%d", &port_tag_i) != 1) { + SPDK_ERRLOG("LU%d: mapping port error\n", ss_group->num); + goto err0; + } + if (strncasecmp(ig_tag, "InitiatorGroup", + strlen("InitiatorGroup")) != 0 + || sscanf(ig_tag, "%*[^0-9]%d", &ig_tag_i) != 1) { + SPDK_ERRLOG("LU%d: mapping initiator error\n", ss_group->num); + goto err0; + } + if (port_tag_i < 1 || ig_tag_i < 1) { + SPDK_ERRLOG("LU%d: invalid group tag\n", ss_group->num); + goto err0; + } + + ret = spdk_nvmf_subsystem_add_map(ss_group, port_tag_i, ig_tag_i); + if (ret < 0) { + SPDK_ERRLOG("could not init access map within subsystem group\n"); + goto err0; + } + } + + /* register this subsystem with the NVMf library */ + ss_group->subsystem = nvmf_create_subsystem(ss_group->num, ss_group->name); + if (ss_group->subsystem == NULL) { + SPDK_ERRLOG("Failed creating new nvmf library subsystem\n"); + goto err0; + } + + /* add controllers into the subsystem */ + for (i = 0; i < MAX_PER_SUBSYSTEM_NAMESPACES; i++) { + snprintf(buf, sizeof(buf), "Controller%d", i); + val = spdk_conf_section_get_val(sp, buf); + if (val == NULL) { + break; + } + + val = spdk_conf_section_get_nmval(sp, buf, 0, 0); + if (val == NULL) { + SPDK_ERRLOG("No name specified for Controller%d\n", i); + goto err0; + } + + /* claim this controller from the available controller list */ + nvmf_ctrlr = spdk_nvmf_ctrlr_claim(val); + if (nvmf_ctrlr == NULL) { + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "nvme controller %s not found\n", val); + continue; + } + + /* notify nvmf library to add this device namespace + to this subsystem. 
+ */ + ret = nvmf_subsystem_add_ns(ss_group->subsystem, nvmf_ctrlr->ctrlr); + if (ret < 0) { + SPDK_ERRLOG("nvmf library add namespace failed!\n"); + goto err0; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, " NVMf Subsystem: Nvme Controller: %s , %p\n", + nvmf_ctrlr->name, nvmf_ctrlr->ctrlr); + } + + TAILQ_INSERT_TAIL(&g_ssg_head, ss_group, tailq); + + return 0; +err0: + spdk_nvmf_subsystem_destruct(ss_group); + return -1; +} + +int +spdk_initialize_nvmf_subsystems(void) +{ + struct spdk_conf_section *sp; + int rc; + + SPDK_NOTICELOG("\n*** NVMf Controller Subsystems Init ***\n"); + + TAILQ_INIT(&g_ssg_head); + + sp = spdk_conf_first_section(NULL); + while (sp != NULL) { + if (spdk_conf_section_match_prefix(sp, "SubsystemGroup")) { + if (sp->num > SPDK_CN_TAG_MAX) { + SPDK_ERRLOG("tag %d is invalid\n", sp->num); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "tag %d is invalid\n", sp->num); + return -1; + } + rc = spdk_cf_add_nvmf_subsystem(sp); + if (rc < 0) { + SPDK_ERRLOG("spdk_cf_add_nvmf_subsystem() failed\n"); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "spdk_cf_add_nvmf_subsystem() failed\n"); + return -1; + } + } + sp = spdk_conf_next_section(sp); + } + return 0; +} + +int +spdk_shutdown_nvmf_subsystems(void) +{ + struct spdk_nvmf_subsystem_grp *ss_group; + + while (!TAILQ_EMPTY(&g_ssg_head)) { + ss_group = TAILQ_FIRST(&g_ssg_head); + TAILQ_REMOVE(&g_ssg_head, ss_group, tailq); + spdk_nvmf_subsystem_destruct(ss_group); + } + + return 0; +} diff --git a/lib/nvmf/subsystem_grp.h b/lib/nvmf/subsystem_grp.h new file mode 100644 index 0000000000..30c45430dc --- /dev/null +++ b/lib/nvmf/subsystem_grp.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _NVMF_SUBSYSTEM_GROUP_H_ +#define _NVMF_SUBSYSTEM_GROUP_H_ + +#include "spdk/nvme.h" +#include "spdk/queue.h" + +struct spdk_nvmf_conn; + +#define MAX_PER_SUBSYSTEM_ACCESS_MAP 2 +#define MAX_PER_SUBSYSTEM_NAMESPACES 32 +#define MAX_NQN_SIZE 255 + +struct spdk_nvmf_namespace { + int nvme_ns_id; + struct spdk_nvme_ns *ns; + struct spdk_nvme_ctrlr *ctrlr; + struct spdk_nvme_qpair *qpair; +}; + +/* + * The NVMf subsystem, as indicated in the specification, is a collection + * of virtual controller sessions. Any individual controller session has + * access to all the NVMe device/namespaces maintained by the subsystem. + */ +struct spdk_nvmf_subsystem { + uint16_t num; + char subnqn[MAX_NQN_SIZE]; + int num_sessions; + TAILQ_HEAD(session_q, nvmf_session) sessions; + struct spdk_nvmf_namespace ns_list_map[MAX_PER_SUBSYSTEM_NAMESPACES]; + int ns_count; + + TAILQ_ENTRY(spdk_nvmf_subsystem) entries; +}; + +struct spdk_nvmf_access_map { + struct spdk_nvmf_port *port; + struct spdk_nvmf_init_grp *ig; +}; + +struct spdk_nvmf_subsystem_grp { + int num; + char *name;; + struct spdk_nvmf_subsystem *subsystem; + int map_count; + struct spdk_nvmf_access_map map[MAX_PER_SUBSYSTEM_ACCESS_MAP]; + TAILQ_ENTRY(spdk_nvmf_subsystem_grp) tailq; +}; + +struct spdk_nvmf_subsystem * +nvmf_create_subsystem(int num, char *name); + +int +nvmf_delete_subsystem(struct spdk_nvmf_subsystem *subsystem); + +int +nvmf_subsystem_add_ns(struct spdk_nvmf_subsystem *subsystem, + struct spdk_nvme_ctrlr *ctrlr); + +struct spdk_nvmf_subsystem * +nvmf_find_subsystem(const char *subnqn); + +int +spdk_initialize_nvmf_subsystems(void); + +int +spdk_shutdown_nvmf_subsystems(void); + +#endif /* _NVMF_SUBSYSTEM_GROUP_H_ */ diff --git a/scripts/autotest_common.sh b/scripts/autotest_common.sh index f89eb0bdcf..62b67bc499 100755 --- a/scripts/autotest_common.sh +++ b/scripts/autotest_common.sh @@ -23,6 +23,10 @@ case `uname` in ;; esac +if [ -f /usr/include/infiniband/verbs.h ]; then + MAKECONFIG="$MAKECONFIG CONFIG_NVMF=y" +fi + if [ -z "$output_dir" ]; then if [ -z "$rootdir" ] || [ ! -d "$rootdir/../output" ]; then output_dir=.