diff --git a/app/iscsi_tgt/Makefile b/app/iscsi_tgt/Makefile index c077c46cf2..04766fec85 100644 --- a/app/iscsi_tgt/Makefile +++ b/app/iscsi_tgt/Makefile @@ -63,6 +63,9 @@ SPDK_LIBS = \ $(SPDK_ROOT_DIR)/build/lib/libspdk_event.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_app_rpc.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif LIBS += -Wl,--whole-archive $(SPDK_LIBS) -Wl,--no-whole-archive LIBS += -lcrypto $(ENV_LINKER_ARGS) LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ diff --git a/autotest.sh b/autotest.sh index 550e9f4c68..67985809ab 100755 --- a/autotest.sh +++ b/autotest.sh @@ -113,6 +113,7 @@ run_test test/nvmf/fio/fio.sh run_test test/nvmf/filesystem/filesystem.sh run_test test/nvmf/discovery/discovery.sh run_test test/nvmf/nvme_cli/nvme_cli.sh +run_test test/nvmf/host/identify.sh timing_exit nvmf diff --git a/examples/nvme/arbitration/Makefile b/examples/nvme/arbitration/Makefile index a3c6c93fbb..5e16241df8 100644 --- a/examples/nvme/arbitration/Makefile +++ b/examples/nvme/arbitration/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/examples/nvme/fio_plugin/Makefile b/examples/nvme/fio_plugin/Makefile index 64384cd9cc..44d0999bf3 100644 --- a/examples/nvme/fio_plugin/Makefile +++ b/examples/nvme/fio_plugin/Makefile @@ -48,6 +48,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/examples/nvme/hello_world/Makefile b/examples/nvme/hello_world/Makefile index f097134c5b..9799fc23fc 100644 --- a/examples/nvme/hello_world/Makefile +++ b/examples/nvme/hello_world/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/examples/nvme/identify/Makefile b/examples/nvme/identify/Makefile index 66a293bb66..d9e675a660 100644 --- a/examples/nvme/identify/Makefile +++ b/examples/nvme/identify/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/examples/nvme/identify/identify.c b/examples/nvme/identify/identify.c index 3129a4bbd0..1a32690c84 100644 --- a/examples/nvme/identify/identify.c +++ b/examples/nvme/identify/identify.c @@ -36,10 +36,12 @@ #include #include #include +#include #include #include +#include "spdk/log.h" #include "spdk/nvme.h" #include "spdk/env.h" #include "spdk/nvme_intel.h" @@ -66,6 +68,8 @@ static struct spdk_nvme_intel_marketing_description_page intel_md_page; static bool g_hex_dump = false; +static struct spdk_nvme_discover_info info; + static void hex_dump(const void *data, size_t size) { @@ -845,26 +849,73 @@ usage(const char *program_name) { printf("%s [options]", program_name); printf("\n"); + printf("\t-x print hex dump of raw data\n"); printf("options:\n"); - printf(" -x print hex dump of raw data\n"); + 
printf("\t-a addr address for nvmf target\n"); + printf("\t-s service service id for nvmf target\n"); + printf("\t-n nqn nqn for nvmf target\n"); + + spdk_tracelog_usage(stdout, "-t"); + + printf("\t-v - verbose (enable warnings)\n"); + printf("\t-H - show this usage\n"); } static int parse_args(int argc, char **argv) { - int op; + int op, rc; - while ((op = getopt(argc, argv, "x")) != -1) { + while ((op = getopt(argc, argv, "a:n:s:t:x:H")) != -1) { switch (op) { case 'x': g_hex_dump = true; break; + case 't': + rc = spdk_log_set_trace_flag(optarg); + if (rc < 0) { + fprintf(stderr, "unknown flag\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } +#ifndef DEBUG + fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -t flag.\n", + argv[0]); + usage(argv[0]); + return 0; +#endif + break; + case 'a': + info.traddr = optarg; + break; + case 's': + info.trsvcid = optarg; + break; + case 'n': + info.nqn = optarg; + break; + case 'H': default: usage(argv[0]); return 1; } } + if (!info.traddr || !info.trsvcid || !info.nqn) { + return 0; + } + + if ((strlen(info.traddr) > 255)) { + printf("The string len of traddr should <= 255\n"); + return 0; + } + + if ((strlen(info.nqn) > 223)) { + printf("The string len of nqn should <= 223\n"); + return 0; + } + + info.type = SPDK_NVME_TRANSPORT_RDMA; optind = 1; return 0; @@ -889,6 +940,7 @@ static const char *ealargs[] = { "identify", "-c 0x1", "-n 4", + "-m 512", "--proc-type=auto", }; @@ -898,6 +950,7 @@ int main(int argc, char **argv) rc = parse_args(argc, argv); if (rc != 0) { + printf("parse_args error\n"); return rc; } @@ -910,6 +963,12 @@ int main(int argc, char **argv) } rc = 0; + if (info.type == SPDK_NVME_TRANSPORT_RDMA) { + if (spdk_nvme_discover(&info, NULL, probe_cb, attach_cb, NULL) != 0) { + fprintf(stderr, "spdk_nvme_probe() failed\n"); + } + } + if (spdk_nvme_probe(NULL, probe_cb, attach_cb, NULL) != 0) { fprintf(stderr, "spdk_nvme_probe() failed\n"); rc = 1; diff --git a/examples/nvme/nvme_manage/Makefile b/examples/nvme/nvme_manage/Makefile index 0ef5bdae05..5749662ef8 100644 --- a/examples/nvme/nvme_manage/Makefile +++ b/examples/nvme/nvme_manage/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/examples/nvme/perf/Makefile b/examples/nvme/perf/Makefile index a9fa8c5ebe..028767673e 100644 --- a/examples/nvme/perf/Makefile +++ b/examples/nvme/perf/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) ifeq ($(OS),Linux) diff --git a/examples/nvme/reserve/Makefile b/examples/nvme/reserve/Makefile index 99de1152ff..9085b12654 100644 --- a/examples/nvme/reserve/Makefile +++ b/examples/nvme/reserve/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \ $(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \ +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index 07bb22ba4a..5ef8cc9a5d 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -86,6 +86,47 @@ struct spdk_nvme_ctrlr_opts { * are 
sent. */ uint32_t keep_alive_timeout_ms; + /** + * Specify the number of retries to attempt when there is an issue with the transport + */ + int transport_retry_count; + /** + * The queue depth of each NVMe I/O queue. + */ + int queue_size; +}; + +/** + * Define the NVMe transport type + */ +enum spdk_nvme_transport_type { + SPDK_NVME_TRANSPORT_PCIE, + SPDK_NVME_TRANSPORT_RDMA, +}; + +/** + * + * A pointer to this structure is provided when connecting to a remote NVMe controller. + */ +struct spdk_nvme_discover_info { + /** + * Specify the NVMe transport type. + */ + enum spdk_nvme_transport_type type; + /** + * Subsystem NQN to connect to + */ + const char *nqn; + /** + * Transport address of the NVMe over fabrics target. For transports which use IP + * addressing (e.g. RDMA), this should be an IP-based address. + */ + const char *traddr; + /** + * Specify the transport service identifier. For transports which use IP addressing + * (e.g. RDMA), this field should be the port number. + */ + const char *trsvcid; }; /** @@ -105,6 +146,21 @@ struct spdk_nvme_probe_info { * If not available, each field will be filled with all 0xFs. */ struct spdk_pci_id pci_id; + + /** + * Subsystem NQN of the newly discovered subsystem + */ + const char *nqn; + /** + * Transport address of the NVMe over fabrics target. For transports which use IP + * addressing (e.g. RDMA), this should be an IP-based address. + */ + const char *traddr; + /** + * Specify the transport service identifier. For transports which use IP addressing + * (e.g. RDMA), this field should be the port number. + */ + const char *trsvcid; }; /** @@ -137,6 +193,24 @@ typedef void (*spdk_nvme_attach_cb)(void *cb_ctx, const struct spdk_nvme_probe_i */ typedef void (*spdk_nvme_remove_cb)(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); +/** + * \brief Discover remote controllers via the NVMe over Fabrics protocol. + * + * \param info Specifies the transport information used to discover the NVMe over fabrics target. + * \param cb_ctx Opaque value which will be passed back in cb_ctx parameter of the callbacks. + * \param probe_cb will be called once per NVMe device found in the system. + * \param attach_cb will be called for devices for which probe_cb returned true once that NVMe + * controller has been attached to the userspace driver. + * \param remove_cb will be called for devices that were attached in a previous spdk_nvme_probe() + * call but are no longer attached to the system. Optional; specify NULL if removal notices are not + * desired. + * + */ +int spdk_nvme_discover(const struct spdk_nvme_discover_info *info, + void *cb_ctx, spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb); + /** * \brief Enumerate the NVMe devices attached to the system and attach the userspace NVMe driver * to them if desired.
diff --git a/include/spdk/nvmf_spec.h b/include/spdk/nvmf_spec.h index 27d4a424b0..048780633d 100644 --- a/include/spdk/nvmf_spec.h +++ b/include/spdk/nvmf_spec.h @@ -46,6 +46,9 @@ #pragma pack(push, 1) +/* Minimum number of admin queue entries defined by NVMe over Fabrics spec */ +#define SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES 32 + struct spdk_nvmf_capsule_cmd { uint8_t opcode; uint8_t reserved1; diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile index 3232b32a97..75bdb10b55 100644 --- a/lib/nvme/Makefile +++ b/lib/nvme/Makefile @@ -36,6 +36,7 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk CFLAGS += $(ENV_CFLAGS) C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_ns_cmd.c nvme_ns.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c +C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c LIBNAME = nvme include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c index fe450f6fa9..963f0f1293 100644 --- a/lib/nvme/nvme.c +++ b/lib/nvme/nvme.c @@ -31,6 +31,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "spdk/nvmf_spec.h" #include "nvme_internal.h" struct nvme_driver _g_nvme_driver = { @@ -45,14 +46,11 @@ struct nvme_driver *g_spdk_nvme_driver = &_g_nvme_driver; int32_t spdk_nvme_retry_count; -static struct spdk_nvme_ctrlr * -nvme_attach(void *devhandle) +struct spdk_nvme_ctrlr * +nvme_attach(enum spdk_nvme_transport_type transport, void *devhandle) { - enum spdk_nvme_transport transport; struct spdk_nvme_ctrlr *ctrlr; - transport = SPDK_NVME_TRANSPORT_PCIE; - ctrlr = nvme_transport_ctrlr_construct(transport, devhandle); return ctrlr; @@ -230,22 +228,12 @@ nvme_mutex_init_shared(pthread_mutex_t *mtx) return rc; } -struct nvme_enum_ctx { - spdk_nvme_probe_cb probe_cb; - void *cb_ctx; -}; - -/* This function must only be called while holding g_spdk_nvme_driver->lock */ static int -nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) +nvme_enum_cb(enum spdk_nvme_transport_type type, struct nvme_enum_usr_ctx *enum_ctx, + struct spdk_nvme_probe_info *probe_info, void *devhandle) { - struct nvme_enum_ctx *enum_ctx = ctx; struct spdk_nvme_ctrlr *ctrlr; struct spdk_nvme_ctrlr_opts opts; - struct spdk_nvme_probe_info probe_info; - - probe_info.pci_addr = spdk_pci_device_get_addr(pci_dev); - probe_info.pci_id = spdk_pci_device_get_id(pci_dev); /* Verify that this controller is not already attached */ TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->attached_ctrlrs, tailq) { @@ -253,22 +241,26 @@ nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) * different per each process, we compare by BDF to determine whether it is the * same controller. 
*/ - if (spdk_pci_addr_compare(&probe_info.pci_addr, &ctrlr->probe_info.pci_addr) == 0) { - return 0; + if (type == SPDK_NVME_TRANSPORT_PCIE) { + if (spdk_pci_addr_compare(&probe_info->pci_addr, &ctrlr->probe_info.pci_addr) == 0) { + return 0; + } } + + /* Todo: need to differentiate the NVMe over fabrics to avoid duplicated connection */ } spdk_nvme_ctrlr_opts_set_defaults(&opts); - if (enum_ctx->probe_cb(enum_ctx->cb_ctx, &probe_info, &opts)) { - ctrlr = nvme_attach(pci_dev); + if (enum_ctx->probe_cb(enum_ctx->cb_ctx, probe_info, &opts)) { + ctrlr = nvme_attach(type, devhandle); if (ctrlr == NULL) { SPDK_ERRLOG("nvme_attach() failed\n"); return -1; } ctrlr->opts = opts; - ctrlr->probe_info = probe_info; + ctrlr->probe_info = *probe_info; TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->init_ctrlrs, ctrlr, tailq); } @@ -276,13 +268,15 @@ nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) return 0; } -int -spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, - spdk_nvme_remove_cb remove_cb) +static int +_spdk_nvme_probe(const struct spdk_nvme_discover_info *info, void *cb_ctx, + spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) { int rc, start_rc; struct nvme_enum_ctx enum_ctx; struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp; + enum spdk_nvme_transport_type type; if (!spdk_process_is_primary()) { while (g_spdk_nvme_driver->initialized == false) { @@ -302,10 +296,17 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a } } - enum_ctx.probe_cb = probe_cb; - enum_ctx.cb_ctx = cb_ctx; + enum_ctx.usr_ctx.probe_cb = probe_cb; + enum_ctx.usr_ctx.cb_ctx = cb_ctx; + enum_ctx.enum_cb = nvme_enum_cb; + if (!info) { + type = SPDK_NVME_TRANSPORT_PCIE; + } else { + type = info->type; + } + + rc = nvme_transport_ctrlr_scan(type, &enum_ctx, (void *)info); - rc = spdk_pci_enumerate(SPDK_PCI_DEVICE_NVME, nvme_enum_cb, &enum_ctx); /* * Keep going even if one or more nvme_attach() calls failed, * but maintain the value of rc to signal errors when we return. 
@@ -368,4 +369,23 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a return rc; } +int spdk_nvme_discover(const struct spdk_nvme_discover_info *info, void *cb_ctx, + spdk_nvme_probe_cb probe_cb, + spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + if (!info || !info->traddr || !info->trsvcid || !info->nqn) { + return -1; + } + + return _spdk_nvme_probe(info, cb_ctx, probe_cb, attach_cb, remove_cb); +} + +int +spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb, + spdk_nvme_remove_cb remove_cb) +{ + return _spdk_nvme_probe(NULL, cb_ctx, probe_cb, attach_cb, remove_cb); +} + SPDK_LOG_REGISTER_TRACE_FLAG("nvme", SPDK_TRACE_NVME) diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 463b78586c..065290a4ca 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -80,6 +80,7 @@ spdk_nvme_ctrlr_opts_set_defaults(struct spdk_nvme_ctrlr_opts *opts) opts->use_cmb_sqs = false; opts->arb_mechanism = SPDK_NVME_CC_AMS_RR; opts->keep_alive_timeout_ms = 10 * 1000; + opts->queue_size = DEFAULT_MAX_QUEUE_SIZE; } struct spdk_nvme_qpair * @@ -784,7 +785,8 @@ nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr) } if (spdk_nvme_cpl_is_error(&status.cpl)) { SPDK_ERRLOG("nvme_ctrlr_cmd_set_async_event_config failed!\n"); - return -ENXIO; + /* Change the return value since the NVMf target does not support AER yet; should be fixed later. */ + return 0; } /* aerl is a zero-based value, so we need to add 1 here. */ diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index 588fa9bb68..b3f83fcd21 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -56,6 +56,7 @@ #include "spdk/mmio.h" #include "spdk/pci_ids.h" #include "spdk/nvme_intel.h" +#include "spdk/nvmf_spec.h" #include "spdk_internal/log.h" @@ -91,6 +92,7 @@ * try to configure, if available.
*/ #define DEFAULT_MAX_IO_QUEUES (1024) +#define DEFAULT_MAX_QUEUE_SIZE (256) enum nvme_payload_type { NVME_PAYLOAD_TYPE_INVALID = 0, @@ -218,10 +220,6 @@ struct nvme_request { void *user_buffer; }; -enum spdk_nvme_transport { - SPDK_NVME_TRANSPORT_PCIE, -}; - struct nvme_completion_poll_status { struct spdk_nvme_cpl cpl; bool done; @@ -236,7 +234,7 @@ struct nvme_async_event_request { struct spdk_nvme_qpair { STAILQ_HEAD(, nvme_request) queued_req; - enum spdk_nvme_transport transport; + enum spdk_nvme_transport_type transport; uint16_t id; @@ -325,7 +323,7 @@ struct spdk_nvme_ctrlr { /** Array of namespaces indexed by nsid - 1 */ struct spdk_nvme_ns *ns; - enum spdk_nvme_transport transport; + enum spdk_nvme_transport_type transport; uint32_t num_ns; @@ -524,11 +522,27 @@ int nvme_mutex_init_recursive_shared(pthread_mutex_t *mtx); bool nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl); void nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd); void nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl); +struct spdk_nvme_ctrlr *nvme_attach(enum spdk_nvme_transport_type transport, void *devhandle); + +struct nvme_enum_usr_ctx { + spdk_nvme_probe_cb probe_cb; + void *cb_ctx; +}; + +typedef int (*nvme_ctrlr_enum_cb)(enum spdk_nvme_transport_type type, + struct nvme_enum_usr_ctx *enum_usr_ctx, + struct spdk_nvme_probe_info *probe_info, void *devhandle); + +struct nvme_enum_ctx { + struct nvme_enum_usr_ctx usr_ctx; + nvme_ctrlr_enum_cb enum_cb; +}; /* Transport specific functions */ #define DECLARE_TRANSPORT(name) \ - struct spdk_nvme_ctrlr *nvme_ ## name ## _ctrlr_construct(enum spdk_nvme_transport transport, void *devhandle); \ + struct spdk_nvme_ctrlr *nvme_ ## name ## _ctrlr_construct(enum spdk_nvme_transport_type transport, void *devhandle); \ int nvme_ ## name ## _ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); \ + int nvme_ ## name ## _ctrlr_scan(enum spdk_nvme_transport_type transport, struct nvme_enum_ctx *enum_ctx, void *devhandle); \ int nvme_ ## name ## _ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr); \ int nvme_ ## name ## _ctrlr_get_pci_id(struct spdk_nvme_ctrlr *ctrlr, struct spdk_pci_id *pci_id); \ int nvme_ ## name ## _ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); \ @@ -550,6 +564,9 @@ void nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme DECLARE_TRANSPORT(transport) /* generic transport dispatch functions */ DECLARE_TRANSPORT(pcie) +#ifdef SPDK_CONFIG_RDMA +DECLARE_TRANSPORT(rdma) +#endif #undef DECLARE_TRANSPORT diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c index c69e7dc258..09f45eac44 100644 --- a/lib/nvme/nvme_pcie.c +++ b/lib/nvme/nvme_pcie.c @@ -426,7 +426,28 @@ nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr) SPDK_NVME_QPRIO_URGENT); } -struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(enum spdk_nvme_transport transport, +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +static int +pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) +{ + struct spdk_nvme_probe_info probe_info = {}; + struct nvme_enum_ctx *enum_ctx = ctx; + + probe_info.pci_addr = spdk_pci_device_get_addr(pci_dev); + probe_info.pci_id = spdk_pci_device_get_id(pci_dev); + + return enum_ctx->enum_cb(SPDK_NVME_TRANSPORT_PCIE, &enum_ctx->usr_ctx, &probe_info, + (void *)pci_dev); +} + +int +nvme_pcie_ctrlr_scan(enum spdk_nvme_transport_type transport, + struct nvme_enum_ctx *enum_ctx, void *devhandle) +{ + return 
spdk_pci_enumerate(SPDK_PCI_DEVICE_NVME, pcie_nvme_enum_cb, enum_ctx); +} + +struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(enum spdk_nvme_transport_type transport, void *devhandle) { struct spdk_pci_device *pci_dev = devhandle; diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c new file mode 100644 index 0000000000..97d1af5dd8 --- /dev/null +++ b/lib/nvme/nvme_rdma.c @@ -0,0 +1,1462 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NVMe over RDMA transport + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spdk/assert.h" +#include "spdk/log.h" +#include "spdk/trace.h" +#include "spdk/event.h" +#include "spdk/queue.h" +#include "spdk/nvme.h" +#include "spdk/nvmf_spec.h" + +#include "nvme_internal.h" + +#define NVME_RDMA_TIME_OUT_IN_MS 2000 +#define NVME_RDMA_RW_BUFFER_SIZE 131072 +#define NVME_HOST_ID_DEFAULT "12345679890" +#define NVME_HOST_NQN "nqn.2016-06.io.spdk:host" + +/* +NVME RDMA qpair Resouce Defaults + */ +#define NVME_RDMA_DEFAULT_TX_SGE 2 +#define NVME_RDMA_DEFAULT_RX_SGE 1 + +typedef void (*spdk_nvme_rdma_req_cb)(struct nvme_request *req, + struct spdk_nvme_cpl *rsp); + +/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */ +struct nvme_rdma_ctrlr { + struct spdk_nvme_ctrlr ctrlr; + + uint16_t cntlid; + + struct spdk_nvme_discover_info info; +}; + +/* NVMe RDMA qpair extensions for spdk_nvme_qpair */ +struct nvme_rdma_qpair { + struct spdk_nvme_qpair qpair; + + uint16_t outstanding_reqs; + + struct rdma_event_channel *cm_channel; + + struct rdma_cm_id *cm_id; + + uint16_t max_queue_depth; + + struct spdk_nvme_rdma_req *rdma_reqs; + + struct spdk_nvme_rdma_rsp *rdma_rsps; + + STAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs; +}; + +struct spdk_nvme_rdma_req { + int id; + + struct nvme_request *req; + + enum spdk_nvme_data_transfer xfer; + + struct nvme_rdma_qpair *rqpair; + + spdk_nvme_rdma_req_cb cb; + + struct spdk_nvme_cmd cmd; + + struct ibv_mr *cmd_mr; + + struct ibv_sge send_sgl; + + struct ibv_sge bb_sgl; + + struct ibv_mr *bb_mr; + + uint8_t *bb; + + uint32_t bb_len; + + STAILQ_ENTRY(spdk_nvme_rdma_req) link; +}; + +struct spdk_nvme_rdma_rsp { + struct spdk_nvme_cpl rsp; + + struct ibv_mr *rsp_mr; + + struct ibv_sge recv_sgl; +}; + +static inline struct nvme_rdma_qpair * +nvme_rdma_qpair(struct spdk_nvme_qpair *qpair) +{ + assert(qpair->transport == SPDK_NVME_TRANSPORT_RDMA); + return (struct nvme_rdma_qpair *)((uintptr_t)qpair - offsetof(struct nvme_rdma_qpair, qpair)); +} + +static inline struct nvme_rdma_ctrlr * +nvme_rdma_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +{ + assert(ctrlr->transport == SPDK_NVME_TRANSPORT_RDMA); + return (struct nvme_rdma_ctrlr *)((uintptr_t)ctrlr - offsetof(struct nvme_rdma_ctrlr, ctrlr)); +} + +static struct spdk_nvme_rdma_req * +nvme_rdma_req_get(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_req *rdma_req; + + if (!rqpair || STAILQ_EMPTY(&rqpair->free_reqs)) { + return NULL; + } + + rdma_req = STAILQ_FIRST(&rqpair->free_reqs); + STAILQ_REMOVE(&rqpair->free_reqs, rdma_req, spdk_nvme_rdma_req, link); + + rqpair->outstanding_reqs++; + return rdma_req; +} + +static void +nvme_rdma_req_put(struct spdk_nvme_rdma_req *rdma_req) +{ + struct nvme_rdma_qpair *rqpair; + + if (!rdma_req) { + return; + } + + rqpair = rdma_req->rqpair; + if (!rqpair) { + return; + } + + if (rqpair->outstanding_reqs) { + rqpair->outstanding_reqs--; + STAILQ_INSERT_HEAD(&rqpair->free_reqs, rdma_req, link); + } else { + SPDK_ERRLOG("There is no outstanding IOs\n"); + } +} + +static void +nvme_rdma_req_complete(struct nvme_request *req, + struct spdk_nvme_cpl *rsp) +{ + req->cb_fn(req->cb_arg, rsp); + nvme_free_request(req); +} + +static int +nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair) +{ + int rc; + struct ibv_qp_init_attr attr; + + rqpair->max_queue_depth = rqpair->qpair.num_entries; + + SPDK_NOTICELOG("rqpair depth =%d\n", rqpair->max_queue_depth); + memset(&attr, 0, sizeof(struct 
ibv_qp_init_attr)); + attr.qp_type = IBV_QPT_RC; + attr.cap.max_send_wr = rqpair->max_queue_depth; /* SEND operations */ + attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */ + attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE; + attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE; + + rc = rdma_create_qp(rqpair->cm_id, NULL, &attr); + if (rc) { + SPDK_ERRLOG("rdma_create_qp failed\n"); + return -1; + } + + rc = fcntl(rqpair->cm_id->send_cq_channel->fd, F_SETFL, O_NONBLOCK); + if (rc < 0) { + SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n"); + return -1; + } + + rc = fcntl(rqpair->cm_id->recv_cq_channel->fd, F_SETFL, O_NONBLOCK); + if (rc < 0) { + SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n"); + return -1; + } + + rqpair->cm_id->context = &rqpair->qpair; + + return 0; +} + +static int +nvme_rdma_pre_copy_mem(struct spdk_nvme_rdma_req *rdma_req) +{ + struct spdk_nvme_sgl_descriptor *nvme_sgl; + void *address; + + assert(rdma_req->bb_mr != NULL); + assert(rdma_req->bb != NULL); + + nvme_sgl = &rdma_req->req->cmd.dptr.sgl1; + address = (void *)nvme_sgl->address; + + if (address != NULL) { + rdma_req->cmd.dptr.sgl1.address = (uint64_t)rdma_req->bb; + if (rdma_req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER || + rdma_req->xfer == SPDK_NVME_DATA_BIDIRECTIONAL) { + memcpy(rdma_req->bb, address, nvme_sgl->keyed.length); + } + } + + nvme_sgl = &rdma_req->cmd.dptr.sgl1; + nvme_sgl->keyed.key = rdma_req->bb_sgl.lkey; + + return 0; +} + +static void +nvme_rdma_post_copy_mem(struct spdk_nvme_rdma_req *rdma_req) +{ + struct spdk_nvme_sgl_descriptor *nvme_sgl; + void *address; + + assert(rdma_req != NULL); + assert(rdma_req->req != NULL); + + nvme_sgl = &rdma_req->req->cmd.dptr.sgl1; + address = (void *)nvme_sgl->address; + + if ((address != NULL) && + (rdma_req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST || + rdma_req->xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) { + memcpy(address, rdma_req->bb, nvme_sgl->keyed.length); + } +} + +static void +nvme_rdma_trace_ibv_sge(struct ibv_sge *sg_list) +{ + if (sg_list) { + SPDK_NOTICELOG("local addr %p length 0x%x lkey 0x%x\n", + (void *)sg_list->addr, sg_list->length, sg_list->lkey); + } +} + +static void +nvme_rdma_ibv_send_wr_init(struct ibv_send_wr *wr, + struct spdk_nvme_rdma_req *req, + struct ibv_sge *sg_list, + uint64_t wr_id, + enum ibv_wr_opcode opcode, + int send_flags) +{ + if (!wr || !sg_list) { + return; + } + + memset(wr, 0, sizeof(*wr)); + wr->wr_id = wr_id; + wr->next = NULL; + wr->opcode = opcode; + wr->send_flags = send_flags; + wr->sg_list = sg_list; + wr->num_sge = 1; + + if (req != NULL) { + struct spdk_nvme_sgl_descriptor *sgl = &req->cmd.dptr.sgl1; + + wr->wr.rdma.rkey = sgl->keyed.key; + wr->wr.rdma.remote_addr = sgl->address; + + SPDK_NOTICELOG("rkey %x remote_addr %p\n", + wr->wr.rdma.rkey, (void *)wr->wr.rdma.remote_addr); + } + + nvme_rdma_trace_ibv_sge(wr->sg_list); +} + +static int +nvme_rdma_post_recv(struct nvme_rdma_qpair *rqpair, + struct spdk_nvme_rdma_rsp *rsp) +{ + struct ibv_recv_wr wr, *bad_wr = NULL; + int rc; + + wr.wr_id = (uintptr_t)rsp; + wr.next = NULL; + wr.sg_list = &rsp->recv_sgl; + wr.num_sge = 1; + + nvme_rdma_trace_ibv_sge(&rsp->recv_sgl); + + rc = ibv_post_recv(rqpair->cm_id->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc); + } + + return rc; +} + +static struct spdk_nvme_rdma_rsp * +config_rdma_rsp(struct nvme_rdma_qpair *rqpair, int i) +{ + struct spdk_nvme_rdma_rsp *rdma_rsp; + + rdma_rsp = &rqpair->rdma_rsps[i]; + if 
(!rdma_rsp) { + return NULL; + } + + rdma_rsp->rsp_mr = rdma_reg_msgs(rqpair->cm_id, &rqpair->rdma_rsps[i].rsp, + sizeof(rqpair->rdma_rsps[i].rsp)); + if (rdma_rsp->rsp_mr == NULL) { + SPDK_ERRLOG("Unable to register rsp_mr\n"); + return NULL; + } + + /* initialize recv_sgl */ + rdma_rsp->recv_sgl.addr = (uint64_t)&rqpair->rdma_rsps[i].rsp; + rdma_rsp->recv_sgl.length = sizeof(rqpair->rdma_rsps[i].rsp); + rdma_rsp->recv_sgl.lkey = rdma_rsp->rsp_mr->lkey; + + return rdma_rsp; +} + +static void +nvme_rdma_free_rsps(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_rsp *rdma_rsp; + int i; + + if (!rqpair->rdma_rsps) { + return; + } + + for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_rsp = &rqpair->rdma_rsps[i]; + if (rdma_rsp->rsp_mr && rdma_dereg_mr(rdma_rsp->rsp_mr)) { + SPDK_ERRLOG("Unable to de-register rsp_mr\n"); + } + } + + free(rqpair->rdma_rsps); +} + +static int +nvme_rdma_alloc_rsps(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_rsp *rdma_rsp; + int i; + + rqpair->rdma_rsps = calloc(rqpair->max_queue_depth, sizeof(struct spdk_nvme_rdma_rsp)); + + if (!rqpair->rdma_rsps) { + SPDK_ERRLOG("can not allocate rdma rsps\n"); + return -1; + } + + for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_rsp = config_rdma_rsp(rqpair, i); + if (rdma_rsp == NULL) { + goto fail; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rdma_rsq %p: rsp %p\n", + rdma_rsp, &rdma_rsp->rsp); + + if (nvme_rdma_post_recv(rqpair, rdma_rsp)) { + SPDK_ERRLOG("Unable to post connection rx desc\n"); + goto fail; + } + } + + return 0; +fail: + nvme_rdma_free_rsps(rqpair); + return -ENOMEM; +} + +static struct spdk_nvme_rdma_req * +config_rdma_req(struct nvme_rdma_qpair *rqpair, int i) +{ + struct spdk_nvme_rdma_req *rdma_req; + + rdma_req = &rqpair->rdma_reqs[i]; + + if (!rdma_req) { + return NULL; + } + + rdma_req->cmd_mr = rdma_reg_msgs(rqpair->cm_id, &rdma_req->cmd, + sizeof(rdma_req->cmd)); + + if (!rdma_req->cmd_mr) { + SPDK_ERRLOG("Unable to register cmd_mr\n"); + return NULL; + } + + /* initialize send_sgl */ + rdma_req->send_sgl.addr = (uint64_t)&rdma_req->cmd; + rdma_req->send_sgl.length = sizeof(rdma_req->cmd); + rdma_req->send_sgl.lkey = rdma_req->cmd_mr->lkey; + + rdma_req->bb = calloc(1, NVME_RDMA_RW_BUFFER_SIZE); + if (!rdma_req->bb) { + SPDK_ERRLOG("Unable to register allocate read/write buffer\n"); + return NULL; + } + + rdma_req->bb_len = NVME_RDMA_RW_BUFFER_SIZE; + rdma_req->bb_mr = ibv_reg_mr(rqpair->cm_id->qp->pd, rdma_req->bb, rdma_req->bb_len, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + + if (!rdma_req->bb_mr) { + SPDK_ERRLOG("Unable to register bb_mr\n"); + return NULL; + } + + /* initialize bb_sgl */ + rdma_req->bb_sgl.addr = (uint64_t)rdma_req->bb; + rdma_req->bb_sgl.length = rdma_req->bb_len; + rdma_req->bb_sgl.lkey = rdma_req->bb_mr->lkey; + + return rdma_req; +} + +static void +nvme_rdma_free_reqs(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_req *rdma_req; + int i; + + if (!rqpair->rdma_reqs) { + return; + } + + for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_req = &rqpair->rdma_reqs[i]; + if (rdma_req->cmd_mr && rdma_dereg_mr(rdma_req->cmd_mr)) { + SPDK_ERRLOG("Unable to de-register cmd_mr\n"); + } + if (rdma_req->bb_mr && ibv_dereg_mr(rdma_req->bb_mr)) { + SPDK_ERRLOG("Unable to de-register bb_mr\n"); + } + + if (rdma_req->bb) { + free(rdma_req->bb); + } + } + + free(rqpair->rdma_reqs); +} + +static int +nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair) +{ + struct spdk_nvme_rdma_req *rdma_req; + int i; + 
+ for (i = 0; i < rqpair->max_queue_depth; i++) { + rdma_req = config_rdma_req(rqpair, i); + if (rdma_req == NULL) { + goto fail; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rdma_req %p: cmd %p\n", + rdma_req, &rdma_req->cmd); + } + + return 0; + +fail: + nvme_rdma_free_reqs(rqpair); + return -ENOMEM; +} + +static int +nvme_rdma_recv(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc) +{ + struct spdk_nvme_rdma_req *rdma_req; + struct spdk_nvme_rdma_rsp *rdma_rsp; + struct nvme_request *req; + + rdma_rsp = (struct spdk_nvme_rdma_rsp *)wc->wr_id; + + if (wc->byte_len < sizeof(struct spdk_nvmf_fabric_connect_rsp)) { + SPDK_ERRLOG("recv length %u less than capsule header\n", wc->byte_len); + return -1; + } + + rdma_req = &rqpair->rdma_reqs[rdma_rsp->rsp.cid]; + + nvme_rdma_post_copy_mem(rdma_req); + req = rdma_req->req; + rdma_req->cb(req, &rdma_rsp->rsp); + nvme_rdma_req_put(rdma_req); + + if (nvme_rdma_post_recv(rqpair, rdma_rsp)) { + SPDK_ERRLOG("Unable to re-post rx descriptor\n"); + return -1; + } + + return 0; +} + +static int +nvme_rdma_bind_addr(struct nvme_rdma_qpair *rqpair, + struct sockaddr_storage *sin, + struct rdma_event_channel *cm_channel) +{ + int ret; + struct rdma_cm_event *event; + + ret = rdma_resolve_addr(rqpair->cm_id, NULL, (struct sockaddr *) sin, + NVME_RDMA_TIME_OUT_IN_MS); + if (ret) { + SPDK_ERRLOG("rdma_resolve_addr, %d\n", errno); + return ret; + } + + ret = rdma_get_cm_event(cm_channel, &event); + if (ret) { + SPDK_ERRLOG("rdma address resolution error\n"); + return ret; + } + if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) { + return -1; + } + rdma_ack_cm_event(event); + + + ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS); + if (ret) { + SPDK_ERRLOG("rdma_resolve_route\n"); + return ret; + } + ret = rdma_get_cm_event(cm_channel, &event); + if (ret) { + SPDK_ERRLOG("rdma address resolution error\n"); + return ret; + } + if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) { + SPDK_ERRLOG("rdma route resolution error\n"); + return -1; + } + rdma_ack_cm_event(event); + + SPDK_NOTICELOG("rdma_resolve_addr - rdma_resolve_route successful\n"); + return 0; +} + +static int +nvme_rdma_connect(struct nvme_rdma_qpair *rqpair) +{ + struct rdma_conn_param conn_param; + struct spdk_nvmf_rdma_request_private_data pdata; + const union spdk_nvmf_rdma_private_data *data; + struct rdma_cm_event *event; + int ret; + + memset(&conn_param, 0, sizeof(conn_param)); + /* Note: the following parameters apply only for PS = RDMA_PS_TCP, + and even then it appears that any values supplied here by host + application are over-written by the rdma_cm layer for the given + device. Verified at target side that private data arrived as + specified here, but the other param values either zeroed out or + replaced. 
*/ + conn_param.responder_resources = 1; /* 0 or 1*/ + conn_param.initiator_depth = rqpair->max_queue_depth; + conn_param.retry_count = 7; + conn_param.rnr_retry_count = 7; + + /* init private data for connect */ + memset(&pdata, 0, sizeof(pdata)); + pdata.qid = rqpair->qpair.id; + pdata.hrqsize = rqpair->max_queue_depth; + pdata.hsqsize = rqpair->max_queue_depth; + conn_param.private_data = &pdata; + conn_param.private_data_len = sizeof(pdata); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "qid =%d\n", pdata.qid); + + ret = rdma_connect(rqpair->cm_id, &conn_param); + if (ret) { + SPDK_ERRLOG("nvme rdma connect error\n"); + return ret; + } + ret = rdma_get_cm_event(rqpair->cm_channel, &event); + if (ret) { + SPDK_ERRLOG("failed to get rdma cm event\n"); + return ret; + } + if (event->event != RDMA_CM_EVENT_ESTABLISHED) { + SPDK_ERRLOG("rdma connect error\n"); + return -1; + } + + /* Look for any rdma connection data returned by the server; read the private data before acking (and thus releasing) the event */ + data = event->param.conn.private_data; + + if (event->param.conn.private_data_len >= sizeof(union spdk_nvmf_rdma_private_data) && + data != NULL) { + if (data->pd_accept.recfmt != 0) { + SPDK_ERRLOG("NVMF fabric connect accept: invalid private data format!\n"); + } else { + SPDK_NOTICELOG("NVMF fabric connect accept, Private data length %d\n", + event->param.conn.private_data_len); + SPDK_NOTICELOG("NVMF fabric connect accept, RECFMT %d\n", + data->pd_accept.recfmt); + SPDK_NOTICELOG("NVMF fabric connect accept, CRQSIZE %d\n", + data->pd_accept.crqsize); + } + } + + rdma_ack_cm_event(event); + + SPDK_NOTICELOG("connect successful\n"); + return 0; +} + +static int +nvme_rdma_parse_ipaddr(struct sockaddr_in *sin, const char *addr) +{ + struct addrinfo *res; + int ret; + + if (addr == NULL) { + sin->sin_addr.s_addr = htonl(INADDR_ANY); + sin->sin_family = AF_INET; + return 0; + } + + ret = getaddrinfo(addr, NULL, NULL, &res); + if (ret) { + SPDK_ERRLOG("getaddrinfo failed - invalid hostname or IP address\n"); + return ret; + } + + *sin = *(struct sockaddr_in *) res->ai_addr; + + freeaddrinfo(res); + return ret; +} + +static int +nvmf_cm_construct(struct nvme_rdma_qpair *rqpair) +{ + /* create an event channel with rdmacm to receive + connection oriented requests and notifications */ + rqpair->cm_channel = rdma_create_event_channel(); + if (rqpair->cm_channel == NULL) { + SPDK_ERRLOG("rdma_create_event_channel() failed\n"); + return -1; + } + + return 0; +} + +static int +nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair) +{ + struct sockaddr_storage sin; + int rc; + struct nvme_rdma_ctrlr *rctrlr; + + rc = nvmf_cm_construct(rqpair); + if (rc < 0) { + return nvme_transport_qpair_destroy(&rqpair->qpair); + } + + rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr); + memset(&sin, 0, sizeof(struct sockaddr_storage)); + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "port is %s\n", rctrlr->info.trsvcid); + rc = nvme_rdma_parse_ipaddr((struct sockaddr_in *)&sin, rctrlr->info.traddr); + if (rc < 0) { + goto err; + } + + /* set the port from the transport service id */ + if (sin.ss_family == AF_INET) + ((struct sockaddr_in *) &sin)->sin_port = htons(atoi(rctrlr->info.trsvcid)); + else + ((struct sockaddr_in6 *) &sin)->sin6_port = htons(atoi(rctrlr->info.trsvcid)); + + rc = rdma_create_id(rqpair->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP); + if (rc < 0) { + goto err; + } + + rc = nvme_rdma_bind_addr(rqpair, &sin, rqpair->cm_channel); + if (rc < 0) { + goto err; + } + + rc = nvme_rdma_qpair_init(rqpair); + if (rc < 0) { + goto err; + } + rc = nvme_rdma_alloc_reqs(rqpair); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rc
=%d\n", rc); + if (rc) { + SPDK_ERRLOG("Unable to allocate rqpair RDMA requests\n"); + goto err; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA requests allocated\n"); + + rc = nvme_rdma_alloc_rsps(rqpair); + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "rc =%d\n", rc); + if (rc < 0) { + SPDK_ERRLOG("Unable to allocate rqpair RDMA responses\n"); + goto err; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA responses allocated\n"); + + rc = nvme_rdma_connect(rqpair); + if (rc < 0) { + SPDK_ERRLOG("Unable to conduct the rqpair\n"); + goto err; + } + + return 0; +err: + return nvme_transport_qpair_destroy(&rqpair->qpair);; +} + +static struct spdk_nvme_rdma_req * +nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req) +{ + struct spdk_nvme_rdma_req *rdma_req; + struct spdk_nvme_sgl_descriptor *nvme_sgl; + + if (!rqpair || !req) { + return NULL; + } + + rdma_req = nvme_rdma_req_get(rqpair); + if (!rdma_req) { + return NULL; + } + + rdma_req->req = req; + rdma_req->cb = nvme_rdma_req_complete; + req->cmd.cid = rdma_req->id; + + /* setup the RDMA SGL details */ + nvme_sgl = &req->cmd.dptr.sgl1; + if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) { + nvme_sgl->address = (uint64_t)req->payload.u.contig + req->payload_offset; + nvme_sgl->keyed.length = req->payload_size; + } else { + nvme_rdma_req_put(rdma_req); + /* Need to handle other case later */ + return NULL; + } + + rdma_req->req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL; + nvme_sgl->keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; + nvme_sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; + + if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) { + struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd; + rdma_req->xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype); + } else { + rdma_req->xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc); + + } + + memcpy(&rdma_req->cmd, &req->cmd, sizeof(req->cmd)); + return rdma_req; +} + +static int +nvme_rdma_qpair_fabric_connect(struct nvme_rdma_qpair *rqpair) +{ + struct nvme_completion_poll_status status; + struct spdk_nvmf_fabric_connect_rsp *rsp; + struct spdk_nvmf_fabric_connect_cmd cmd; + struct spdk_nvmf_fabric_connect_data *nvmf_data; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_rdma_ctrlr *rctrlr; + int rc = 0; + + ctrlr = rqpair->qpair.ctrlr; + if (!ctrlr) { + return -1; + } + + rctrlr = nvme_rdma_ctrlr(ctrlr); + nvmf_data = calloc(1, sizeof(*nvmf_data)); + if (!nvmf_data) { + SPDK_ERRLOG("nvmf_data allocation error\n"); + rc = -1; + return rc; + } + + memset(&cmd, 0, sizeof(cmd)); + memset(&status, 0, sizeof(struct nvme_completion_poll_status)); + + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT; + cmd.qid = rqpair->qpair.id; + cmd.sqsize = rqpair->qpair.num_entries - 1; + + if (nvme_qpair_is_admin_queue(&rqpair->qpair)) { + nvmf_data->cntlid = 0xFFFF; + } else { + nvmf_data->cntlid = rctrlr->cntlid; + } + + strncpy((char *)&nvmf_data->hostid, (char *)NVME_HOST_ID_DEFAULT, + strlen((char *)NVME_HOST_ID_DEFAULT)); + strncpy((char *)&nvmf_data->hostnqn, (char *)NVME_HOST_NQN, strlen((char *)NVME_HOST_NQN)); + strncpy((char *)&nvmf_data->subnqn, rctrlr->info.nqn, strlen(rctrlr->info.nqn)); + + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, + (struct spdk_nvme_cmd *)&cmd, + nvmf_data, sizeof(*nvmf_data), + nvme_completion_poll_cb, &status); + + if (rc < 0) { + SPDK_ERRLOG("spdk_nvme_rdma_req_fabric_connect failed\n"); + rc = -1; + goto ret; + } + + while (status.done == false) { + spdk_nvme_qpair_process_completions(&rqpair->qpair, 0); 
+ } + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("Connect command failed\n"); + rc = -1; + goto ret; + } + + rsp = (struct spdk_nvmf_fabric_connect_rsp *)&status.cpl; + rctrlr->cntlid = rsp->status_code_specific.success.cntlid; +ret: + free(nvmf_data); + return rc; +} + +static int +nvme_rdma_fabric_prop_set_cmd(struct spdk_nvme_ctrlr *ctrlr, + uint32_t offset, uint8_t size, uint64_t value) +{ + struct spdk_nvmf_fabric_prop_set_cmd cmd = {}; + struct nvme_completion_poll_status status = {}; + int rc; + + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET; + cmd.ofst = offset; + cmd.attrib.size = size; + cmd.value.u64 = value; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, (struct spdk_nvme_cmd *)&cmd, + NULL, 0, + nvme_completion_poll_cb, &status); + + if (rc < 0) { + SPDK_ERRLOG("failed to send nvmf_fabric_prop_set_cmd\n"); + return -1; + } + + while (status.done == false) { + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + } + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("nvme_rdma_fabric_prop_set_cmd failed\n"); + return -1; + } + + return 0; +} + +static int +nvme_rdma_fabric_prop_get_cmd(struct spdk_nvme_ctrlr *ctrlr, + uint32_t offset, uint8_t size, uint64_t *value) +{ + struct spdk_nvmf_fabric_prop_set_cmd cmd = {}; + struct nvme_completion_poll_status status = {}; + struct spdk_nvmf_fabric_prop_get_rsp *response; + int rc; + + cmd.opcode = SPDK_NVME_OPC_FABRIC; + cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET; + cmd.ofst = offset; + cmd.attrib.size = size; + + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, (struct spdk_nvme_cmd *)&cmd, + NULL, sizeof(uint32_t), + nvme_completion_poll_cb, &status); + + if (rc < 0) { + SPDK_ERRLOG("failed to send nvme_rdma_fabric_prop_get_cmd\n"); + return -1; + } + + while (status.done == false) { + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + } + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("nvme_rdma_fabric_prop_get_cmd failed\n"); + return -1; + } + + response = (struct spdk_nvmf_fabric_prop_get_rsp *)&status.cpl; + + if (!size) { + *value = response->value.u32.low; + } else { + *value = response->value.u64; + } + + return 0; +} + + +static int +_nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, + struct spdk_nvme_qpair *qpair) +{ + int rc; + struct nvme_rdma_qpair *rqpair; + + rqpair = nvme_rdma_qpair(qpair); + rc = nvme_rdma_qpair_connect(rqpair); + if (rc < 0) { + SPDK_ERRLOG("Failed to connect through rdma qpair\n"); + goto err; + } + + rc = nvme_rdma_qpair_fabric_connect(rqpair); + if (rc < 0) { + SPDK_ERRLOG("Failed to send/receive the qpair fabric request\n"); + goto err; + } + + return 0; + +err: + nvme_transport_qpair_destroy(&rqpair->qpair); + return rc; + +} + + +static struct spdk_nvme_qpair * +nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + enum spdk_nvme_qprio qprio) +{ + struct nvme_rdma_qpair *rqpair; + struct spdk_nvme_qpair *qpair; + struct nvme_rdma_ctrlr *rctrlr; + uint32_t num_entries; + int rc; + + rctrlr = nvme_rdma_ctrlr(ctrlr); + + rqpair = calloc(1, sizeof(struct nvme_rdma_qpair)); + if (!rqpair) { + SPDK_ERRLOG("failed to allocate rqpair\n"); + return NULL; + } + + qpair = &rqpair->qpair; + + /* At this time, the queue is not initialized, so use the qid parameter passed in */ + if (!qid) { + num_entries = SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES; + ctrlr->adminq = qpair; + } else { + num_entries = rctrlr->ctrlr.opts.queue_size; + } + + rc = nvme_qpair_construct(qpair, qid, num_entries,
ctrlr, qprio); + if (rc != 0) { + return NULL; + } + + rc = _nvme_rdma_ctrlr_create_qpair(ctrlr, qpair); + + if (rc < 0) { + return NULL; + } + + return qpair; +} + +int +nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair) +{ + struct nvme_rdma_qpair *rqpair; + + rqpair = nvme_rdma_qpair(qpair); + if (!rqpair) { + return -1; + } + + nvme_rdma_free_reqs(rqpair); + nvme_rdma_free_rsps(rqpair); + + if (rqpair->cm_id) { + if (rqpair->cm_id->qp) { + rdma_destroy_qp(rqpair->cm_id); + } + rdma_destroy_id(rqpair->cm_id); + } + + if (rqpair->cm_channel) { + rdma_destroy_event_channel(rqpair->cm_channel); + } + + free(rqpair); + + return 0; +} + +static int +nvme_rdma_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr) +{ + struct spdk_nvme_qpair *qpair; + union spdk_nvme_cc_register cc = {}; + int rc; + + qpair = nvme_rdma_ctrlr_create_qpair(ctrlr, 0, 0); + if (!qpair) { + SPDK_ERRLOG("failed to create admin qpair\n"); + rc = -1; + goto error; + } + + /* Must enable CC.EN, otherwise we can not send nvme commands to the disabled controller */ + cc.raw = 0; + cc.bits.en = 1; + rc = nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw), + cc.raw); + if (rc < 0) { + SPDK_ERRLOG("Failed to set cc\n"); + rc = -1; + goto error; + } + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "successfully create admin qpair\n"); + return 0; + +error: + nvme_rdma_qpair_destroy(qpair); + return rc; +} + +struct spdk_nvme_qpair * +nvme_rdma_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, + enum spdk_nvme_qprio qprio) +{ + return nvme_rdma_ctrlr_create_qpair(ctrlr, qid, qprio); +} + +int +nvme_rdma_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr) +{ + /* do nothing here */ + return 0; +} + +static int +nvme_fabrics_get_log_discovery_page(struct spdk_nvme_ctrlr *ctrlr, + char *log_page) +{ + struct spdk_nvme_cmd cmd = {}; + struct nvme_completion_poll_status status = {}; + int rc; + + cmd.opc = SPDK_NVME_OPC_GET_LOG_PAGE; + cmd.cdw10 = SPDK_NVME_LOG_DISCOVERY; + rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, (struct spdk_nvme_cmd *)&cmd, + (void *)log_page, 4096, + nvme_completion_poll_cb, &status); + + if (rc < 0) { + return -1; + } + + while (status.done == false) { + spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); + } + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + return -1; + } + + return 0; +} + +/* This function must only be called while holding g_spdk_nvme_driver->lock */ +int +nvme_rdma_ctrlr_scan(enum spdk_nvme_transport_type transport, + struct nvme_enum_ctx *enum_ctx, void *devhandle) +{ + struct spdk_nvme_discover_info *info = (struct spdk_nvme_discover_info *)devhandle; + struct spdk_nvme_probe_info probe_info; + struct spdk_nvme_ctrlr *discovery_ctrlr; + struct spdk_nvmf_discovery_log_page *log_page; + struct spdk_nvme_discover_info discover_info; + char buffer[4096]; + int rc; + uint32_t i; + + memset(buffer, 0x0, 4096); + discovery_ctrlr = nvme_attach(info->type, (void *)info); + if (discovery_ctrlr == NULL) { + return -1; + } + + TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->attached_ctrlrs, discovery_ctrlr, tailq); + rc = nvme_fabrics_get_log_discovery_page(discovery_ctrlr, buffer); + if (rc < 0) { + SPDK_ERRLOG("nvme_fabrics_get_log_discovery_page error\n"); + nvme_ctrlr_destruct(discovery_ctrlr); + } + + log_page = (struct spdk_nvmf_discovery_log_page *)buffer; + discover_info.type = info->type; + for (i = 0; i < log_page->numrec; i++) { + discover_info.nqn = probe_info.nqn = (const char *)log_page->entries[i].subnqn; + discover_info.traddr = probe_info.traddr = 
(const char *)log_page->entries[i].traddr; + discover_info.trsvcid = probe_info.trsvcid = (const char *)log_page->entries[i].trsvcid; + SPDK_NOTICELOG("nqn=%s, traddr=%s, trsvcid=%s\n", discover_info.nqn, + discover_info.traddr, discover_info.trsvcid); + enum_ctx->enum_cb(info->type, &enum_ctx->usr_ctx, &probe_info, (void *)&discover_info); + } + + nvme_ctrlr_destruct(discovery_ctrlr); + return 0; +} + +struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(enum spdk_nvme_transport_type transport, + void *devhandle) +{ + struct nvme_rdma_ctrlr *rctrlr; + struct spdk_nvme_discover_info *info; + int rc; + + if (!devhandle) { + SPDK_ERRLOG("devhandle is NULL\n"); + return NULL; + } + + info = (struct spdk_nvme_discover_info *)devhandle; + rctrlr = calloc(1, sizeof(struct nvme_rdma_ctrlr)); + if (rctrlr == NULL) { + SPDK_ERRLOG("could not allocate ctrlr\n"); + return NULL; + } + + rctrlr->ctrlr.transport = SPDK_NVME_TRANSPORT_RDMA; + rctrlr->info = *info; + + rc = nvme_ctrlr_construct(&rctrlr->ctrlr); + if (rc != 0) { + nvme_ctrlr_destruct(&rctrlr->ctrlr); + return NULL; + } + + rc = nvme_rdma_ctrlr_construct_admin_qpair(&rctrlr->ctrlr); + if (rc != 0) { + SPDK_ERRLOG("create admin qpair failed\n"); + return NULL; + } + + SPDK_TRACELOG(SPDK_TRACE_DEBUG, "successfully initialized the nvmf ctrlr\n"); + return &rctrlr->ctrlr; +} + +int +nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) +{ + struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr); + + free(rctrlr); + + return 0; +} + +int +nvme_rdma_ctrlr_get_pci_id(struct spdk_nvme_ctrlr *ctrlr, struct spdk_pci_id *pci_id) +{ + assert(ctrlr != NULL); + assert(pci_id != NULL); + + *pci_id = ctrlr->probe_info.pci_id; + + return 0; +} + +int +nvme_rdma_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) +{ + return nvme_rdma_fabric_prop_set_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_4, value); +} + +int +nvme_rdma_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) +{ + return nvme_rdma_fabric_prop_set_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_8, value); +} + +int +nvme_rdma_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) +{ + uint64_t tmp_value; + int rc; + rc = nvme_rdma_fabric_prop_get_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_4, &tmp_value); + + if (!rc) { + *value = (uint32_t)tmp_value; + } + return rc; +} + +int +nvme_rdma_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) +{ + return nvme_rdma_fabric_prop_get_cmd(ctrlr, offset, SPDK_NVMF_PROP_SIZE_8, value); +} + +int +nvme_rdma_qpair_submit_request(struct spdk_nvme_qpair *qpair, + struct nvme_request *req) +{ + struct nvme_rdma_qpair *rqpair; + struct spdk_nvme_rdma_req *rdma_req; + struct ibv_send_wr wr, *bad_wr = NULL; + int rc; + + rqpair = nvme_rdma_qpair(qpair); + rdma_req = nvme_rdma_req_init(rqpair, req); + if (!rdma_req) { + SPDK_ERRLOG("spdk_nvme_rdma_req memory allocation failed\n"); + return -1; + } + + if (nvme_rdma_pre_copy_mem(rdma_req) < 0) { + return -1; + } + + nvme_rdma_ibv_send_wr_init(&wr, NULL, &rdma_req->send_sgl, (uint64_t)rdma_req, + IBV_WR_SEND, IBV_SEND_SIGNALED); + + rc = ibv_post_send(rqpair->cm_id->qp, &wr, &bad_wr); + if (rc) { + SPDK_ERRLOG("Failure posting rdma send for NVMf completion, rc = 0x%x\n", rc); + } + + return rc; +} + +int +nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) +{ + + return nvme_transport_qpair_destroy(qpair); +} + +int +nvme_rdma_ctrlr_reinit_io_qpair(struct spdk_nvme_ctrlr
*ctrlr, struct spdk_nvme_qpair *qpair) +{ + return _nvme_rdma_ctrlr_create_qpair(ctrlr, qpair); +} + +int +nvme_rdma_qpair_construct(struct spdk_nvme_qpair *qpair) +{ + int32_t i; + struct nvme_rdma_qpair *rqpair; + + rqpair = nvme_rdma_qpair(qpair); + rqpair->rdma_reqs = calloc(qpair->num_entries, sizeof(struct spdk_nvme_rdma_req)); + if (rqpair->rdma_reqs == NULL) { + return -1; + } + + rqpair->outstanding_reqs = 0; + STAILQ_INIT(&rqpair->free_reqs); + + SPDK_NOTICELOG("qpair num entries = %d\n", qpair->num_entries); + for (i = 0; i < qpair->num_entries; i++) { + STAILQ_INSERT_TAIL(&rqpair->free_reqs, &rqpair->rdma_reqs[i], link); + rqpair->rdma_reqs[i].rqpair = rqpair; + rqpair->rdma_reqs[i].id = i; + } + + return 0; +} + +int +nvme_rdma_qpair_enable(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_disable(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_reset(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_fail(struct spdk_nvme_qpair *qpair) +{ + /* Currently, doing nothing here */ + return 0; +} + +int +nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair, + uint32_t max_completions) +{ + struct nvme_rdma_qpair *rqpair; + struct ibv_wc wc; + uint32_t size; + int rc; + uint32_t io_completed = 0; + + rqpair = nvme_rdma_qpair(qpair); + size = qpair->num_entries - 1U; + if (!max_completions || max_completions > size) { + max_completions = size; + } + + /* poll the send_cq */ + while (true) { + rc = ibv_poll_cq(rqpair->cm_id->send_cq, 1, &wc); + if (rc == 0) { + break; + } + + if (rc < 0) { + SPDK_ERRLOG("Poll CQ error!(%d): %s\n", + errno, strerror(errno)); + return -1; + } + + if (wc.status) { + SPDK_NOTICELOG("CQ completion error status %d, exiting handler\n", + wc.status); + break; + } + + if (wc.opcode == IBV_WC_SEND) { + SPDK_NOTICELOG("CQ send completion\n"); + } else { + SPDK_ERRLOG("Poll cq opcode type unknown!!!!! completion\n"); + return -1; + } + } + + /* poll the recv_cq */ + while (true) { + rc = ibv_poll_cq(rqpair->cm_id->recv_cq, 1, &wc); + if (rc == 0) { + break; + } + + if (rc < 0) { + SPDK_ERRLOG("Poll CQ error!(%d): %s\n", + errno, strerror(errno)); + return -1; + } + + if (wc.status) { + SPDK_NOTICELOG("CQ completion error status %d, exiting handler\n", + wc.status); + break; + } + + if (wc.opcode == IBV_WC_RECV) { + SPDK_NOTICELOG("CQ recv completion\n"); + rc = nvme_rdma_recv(rqpair, &wc); + if (rc) { + SPDK_ERRLOG("nvme_rdma_recv processing failure\n"); + + return -1; + } + io_completed++; + } else { + SPDK_ERRLOG("Poll cq opcode type unknown!!!!! 
completion\n"); + return -1; + } + + if (io_completed == max_completions) { + break; + } + } + + return io_completed; +} + +uint32_t +nvme_rdma_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr) +{ + /* Todo, which should get from the NVMF target */ + return 506 * 4096; +} diff --git a/lib/nvme/nvme_transport.c b/lib/nvme/nvme_transport.c index 3f84bce5b9..1ebceb5a60 100644 --- a/lib/nvme/nvme_transport.c +++ b/lib/nvme/nvme_transport.c @@ -39,7 +39,7 @@ #ifdef DEBUG static __attribute__((noreturn)) void -nvme_transport_unknown(enum spdk_nvme_transport transport) +nvme_transport_unknown(enum spdk_nvme_transport_type transport) { SPDK_ERRLOG("Unknown transport %d\n", (int)transport); abort(); @@ -50,23 +50,35 @@ nvme_transport_unknown(enum spdk_nvme_transport transport) #endif #define TRANSPORT_PCIE(func_name, args) case SPDK_NVME_TRANSPORT_PCIE: return nvme_pcie_ ## func_name args; - +#ifdef SPDK_CONFIG_RDMA +#define TRANSPORT_FABRICS_RDMA(func_name, args) case SPDK_NVME_TRANSPORT_RDMA: return nvme_rdma_ ## func_name args; +#else +#define TRANSPORT_FABRICS_RDMA(func_name, args) +#endif #define NVME_TRANSPORT_CALL(transport, func_name, args) \ do { \ switch (transport) { \ TRANSPORT_PCIE(func_name, args) \ + TRANSPORT_FABRICS_RDMA(func_name, args) \ TRANSPORT_DEFAULT(transport) \ } \ SPDK_UNREACHABLE(); \ } while (0) -struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport transport, +struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport_type transport, void *devhandle) { NVME_TRANSPORT_CALL(transport, ctrlr_construct, (transport, devhandle)); } +int +nvme_transport_ctrlr_scan(enum spdk_nvme_transport_type transport, + struct nvme_enum_ctx *enum_ctx, void *devhandle) +{ + NVME_TRANSPORT_CALL(transport, ctrlr_scan, (transport, enum_ctx, devhandle)); +} + int nvme_transport_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) { diff --git a/lib/nvmf/session.c b/lib/nvmf/session.c index 4b68164e36..dafb02cd91 100644 --- a/lib/nvmf/session.c +++ b/lib/nvmf/session.c @@ -395,8 +395,10 @@ nvmf_prop_set_cc(struct spdk_nvmf_session *session, uint64_t value) session->vcprop.cc.bits.en = 1; session->vcprop.csts.bits.rdy = 1; } else { - SPDK_ERRLOG("CC.EN transition from 1 to 0 (reset) not implemented!\n"); - /* TODO: reset */ + /* TODO: reset is not really supported here */ + session->vcprop.cc.bits.en = 0; + session->vcprop.csts.bits.rdy = 0; + } diff.bits.en = 0; } diff --git a/test/lib/bdev/bdevio/Makefile b/test/lib/bdev/bdevio/Makefile index 23eb93129e..9501945f98 100644 --- a/test/lib/bdev/bdevio/Makefile +++ b/test/lib/bdev/bdevio/Makefile @@ -55,6 +55,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_bdev.a \ LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ $(COPY_MODULES_LINKER_ARGS) +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) -lcunit all : $(APP) diff --git a/test/lib/bdev/bdevperf/Makefile b/test/lib/bdev/bdevperf/Makefile index 229608874c..efecb75f57 100644 --- a/test/lib/bdev/bdevperf/Makefile +++ b/test/lib/bdev/bdevperf/Makefile @@ -55,6 +55,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_bdev.a \ LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \ $(COPY_MODULES_LINKER_ARGS) +ifeq ($(CONFIG_RDMA), y) +LIBS += -libverbs -lrdmacm +endif + LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) all : $(APP) diff --git a/test/lib/nvme/aer/Makefile b/test/lib/nvme/aer/Makefile index 850ddc874e..2838d5aa00 100644 --- a/test/lib/nvme/aer/Makefile +++ b/test/lib/nvme/aer/Makefile @@ -44,6 +44,10 @@ SPDK_LIBS += 
diff --git a/lib/nvmf/session.c b/lib/nvmf/session.c
index 4b68164e36..dafb02cd91 100644
--- a/lib/nvmf/session.c
+++ b/lib/nvmf/session.c
@@ -395,8 +395,10 @@ nvmf_prop_set_cc(struct spdk_nvmf_session *session, uint64_t value)
 			session->vcprop.cc.bits.en = 1;
 			session->vcprop.csts.bits.rdy = 1;
 		} else {
-			SPDK_ERRLOG("CC.EN transition from 1 to 0 (reset) not implemented!\n");
-			/* TODO: reset */
+			/* TODO: reset is not really supported here */
+			session->vcprop.cc.bits.en = 0;
+			session->vcprop.csts.bits.rdy = 0;
+
 		}
 		diff.bits.en = 0;
 	}
diff --git a/test/lib/bdev/bdevio/Makefile b/test/lib/bdev/bdevio/Makefile
index 23eb93129e..9501945f98 100644
--- a/test/lib/bdev/bdevio/Makefile
+++ b/test/lib/bdev/bdevio/Makefile
@@ -55,6 +55,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_bdev.a \
 LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
 	$(COPY_MODULES_LINKER_ARGS)
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS) -lcunit
 
 all : $(APP)
diff --git a/test/lib/bdev/bdevperf/Makefile b/test/lib/bdev/bdevperf/Makefile
index 229608874c..efecb75f57 100644
--- a/test/lib/bdev/bdevperf/Makefile
+++ b/test/lib/bdev/bdevperf/Makefile
@@ -55,6 +55,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_bdev.a \
 LIBS += $(BLOCKDEV_MODULES_LINKER_ARGS) \
 	$(COPY_MODULES_LINKER_ARGS)
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 all : $(APP)
diff --git a/test/lib/nvme/aer/Makefile b/test/lib/nvme/aer/Makefile
index 850ddc874e..2838d5aa00 100644
--- a/test/lib/nvme/aer/Makefile
+++ b/test/lib/nvme/aer/Makefile
@@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 all : $(APP)
diff --git a/test/lib/nvme/e2edp/Makefile b/test/lib/nvme/e2edp/Makefile
index aa18e2b037..9bc26d06d6 100644
--- a/test/lib/nvme/e2edp/Makefile
+++ b/test/lib/nvme/e2edp/Makefile
@@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 all : $(APP)
diff --git a/test/lib/nvme/overhead/Makefile b/test/lib/nvme/overhead/Makefile
index 3e3627d2dd..1ffb03e228 100644
--- a/test/lib/nvme/overhead/Makefile
+++ b/test/lib/nvme/overhead/Makefile
@@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 ifeq ($(OS),Linux)
diff --git a/test/lib/nvme/reset/Makefile b/test/lib/nvme/reset/Makefile
index 8fb681a9ba..77ae5baf90 100644
--- a/test/lib/nvme/reset/Makefile
+++ b/test/lib/nvme/reset/Makefile
@@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 all : $(APP)
diff --git a/test/lib/nvme/sgl/Makefile b/test/lib/nvme/sgl/Makefile
index 8f176b235e..2372e7bf98 100644
--- a/test/lib/nvme/sgl/Makefile
+++ b/test/lib/nvme/sgl/Makefile
@@ -44,6 +44,10 @@ SPDK_LIBS += $(SPDK_ROOT_DIR)/build/lib/libspdk_nvme.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_util.a \
 	$(SPDK_ROOT_DIR)/build/lib/libspdk_log.a \
 
+ifeq ($(CONFIG_RDMA), y)
+LIBS += -libverbs -lrdmacm
+endif
+
 LIBS += $(SPDK_LIBS) $(ENV_LINKER_ARGS)
 
 all : $(APP)
diff --git a/test/lib/nvme/unit/nvme_c/nvme_ut.c b/test/lib/nvme/unit/nvme_c/nvme_ut.c
index fa6ab8996a..3878d1214c 100644
--- a/test/lib/nvme/unit/nvme_c/nvme_ut.c
+++ b/test/lib/nvme/unit/nvme_c/nvme_ut.c
@@ -57,12 +57,19 @@ spdk_pci_device_get_id(struct spdk_pci_device *pci_dev)
 	return pci_id;
 }
 
-struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport transport,
+struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport_type transport,
 		void *devhandle)
 {
 	return NULL;
 }
 
+int
+nvme_transport_ctrlr_scan(enum spdk_nvme_transport_type transport,
+			  struct nvme_enum_ctx *enum_ctx, void *devhandle)
+{
+	return 0;
+}
+
 void
 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
 {
diff --git a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
index 763dd5b5f6..152ae018b7 100644
--- a/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
+++ b/test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut.c
@@ -57,7 +57,7 @@ struct spdk_nvme_registers g_ut_nvme_regs = {};
 
 __thread int    nvme_thread_ioq_index = -1;
 
-struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport transport,
+struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport_type transport,
 		void *devhandle)
 {
 	return NULL;
diff --git a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
index a9ff1966f3..aad487dc60 100644
--- a/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
+++ b/test/lib/nvme/unit/nvme_ns_cmd_c/nvme_ns_cmd_ut.c
@@ -56,7 +56,7 @@ static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
 	return 0;
 }
 
-struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport transport,
+struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(enum spdk_nvme_transport_type transport,
 		void *devhandle)
 {
 	return NULL;
@@ -154,6 +154,13 @@ nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
 	return 0;
 }
 
+int
+nvme_transport_ctrlr_scan(enum spdk_nvme_transport_type transport,
+			  struct nvme_enum_ctx *enum_ctx, void *devhandle)
+{
+	return 0;
+}
+
 static void
 prepare_for_test(struct spdk_nvme_ns *ns, struct spdk_nvme_ctrlr *ctrlr,
 		 struct spdk_nvme_qpair *qpair,
diff --git a/test/nvmf/host/identify.sh b/test/nvmf/host/identify.sh
new file mode 100755
index 0000000000..c1f745929c
--- /dev/null
+++ b/test/nvmf/host/identify.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$(readlink -f $testdir/../../..)
+source $rootdir/scripts/autotest_common.sh
+source $rootdir/test/nvmf/common.sh
+
+MALLOC_BDEV_SIZE=64
+MALLOC_BLOCK_SIZE=512
+
+rpc_py="python $rootdir/scripts/rpc.py"
+
+set -e
+
+if ! rdma_nic_available; then
+	echo "no NIC for nvmf test"
+	exit 0
+fi
+
+timing_enter host
+
+# Start up the NVMf target in another process
+$rootdir/app/nvmf_tgt/nvmf_tgt -c $testdir/../nvmf.conf -m 0x2 -p 1 -s 512 -t nvmf &
+nvmfpid=$!
+
+trap "killprocess $nvmfpid; exit 1" SIGINT SIGTERM EXIT
+
+waitforlisten $nvmfpid ${RPC_PORT}
+
+bdevs="$bdevs $($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
+
+modprobe -v nvme-rdma
+
+if [ -e "/dev/nvme-fabrics" ]; then
+	chmod a+rw /dev/nvme-fabrics
+fi
+
+$rpc_py construct_nvmf_subsystem Virtual nqn.2016-06.io.spdk:cnode1 'transport:RDMA traddr:192.168.100.8 trsvcid:4420' '' -s SPDK00000000000001 -n "$bdevs"
+
+$rootdir/examples/nvme/identify/identify -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT" -n nqn.2014-08.org.nvmexpress.discovery -t all
+sync
+$rpc_py delete_nvmf_subsystem nqn.2016-06.io.spdk:cnode1
+
+rm -f ./local-job0-0-verify.state
+rm -f ./local-job1-1-verify.state
+rm -f ./local-job2-2-verify.state
+
+trap - SIGINT SIGTERM EXIT
+
+nvmfcleanup
+killprocess $nvmfpid
+timing_exit host