diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md
index 8a59ce26ad..d1ae9cc248 100644
--- a/doc/jsonrpc.md
+++ b/doc/jsonrpc.md
@@ -3065,6 +3065,64 @@ Example response:
 }
 ~~~
 
+### bdev_nvme_start_discovery {#rpc_bdev_nvme_start_discovery}
+
+Start a discovery service for the discovery subsystem of the specified transport ID.
+
+The discovery service will read the discovery log page for the specified
+discovery subsystem, and automatically attach to any subsystems found in the
+log page. When determining a controller name to use when attaching, it will use
+the 'name' parameter as a prefix, followed by a unique integer for that discovery
+service. If the discovery service identifies a subsystem that has been previously
+attached but is listed with a different path, it will use the same controller name
+as the previous entry, and connect as a multipath.
+
+When the discovery service sees that a subsystem entry has been removed
+from the log page, it will automatically detach from that controller as well.
+
+The 'name' is also used to later stop the discovery service.
+
+#### Parameters
+
+Name                       | Optional | Type        | Description
+-------------------------- | -------- | ----------- | -----------
+name                       | Required | string      | Prefix for NVMe controllers
+trtype                     | Required | string      | NVMe-oF target trtype: rdma or tcp
+traddr                     | Required | string      | NVMe-oF target address: ip
+adrfam                     | Optional | string      | NVMe-oF target adrfam: ipv4, ipv6
+trsvcid                    | Optional | string      | NVMe-oF target trsvcid: port number
+hostnqn                    | Optional | string      | NVMe-oF host NQN to connect from
+
+#### Example
+
+Example request:
+
+~~~json
+{
+  "jsonrpc": "2.0",
+  "method": "bdev_nvme_start_discovery",
+  "id": 1,
+  "params": {
+    "name": "nvme_auto",
+    "trtype": "tcp",
+    "traddr": "127.0.0.1",
+    "hostnqn": "nqn.2021-12.io.spdk:host1",
+    "adrfam": "ipv4",
+    "trsvcid": "4420"
+  }
+}
+~~~
+
+Example response:
+
+~~~json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": true
+}
+~~~
+
 ### bdev_nvme_cuse_register {#rpc_bdev_nvme_cuse_register}
 
 Register CUSE device on NVMe controller.
diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c
index a61d25ca41..2533278ca4 100644
--- a/module/bdev/nvme/bdev_nvme.c
+++ b/module/bdev/nvme/bdev_nvme.c
@@ -3872,6 +3872,241 @@ bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id)
 	return rc;
 }
 
+struct discovery_ctx {
+	spdk_bdev_nvme_start_discovery_fn	start_cb_fn;
+	spdk_bdev_nvme_stop_discovery_fn	stop_cb_fn;
+	void					*cb_ctx;
+	struct spdk_nvme_probe_ctx		*probe_ctx;
+	struct spdk_nvme_detach_ctx		*detach_ctx;
+	struct spdk_nvme_ctrlr			*ctrlr;
+	struct spdk_poller			*poller;
+	struct spdk_nvme_ctrlr_opts		opts;
+	TAILQ_ENTRY(discovery_ctx)		tailq;
+	int					rc;
+	/* Denotes if a discovery is currently in progress for this context.
+	 * That includes connecting to newly discovered subsystems.  Used to
+	 * ensure we do not start a new discovery until an existing one is
+	 * complete.
+	 */
+	bool					in_progress;
+
+	/* Denotes if another discovery is needed after the one in progress
+	 * completes.  Set when we receive an AER completion while a discovery
+	 * is already in progress.
+	 */
+	bool					pending;
+
+	/* Signal to the discovery context poller that it should detach from
+	 * the discovery controller.
+	 */
+	bool					detach;
+
+	struct spdk_thread			*calling_thread;
+};
+
+TAILQ_HEAD(discovery_ctxs, discovery_ctx);
+static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
+
+static void get_discovery_log_page(struct discovery_ctx *ctx);
+
+/* Completion callback for the discovery log page read. */
+static void
+discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
+		      struct spdk_nvmf_discovery_log_page *log_page)
+{
+	struct discovery_ctx *ctx = cb_arg;
+
+	if (rc || spdk_nvme_cpl_is_error(cpl)) {
+		SPDK_ERRLOG("could not get discovery log page\n");
+	} else {
+		free(log_page);
+	}
+
+	/* Always clear in_progress, even on failure; otherwise every later
+	 * AER would only set 'pending' and discovery would never run again.
+	 */
+	ctx->in_progress = false;
+	if (ctx->pending) {
+		ctx->pending = false;
+		get_discovery_log_page(ctx);
+	}
+}
+
+/* Issue a discovery log page read; at most one is outstanding per context. */
+static void
+get_discovery_log_page(struct discovery_ctx *ctx)
+{
+	int rc;
+
+	assert(ctx->in_progress == false);
+	ctx->in_progress = true;
+	rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
+	if (rc != 0) {
+		SPDK_ERRLOG("could not get discovery log page\n");
+		/* The read was not started, so nothing else will clear the flag. */
+		ctx->in_progress = false;
+	}
+}
+
+/* AER callback: re-read the discovery log page when it is reported as changed. */
+static void
+discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
+{
+	struct discovery_ctx *ctx = arg;
+	uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
+
+	if (spdk_nvme_cpl_is_error(cpl)) {
+		SPDK_ERRLOG("aer failed\n");
+		return;
+	}
+
+	if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
+		SPDK_ERRLOG("unexpected log page 0x%x\n", log_page_id);
+		return;
+	}
+
+	SPDK_DEBUGLOG(bdev_nvme, "got aer\n");
+	if (ctx->in_progress) {
+		ctx->pending = true;
+		return;
+	}
+
+	get_discovery_log_page(ctx);
+}
+
+/* Runs on the thread that called bdev_nvme_start_discovery() to report the result. */
+static void
+start_discovery_done(void *cb_ctx)
+{
+	struct discovery_ctx *ctx = cb_ctx;
+
+	SPDK_DEBUGLOG(bdev_nvme, "start discovery done\n");
+	ctx->start_cb_fn(ctx->cb_ctx, ctx->rc);
+	if (ctx->rc != 0) {
+		SPDK_ERRLOG("could not connect to discovery ctrlr\n");
+		/* discovery_poller() already unregistered itself and removed ctx
+		 * from g_discovery_ctxs before sending this message.
+		 */
+		free(ctx);
+	}
+}
+
+/* Poller that drives connect, admin completions and detach for one discovery ctx. */
+static int
+discovery_poller(void *arg)
+{
+	struct discovery_ctx *ctx = arg;
+	int rc;
+
+	if (ctx->probe_ctx) {
+		rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
+		if (rc != -EAGAIN) {
+			if (rc == 0 && ctx->ctrlr == NULL) {
+				/* Probing ended without discovery_attach_cb() running. */
+				rc = -EIO;
+			}
+			ctx->rc = rc;
+			if (rc != 0) {
+				/* Probing is over and start_discovery_done() frees ctx on
+				 * failure, so stop polling and unlink ctx on this thread.
+				 */
+				ctx->probe_ctx = NULL;
+				spdk_poller_unregister(&ctx->poller);
+				TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
+			}
+			spdk_thread_send_msg(ctx->calling_thread, start_discovery_done, ctx);
+			if (rc == 0) {
+				get_discovery_log_page(ctx);
+			}
+		}
+	} else if (ctx->detach) {
+		bool detach_done = false;
+
+		if (ctx->detach_ctx == NULL) {
+			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
+			if (rc != 0) {
+				SPDK_ERRLOG("could not detach discovery ctrlr\n");
+				detach_done = true;
+			}
+		} else {
+			rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
+			if (rc != -EAGAIN) {
+				detach_done = true;
+			}
+		}
+		if (detach_done) {
+			spdk_poller_unregister(&ctx->poller);
+			TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
+			ctx->stop_cb_fn(ctx->cb_ctx);
+			free(ctx);
+		}
+	} else {
+		spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
+	}
+
+	return SPDK_POLLER_BUSY;
+}
+
+/* Connect callback: record the discovery controller and register for its AERs. */
+static void
+discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+		    struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+	struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
+	struct discovery_ctx *ctx;
+
+	ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, opts);
+
+	SPDK_DEBUGLOG(bdev_nvme, "discovery ctrlr attached\n");
+	ctx->probe_ctx = NULL;
+	ctx->ctrlr = ctrlr;
+	spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
+}
+
+/* Runs on g_bdev_nvme_init_thread: track ctx and start polling it. */
+static void
+start_discovery_poller(void *arg)
+{
+	struct discovery_ctx *ctx = arg;
+
+	TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
+	ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
+}
+
+/* Start connecting to the discovery subsystem at trid.  When this returns 0, cb_fn
+ * is later invoked on the calling thread with the connect result.  trid->subnqn is
+ * overwritten with the discovery NQN.  NOTE(review): base_name is not used here yet.
+ */
+int
+bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
+			  const char *base_name,
+			  struct spdk_nvme_ctrlr_opts *opts,
+			  spdk_bdev_nvme_start_discovery_fn cb_fn,
+			  void *cb_ctx)
+{
+	struct discovery_ctx *ctx;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		return -ENOMEM;
+	}
+
+	ctx->start_cb_fn = cb_fn;
+	ctx->cb_ctx = cb_ctx;
+	memcpy(&ctx->opts, opts, sizeof(*opts));
+	ctx->calling_thread = spdk_get_thread();
+	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
+	ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, discovery_attach_cb);
+	if (ctx->probe_ctx == NULL) {
+		SPDK_ERRLOG("could not start discovery connect\n");
+		free(ctx);
+		return -EIO;
+	}
+
+	spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
+	return 0;
+}
+
 static int
 bdev_nvme_library_init(void)
 {
@@ -3920,10 +4155,20 @@ bdev_nvme_fini_destruct_ctrlrs(void)
 	pthread_mutex_unlock(&g_bdev_nvme_mutex);
 }
 
+/* Stop callback installed during module fini: continue once no discovery ctx is left. */
+static void
+check_discovery_fini(void *arg)
+{
+	if (TAILQ_EMPTY(&g_discovery_ctxs)) {
+		bdev_nvme_fini_destruct_ctrlrs();
+	}
+}
+
 static void
 bdev_nvme_library_fini(void)
 {
 	struct nvme_probe_skip_entry *entry, *entry_tmp;
+	struct discovery_ctx *ctx;
 
 	spdk_poller_unregister(&g_hotplug_poller);
 	free(g_hotplug_probe_ctx);
@@ -3934,7 +4179,16 @@ bdev_nvme_library_fini(void)
 		free(entry);
 	}
 
-	bdev_nvme_fini_destruct_ctrlrs();
+	assert(spdk_get_thread() == g_bdev_nvme_init_thread);
+	if (TAILQ_EMPTY(&g_discovery_ctxs)) {
+		bdev_nvme_fini_destruct_ctrlrs();
+	} else {
+		/* Ask every discovery poller to detach; the last one to finish resumes fini. */
+		TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
+			ctx->detach = true;
+			ctx->stop_cb_fn = check_discovery_fini;
+		}
+	}
 }
 
 static void
diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h
index 2f377c32b8..8efcef5a42 100644
--- a/module/bdev/nvme/bdev_nvme.h
+++ b/module/bdev/nvme/bdev_nvme.h
@@ -48,6 +48,8 @@ extern bool g_bdev_nvme_module_finish;
 #define NVME_MAX_CONTROLLERS 1024
 
 typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc);
+typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int rc);
+typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx);
 
 struct nvme_async_probe_ctx {
 	struct spdk_nvme_probe_ctx *probe_ctx;
@@ -254,6 +256,11 @@ int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
 		     void *cb_ctx,
 		     struct spdk_nvme_ctrlr_opts *opts,
 		     bool multipath);
+
+int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
+			      struct spdk_nvme_ctrlr_opts *opts,
+			      spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx);
+
 struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);
 
 /**
diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c
index 08f05ebcaa..33fae0cf18 100644
--- a/module/bdev/nvme/bdev_nvme_rpc.c
+++ b/module/bdev/nvme/bdev_nvme_rpc.c
@@ -1548,3 +1548,152 @@ rpc_bdev_nvme_get_controller_health_info(struct spdk_jsonrpc_request *request,
 }
 SPDK_RPC_REGISTER("bdev_nvme_get_controller_health_info",
 		  rpc_bdev_nvme_get_controller_health_info, SPDK_RPC_RUNTIME)
+
+struct rpc_bdev_nvme_start_discovery {
+	char *name;
+	char *trtype;
+	char *adrfam;
+	char *traddr;
+	char *trsvcid;
+	char *hostnqn;
+	struct spdk_nvme_ctrlr_opts opts;
+};
+
+/* Free the string members of a decoded bdev_nvme_start_discovery request. */
+static void
+free_rpc_bdev_nvme_start_discovery(struct rpc_bdev_nvme_start_discovery *req)
+{
+	free(req->name);
+	free(req->trtype);
+	free(req->adrfam);
+	free(req->traddr);
+	free(req->trsvcid);
+	free(req->hostnqn);
+}
+
+static const struct spdk_json_object_decoder rpc_bdev_nvme_start_discovery_decoders[] = {
+	{"name", offsetof(struct rpc_bdev_nvme_start_discovery, name), spdk_json_decode_string},
+	{"trtype", offsetof(struct rpc_bdev_nvme_start_discovery, trtype), spdk_json_decode_string},
+	{"traddr", offsetof(struct rpc_bdev_nvme_start_discovery, traddr), spdk_json_decode_string},
+	{"adrfam", offsetof(struct rpc_bdev_nvme_start_discovery, adrfam), spdk_json_decode_string, true},
+	{"trsvcid", offsetof(struct rpc_bdev_nvme_start_discovery, trsvcid), spdk_json_decode_string, true},
+	{"hostnqn", offsetof(struct rpc_bdev_nvme_start_discovery, hostnqn), spdk_json_decode_string, true},
+};
+
+struct rpc_bdev_nvme_start_discovery_ctx {
+	struct rpc_bdev_nvme_start_discovery req;
+	struct spdk_jsonrpc_request *request;
+};
+
+/* Completion callback for bdev_nvme_start_discovery(): answer the RPC and free ctx. */
+static void
+rpc_bdev_nvme_start_discovery_done(void *cb_ctx, int rc)
+{
+	struct rpc_bdev_nvme_start_discovery_ctx *ctx = cb_ctx;
+	struct spdk_jsonrpc_request *request = ctx->request;
+
+	if (rc < 0) {
+		/* Report the actual failure, as the synchronous error path does. */
+		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+	} else {
+		spdk_jsonrpc_send_bool_response(request, rc == 0);
+	}
+
+	free_rpc_bdev_nvme_start_discovery(&ctx->req);
+	free(ctx);
+}
+
+/* RPC handler: decode the parameters, build the transport ID and start discovery. */
+static void
+rpc_bdev_nvme_start_discovery(struct spdk_jsonrpc_request *request,
+			      const struct spdk_json_val *params)
+{
+	struct rpc_bdev_nvme_start_discovery_ctx *ctx;
+	struct spdk_nvme_transport_id trid = {};
+	size_t len, maxlen;
+	int rc;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx) {
+		spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
+		return;
+	}
+
+	spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.opts, sizeof(ctx->req.opts));
+
+	if (spdk_json_decode_object(params, rpc_bdev_nvme_start_discovery_decoders,
+				    SPDK_COUNTOF(rpc_bdev_nvme_start_discovery_decoders),
+				    &ctx->req)) {
+		SPDK_ERRLOG("spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		goto cleanup;
+	}
+
+	/* Parse trstring */
+	rc = spdk_nvme_transport_id_populate_trstring(&trid, ctx->req.trtype);
+	if (rc < 0) {
+		SPDK_ERRLOG("Failed to parse trtype: %s\n", ctx->req.trtype);
+		spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse trtype: %s",
+						     ctx->req.trtype);
+		goto cleanup;
+	}
+
+	/* Parse trtype */
+	rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, ctx->req.trtype);
+	assert(rc == 0);
+
+	/* Parse traddr */
+	maxlen = sizeof(trid.traddr);
+	len = strnlen(ctx->req.traddr, maxlen);
+	if (len == maxlen) {
+		spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "traddr too long: %s",
+						     ctx->req.traddr);
+		goto cleanup;
+	}
+	memcpy(trid.traddr, ctx->req.traddr, len + 1);
+
+	/* Parse adrfam */
+	if (ctx->req.adrfam) {
+		rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, ctx->req.adrfam);
+		if (rc < 0) {
+			SPDK_ERRLOG("Failed to parse adrfam: %s\n", ctx->req.adrfam);
+			spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse adrfam: %s",
+							     ctx->req.adrfam);
+			goto cleanup;
+		}
+	}
+
+	/* Parse trsvcid */
+	if (ctx->req.trsvcid) {
+		maxlen = sizeof(trid.trsvcid);
+		len = strnlen(ctx->req.trsvcid, maxlen);
+		if (len == maxlen) {
+			spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "trsvcid too long: %s",
+							     ctx->req.trsvcid);
+			goto cleanup;
+		}
+		memcpy(trid.trsvcid, ctx->req.trsvcid, len + 1);
+	}
+
+	if (ctx->req.hostnqn) {
+		snprintf(ctx->req.opts.hostnqn, sizeof(ctx->req.opts.hostnqn), "%s",
+			 ctx->req.hostnqn);
+	}
+
+	ctx->request = request;
+	rc = bdev_nvme_start_discovery(&trid, ctx->req.name, &ctx->req.opts,
+				       rpc_bdev_nvme_start_discovery_done, ctx);
+	if (rc) {
+		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+		goto cleanup;
+	}
+
+	return;
+
+cleanup:
+	free_rpc_bdev_nvme_start_discovery(&ctx->req);
+	free(ctx);
+}
+SPDK_RPC_REGISTER("bdev_nvme_start_discovery", rpc_bdev_nvme_start_discovery,
+		  SPDK_RPC_RUNTIME)
diff --git a/scripts/rpc.py b/scripts/rpc.py
index 40b2839eb8..8b64689077 100755
--- a/scripts/rpc.py
+++ b/scripts/rpc.py
@@ -619,6 +619,28 @@ if __name__ == "__main__":
     p.add_argument('name', help="Name of the NVMe controller")
     p.set_defaults(func=bdev_nvme_reset_controller)
 
+    def bdev_nvme_start_discovery(args):
+        rpc.bdev.bdev_nvme_start_discovery(args.client,
+                                           name=args.name,
+                                           trtype=args.trtype,
+                                           traddr=args.traddr,
+                                           adrfam=args.adrfam,
+                                           trsvcid=args.trsvcid,
+                                           hostnqn=args.hostnqn)
+
+    p = subparsers.add_parser('bdev_nvme_start_discovery', help='Start automatic discovery')
+    p.add_argument('-b', '--name', help="Name prefix for the NVMe controllers attached by discovery", required=True)
+    p.add_argument('-t', '--trtype',
+                   help='NVMe-oF target trtype: e.g., rdma, tcp', required=True)
+    p.add_argument('-a', '--traddr',
+                   help='NVMe-oF target address: e.g., an ip address', required=True)
+    p.add_argument('-f', '--adrfam',
+                   help='NVMe-oF target adrfam: e.g., ipv4, ipv6')
+    p.add_argument('-s', '--trsvcid',
+                   help='NVMe-oF target trsvcid: e.g., a port number')
+    p.add_argument('-q', '--hostnqn', help='NVMe-oF host NQN to connect from')
+    p.set_defaults(func=bdev_nvme_start_discovery)
+
     def bdev_nvme_cuse_register(args):
         rpc.bdev.bdev_nvme_cuse_register(args.client,
                                          name=args.name)
diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py
index 9b13a84e40..f994a8aed3 100644
--- a/scripts/rpc/bdev.py
+++ b/scripts/rpc/bdev.py
@@ -656,6 +656,34 @@ def bdev_nvme_reset_controller(client, name):
     return client.call('bdev_nvme_reset_controller', params)
 
 
+def bdev_nvme_start_discovery(client, name, trtype, traddr, adrfam=None, trsvcid=None,
+                              hostnqn=None):
+    """Start discovery with the specified discovery subsystem
+
+    Args:
+        name: prefix for the names of the NVMe controllers attached by discovery
+        trtype: transport type ("RDMA", "TCP")
+        traddr: transport address (IP address)
+        adrfam: address family ("IPv4", "IPv6") (optional)
+        trsvcid: transport service ID (port number for IP-based addresses) (optional)
+        hostnqn: NQN to connect from (optional)
+    """
+    params = {'name': name,
+              'trtype': trtype,
+              'traddr': traddr}
+
+    if hostnqn:
+        params['hostnqn'] = hostnqn
+
+    if adrfam:
+        params['adrfam'] = adrfam
+
+    if trsvcid:
+        params['trsvcid'] = trsvcid
+
+    return client.call('bdev_nvme_start_discovery', params)
+
+
 def bdev_nvme_cuse_register(client, name):
     """Register CUSE devices on NVMe controller.
 
diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c
index 4bdcb5217f..8380954ba9 100644
--- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c
+++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c
@@ -72,6 +72,9 @@ DEFINE_STUB_V(accel_engine_destroy_cb, (void *io_device, void *ctx_buf));
 
 DEFINE_RETURN_MOCK(spdk_nvme_ctrlr_get_memory_domain, int);
 
+DEFINE_STUB(spdk_nvme_ctrlr_get_discovery_log_page, int,
+	    (struct spdk_nvme_ctrlr *ctrlr, spdk_nvme_discovery_cb cb_fn, void *cb_arg), 0);
+
 int spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
 				       struct spdk_memory_domain **domains, int array_size)
 {