bdev_nvme: add bdev_nvme_start_discovery RPC

This patch adds the framework for a discovery
service in the bdev/nvme module.

Users can specify an IP/port of a discovery service.
The bdev/nvme module will connect to a discovery
controller, get the discovery log page, and then
register for AERs.  It will connect to each
subsystem specified in the initial log page.
AER completions will trigger fetching the log
page again, at which point new subsystems will
be connected to, or removed subsystems will be
detached.

This patch does the following:
* Adds the new start_discovery RPC
* Connects to the discovery controller
* Gets the discovery log page
* Registers for AERs
* Detaches from discovery controllers at shutdown

Subsequent patches in this series will:
* Connect to subsystems listed in discovery log page
* Detach from subsystems that were listed in earlier
  discovery log pages but subsequently removed
* Add a stop_discovery RPC

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I54bfa896a48c5619676f156b5ea9f2d1f886c72f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10694
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
This commit is contained in:
Jim Harris 2021-12-04 05:49:54 +00:00 committed by Tomasz Zawadzki
parent 9035986e35
commit b68f2eeb0b
7 changed files with 490 additions and 1 deletions

View File

@ -3065,6 +3065,64 @@ Example response:
}
~~~
### bdev_nvme_start_discovery {#rpc_bdev_nvme_start_discovery}
Start a discovery service for the discovery subsystem of the specified transport ID.
The discovery service will read the discovery log page for the specified
discovery subsystem, and automatically attach to any subsystems found in the
log page. When determining a controller name to use when attaching, it will use
the 'name' parameter as a prefix, followed by a unique integer for that discovery
service. If the discovery service identifies a subsystem that has been previously
attached but is listed with a different path, it will use the same controller name
as the previous entry, and connect as a multipath.
When the discovery service sees that a subsystem entry has been removed
from the log page, it will automatically detach from that controller as well.
The 'name' is also used to later stop the discovery service.
#### Parameters
Name | Optional | Type | Description
-------------------------- | -------- | ----------- | -----------
name | Required | string | Prefix for NVMe controllers
trtype | Required | string | NVMe-oF target trtype: rdma or tcp
traddr | Required | string | NVMe-oF target address: ip
adrfam | Optional | string | NVMe-oF target adrfam: ipv4, ipv6
trsvcid | Optional | string | NVMe-oF target trsvcid: port number
hostnqn | Optional | string | NVMe-oF host NQN to connect from
#### Example
Example request:
~~~json
{
"jsonrpc": "2.0",
"method": "bdev_nvme_start_discovery",
"id": 1,
"params": {
"name": "nvme_auto",
"trtype": "tcp",
"traddr": "127.0.0.1",
"hostnqn": "nqn.2021-12.io.spdk:host1",
"adrfam": "ipv4",
"trsvcid": "4420"
}
}
~~~
Example response:
~~~json
{
"jsonrpc": "2.0",
"id": 1,
"result": true
}
~~~
### bdev_nvme_cuse_register {#rpc_bdev_nvme_cuse_register}
Register CUSE device on NVMe controller.

View File

@ -3872,6 +3872,212 @@ bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id)
return rc;
}
/* State for one discovery service started by bdev_nvme_start_discovery().
 * A context is linked on g_discovery_ctxs by start_discovery_poller() and is
 * driven by discovery_poller() until it is detached and freed.
 */
struct discovery_ctx {
/* Called from start_discovery_done() with cb_ctx and rc. */
spdk_bdev_nvme_start_discovery_fn start_cb_fn;
/* Called from discovery_poller() with cb_ctx once the detach has finished. */
spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
/* Opaque argument handed back to start_cb_fn and stop_cb_fn. */
void *cb_ctx;
/* Handle returned by spdk_nvme_connect_async(); polled by discovery_poller()
 * and cleared by discovery_attach_cb().
 */
struct spdk_nvme_probe_ctx *probe_ctx;
/* Filled in by spdk_nvme_detach_async() and polled until the detach completes. */
struct spdk_nvme_detach_ctx *detach_ctx;
/* Discovery controller; set by discovery_attach_cb(). */
struct spdk_nvme_ctrlr *ctrlr;
/* Poller running discovery_poller() for this context. */
struct spdk_poller *poller;
/* Copy of the caller's controller options. Its address is what
 * discovery_attach_cb() receives as cb_ctx and converts back to the
 * enclosing discovery_ctx.
 */
struct spdk_nvme_ctrlr_opts opts;
/* Linkage on g_discovery_ctxs. */
TAILQ_ENTRY(discovery_ctx) tailq;
/* Result of the connect attempt, reported through start_cb_fn. */
int rc;
/* Denotes if a discovery is currently in progress for this context.
* That includes connecting to newly discovered subsystems. Used to
* ensure we do not start a new discovery until an existing one is
* complete.
*/
bool in_progress;
/* Denotes if another discovery is needed after the one in progress
* completes. Set when we receive an AER completion while a discovery
* is already in progress.
*/
bool pending;
/* Signal to the discovery context poller that it should detach from
* the discovery controller.
*/
bool detach;
/* Thread that called bdev_nvme_start_discovery(); start_discovery_done()
 * is sent to this thread.
 */
struct spdk_thread *calling_thread;
};
/* List type and the global list of all active discovery contexts. */
TAILQ_HEAD(discovery_ctxs, discovery_ctx);
static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
/* Forward declaration: discovery_log_page_cb() re-issues the fetch when an
 * AER arrived while a previous fetch was outstanding.
 */
static void get_discovery_log_page(struct discovery_ctx *ctx);
/*
 * Completion callback for spdk_nvme_ctrlr_get_discovery_log_page().
 *
 * \param cb_arg   The discovery_ctx that issued the request.
 * \param rc       Non-zero if the log page could not be retrieved.
 * \param cpl      NVMe completion of the request; only examined when rc is 0.
 * \param log_page Retrieved log page; released here on success.
 *
 * The in_progress flag is cleared on every path. Leaving it set after a
 * failed fetch would make every later AER merely set 'pending' and return,
 * so discovery would never run again for this context.
 */
static void
discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
		      struct spdk_nvmf_discovery_log_page *log_page)
{
	struct discovery_ctx *ctx = cb_arg;

	/* The outstanding fetch has finished, successfully or not. */
	ctx->in_progress = false;

	if (rc || spdk_nvme_cpl_is_error(cpl)) {
		SPDK_ERRLOG("could not get discovery log page\n");
	} else {
		/* Nothing consumes the entries yet; just release the buffer. */
		free(log_page);
	}

	/* An AER arrived while this fetch was outstanding: fetch once more so
	 * the change it announced is not missed.
	 */
	if (ctx->pending) {
		ctx->pending = false;
		get_discovery_log_page(ctx);
	}
}
/*
 * Start an asynchronous fetch of the discovery log page for ctx.
 *
 * Must only be called while no fetch is outstanding (asserted). Marks the
 * context as in_progress; discovery_log_page_cb() is the completion handler.
 */
static void
get_discovery_log_page(struct discovery_ctx *ctx)
{
	int rc;

	assert(ctx->in_progress == false);
	ctx->in_progress = true;
	rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
	if (rc != 0) {
		SPDK_ERRLOG("could not get discovery log page\n");
		/* The request was not submitted, so presumably no completion
		 * will arrive to clear the flag (TODO confirm). Clear it here;
		 * otherwise every later AER would only set 'pending' and
		 * discovery would be stuck.
		 */
		ctx->in_progress = false;
	}
}
/*
 * Asynchronous event callback registered on the discovery controller.
 *
 * \param arg The discovery_ctx passed at registration time.
 * \param cpl Completion of the AER; bits 23:16 of cdw0 carry the log page ID.
 *
 * A discovery-log-page event triggers a new log page fetch, or is recorded
 * in 'pending' when a fetch is already running.
 */
static void
discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct discovery_ctx *disc = arg;
	uint32_t page_id = (cpl->cdw0 >> 16) & 0xFF;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_ERRLOG("aer failed\n");
		return;
	}

	if (page_id != SPDK_NVME_LOG_DISCOVERY) {
		SPDK_ERRLOG("unexpected log page 0x%x\n", page_id);
		return;
	}

	SPDK_DEBUGLOG(bdev_nvme, "got aer\n");

	if (!disc->in_progress) {
		get_discovery_log_page(disc);
	} else {
		/* Defer: the running fetch's completion will start another one. */
		disc->pending = true;
	}
}
/*
 * Report the outcome of the connect attempt to the caller of
 * bdev_nvme_start_discovery(). Sent as a thread message by discovery_poller().
 *
 * \param cb_ctx The discovery_ctx whose connect attempt finished.
 *
 * On failure the context is unlinked from g_discovery_ctxs and freed after
 * the user callback has run.
 * NOTE(review): the context's poller is not unregistered in this function;
 * confirm it has been stopped before the context is freed.
 */
static void
start_discovery_done(void *cb_ctx)
{
	struct discovery_ctx *disc = cb_ctx;

	SPDK_DEBUGLOG(bdev_nvme, "start discovery done\n");
	disc->start_cb_fn(disc->cb_ctx, disc->rc);

	if (disc->rc == 0) {
		/* Success: the context stays alive for the discovery service. */
		return;
	}

	SPDK_ERRLOG("could not connect to discovery ctrlr\n");
	TAILQ_REMOVE(&g_discovery_ctxs, disc, tailq);
	free(disc);
}
/*
 * Poller driving one discovery context. Depending on the context state it:
 *  - polls the asynchronous connect to the discovery controller,
 *  - detaches from the controller once 'detach' has been requested, or
 *  - processes admin completions (log page and AER completions).
 *
 * \param arg The discovery_ctx to drive.
 * \return Always SPDK_POLLER_BUSY.
 */
static int
discovery_poller(void *arg)
{
	struct discovery_ctx *ctx = arg;
	int rc;

	if (ctx->probe_ctx) {
		rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
		if (rc != -EAGAIN) {
			/* Polling is complete and the library has released the
			 * probe context, so it must never be polled again.
			 * discovery_attach_cb() only clears this pointer when a
			 * controller actually attached.
			 */
			ctx->probe_ctx = NULL;

			/* ctx->ctrlr is set only by discovery_attach_cb(). If
			 * polling finished without it running, there is no
			 * controller to use - presumably the connect failed
			 * (TODO confirm) - so report an error instead of success.
			 */
			if (rc == 0 && ctx->ctrlr == NULL) {
				rc = -EIO;
			}
			ctx->rc = rc;

			if (rc != 0) {
				/* start_discovery_done() frees ctx on failure.
				 * Stop this poller first so it never runs on
				 * freed memory.
				 */
				spdk_poller_unregister(&ctx->poller);
			}

			spdk_thread_send_msg(ctx->calling_thread, start_discovery_done, ctx);

			/* ctx may only be touched past this point on success. */
			if (rc == 0) {
				get_discovery_log_page(ctx);
			}
		}
	} else if (ctx->detach) {
		bool detach_done = false;

		if (ctx->detach_ctx == NULL) {
			/* First pass: start the asynchronous detach. */
			rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
			if (rc != 0) {
				SPDK_ERRLOG("could not detach discovery ctrlr\n");
				detach_done = true;
			}
		} else {
			rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
			if (rc != -EAGAIN) {
				detach_done = true;
			}
		}

		if (detach_done) {
			spdk_poller_unregister(&ctx->poller);
			TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
			ctx->stop_cb_fn(ctx->cb_ctx);
			free(ctx);
		}
	} else {
		spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
	}

	return SPDK_POLLER_BUSY;
}
/*
 * Attach callback passed to spdk_nvme_connect_async() for the discovery
 * controller.
 *
 * \param cb_ctx Address of the 'opts' member embedded in the discovery_ctx;
 *               used to recover the enclosing context.
 * \param trid   Transport ID of the attached controller (unused).
 * \param ctrlr  The attached discovery controller.
 * \param opts   Options the controller was attached with (unused).
 */
static void
discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		    struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *embedded_opts = cb_ctx;
	struct discovery_ctx *disc = SPDK_CONTAINEROF(embedded_opts, struct discovery_ctx, opts);

	SPDK_DEBUGLOG(bdev_nvme, "discovery ctrlr attached\n");

	/* Connect is finished; the poller switches from probing to processing
	 * admin completions once probe_ctx is cleared.
	 */
	disc->probe_ctx = NULL;
	disc->ctrlr = ctrlr;
	spdk_nvme_ctrlr_register_aer_callback(ctrlr, discovery_aer_cb, disc);
}
/*
 * Thread message handler: publish a new discovery context on the global list
 * and start the poller that drives it.
 *
 * \param arg The discovery_ctx created by bdev_nvme_start_discovery().
 */
static void
start_discovery_poller(void *arg)
{
	struct discovery_ctx *disc = arg;

	TAILQ_INSERT_TAIL(&g_discovery_ctxs, disc, tailq);

	/* Poller period argument is 1000. */
	disc->poller = SPDK_POLLER_REGISTER(discovery_poller, disc, 1000);
}
/*
 * Start a discovery service against the discovery subsystem reachable at trid.
 *
 * \param trid      Transport ID of the discovery service. Its subnqn field is
 *                  overwritten with SPDK_NVMF_DISCOVERY_NQN.
 * \param base_name Controller name prefix (not used by this function yet).
 * \param opts      Controller options; copied into the discovery context.
 * \param cb_fn     Invoked on the calling thread with the connect result.
 * \param cb_ctx    Opaque argument passed to cb_fn.
 *
 * \return 0 if the connect was started, -ENOMEM if the context could not be
 *         allocated, -EIO if the asynchronous connect could not be started.
 */
int
bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
			  const char *base_name,
			  struct spdk_nvme_ctrlr_opts *opts,
			  spdk_bdev_nvme_start_discovery_fn cb_fn,
			  void *cb_ctx)
{
	struct discovery_ctx *disc = calloc(1, sizeof(*disc));

	if (!disc) {
		return -ENOMEM;
	}

	disc->calling_thread = spdk_get_thread();
	disc->cb_ctx = cb_ctx;
	disc->start_cb_fn = cb_fn;
	/* Private copy: &disc->opts doubles as the attach callback argument. */
	disc->opts = *opts;

	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);

	disc->probe_ctx = spdk_nvme_connect_async(trid, &disc->opts, discovery_attach_cb);
	if (!disc->probe_ctx) {
		SPDK_ERRLOG("could not start discovery connect\n");
		free(disc);
		return -EIO;
	}

	/* The poller and the global list are owned by the module's init thread. */
	spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, disc);
	return 0;
}
static int
bdev_nvme_library_init(void)
{
@ -3920,10 +4126,19 @@ bdev_nvme_fini_destruct_ctrlrs(void)
pthread_mutex_unlock(&g_bdev_nvme_mutex);
}
/*
 * Stop callback installed on every discovery context at module shutdown.
 * Once the last context is gone, continue by destructing the controllers.
 *
 * \param arg Unused.
 */
static void
check_discovery_fini(void *arg)
{
	if (!TAILQ_EMPTY(&g_discovery_ctxs)) {
		/* Other discovery contexts are still detaching. */
		return;
	}

	bdev_nvme_fini_destruct_ctrlrs();
}
static void
bdev_nvme_library_fini(void)
{
struct nvme_probe_skip_entry *entry, *entry_tmp;
struct discovery_ctx *ctx;
spdk_poller_unregister(&g_hotplug_poller);
free(g_hotplug_probe_ctx);
@ -3934,7 +4149,15 @@ bdev_nvme_library_fini(void)
free(entry);
}
bdev_nvme_fini_destruct_ctrlrs();
assert(spdk_get_thread() == g_bdev_nvme_init_thread);
if (TAILQ_EMPTY(&g_discovery_ctxs)) {
bdev_nvme_fini_destruct_ctrlrs();
} else {
TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
ctx->detach = true;
ctx->stop_cb_fn = check_discovery_fini;
}
}
}
static void

View File

@ -48,6 +48,8 @@ extern bool g_bdev_nvme_module_finish;
#define NVME_MAX_CONTROLLERS 1024
typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc);
/* Callback reporting the result of bdev_nvme_start_discovery(): ctx is the
 * caller-supplied cb_ctx, rc is 0 on success or non-zero on failure.
 */
typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int rc);
/* Callback invoked after a discovery context has detached from its
 * discovery controller.
 */
typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx);
struct nvme_async_probe_ctx {
struct spdk_nvme_probe_ctx *probe_ctx;
@ -254,6 +256,11 @@ int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
void *cb_ctx,
struct spdk_nvme_ctrlr_opts *opts,
bool multipath);
/**
 * Start a discovery service for the discovery subsystem at the given
 * transport ID.
 *
 * \param trid Transport ID of the discovery service; its subnqn is overwritten
 * with the discovery NQN.
 * \param base_name Prefix for names of controllers created by the service.
 * \param opts Controller options; copied by the callee.
 * \param cb_fn Called with the result of connecting to the discovery controller.
 * \param cb_ctx Argument passed to cb_fn.
 *
 * \return 0 if the connect was started, negative errno otherwise.
 */
int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
struct spdk_nvme_ctrlr_opts *opts,
spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx);
struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);
/**

View File

@ -1548,3 +1548,151 @@ rpc_bdev_nvme_get_controller_health_info(struct spdk_jsonrpc_request *request,
}
SPDK_RPC_REGISTER("bdev_nvme_get_controller_health_info",
rpc_bdev_nvme_get_controller_health_info, SPDK_RPC_RUNTIME)
/* Decoded parameters of the bdev_nvme_start_discovery RPC. The string
 * members are released by free_rpc_bdev_nvme_start_discovery().
 */
struct rpc_bdev_nvme_start_discovery {
/* Prefix for NVMe controller names. */
char *name;
/* Transport type string, e.g. "tcp". */
char *trtype;
/* Address family string; optional. */
char *adrfam;
/* Transport address. */
char *traddr;
/* Transport service ID; optional. */
char *trsvcid;
/* Host NQN; optional, copied into opts.hostnqn when present. */
char *hostnqn;
/* Controller options passed to bdev_nvme_start_discovery(). */
struct spdk_nvme_ctrlr_opts opts;
};
/*
 * Release every string owned by a decoded start_discovery request.
 * The request structure itself is not freed.
 *
 * \param req Request whose string members are released.
 */
static void
free_rpc_bdev_nvme_start_discovery(struct rpc_bdev_nvme_start_discovery *req)
{
	char *owned[] = {
		req->name,
		req->trtype,
		req->adrfam,
		req->traddr,
		req->trsvcid,
		req->hostnqn,
	};
	size_t i;

	/* free(NULL) is a no-op, so absent optional fields need no check. */
	for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
		free(owned[i]);
	}
}
/* JSON decoders for the bdev_nvme_start_discovery parameters. Entries with a
 * trailing 'true' (adrfam, trsvcid, hostnqn) are optional; name, trtype and
 * traddr are required.
 */
static const struct spdk_json_object_decoder rpc_bdev_nvme_start_discovery_decoders[] = {
{"name", offsetof(struct rpc_bdev_nvme_start_discovery, name), spdk_json_decode_string},
{"trtype", offsetof(struct rpc_bdev_nvme_start_discovery, trtype), spdk_json_decode_string},
{"traddr", offsetof(struct rpc_bdev_nvme_start_discovery, traddr), spdk_json_decode_string},
{"adrfam", offsetof(struct rpc_bdev_nvme_start_discovery, adrfam), spdk_json_decode_string, true},
{"trsvcid", offsetof(struct rpc_bdev_nvme_start_discovery, trsvcid), spdk_json_decode_string, true},
{"hostnqn", offsetof(struct rpc_bdev_nvme_start_discovery, hostnqn), spdk_json_decode_string, true},
};
/* Per-request state kept alive until the start_discovery completion
 * callback has sent the JSON-RPC response.
 */
struct rpc_bdev_nvme_start_discovery_ctx {
/* Decoded request parameters. */
struct rpc_bdev_nvme_start_discovery req;
/* JSON-RPC request to respond to. */
struct spdk_jsonrpc_request *request;
};
/*
 * Completion callback for bdev_nvme_start_discovery(): send the JSON-RPC
 * response and release the per-request context.
 *
 * \param cb_ctx The rpc_bdev_nvme_start_discovery_ctx of the request.
 * \param rc     0 on success, negative errno if the discovery controller
 *               could not be connected.
 *
 * A negative rc is reported with its own error code and message. Failing to
 * connect is not a parameter error, so "Invalid parameters" would mislead
 * the client.
 */
static void
rpc_bdev_nvme_start_discovery_done(void *cb_ctx, int rc)
{
	struct rpc_bdev_nvme_start_discovery_ctx *ctx = cb_ctx;
	struct spdk_jsonrpc_request *request = ctx->request;

	if (rc < 0) {
		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
	} else {
		spdk_jsonrpc_send_bool_response(request, rc == 0);
	}

	/* Single cleanup path for both outcomes. */
	free_rpc_bdev_nvme_start_discovery(&ctx->req);
	free(ctx);
}
static void
rpc_bdev_nvme_start_discovery(struct spdk_jsonrpc_request *request,
const struct spdk_json_val *params)
{
struct rpc_bdev_nvme_start_discovery_ctx *ctx;
struct spdk_nvme_transport_id trid = {};
size_t len, maxlen;
int rc;
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
return;
}
spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.opts, sizeof(ctx->req.opts));
if (spdk_json_decode_object(params, rpc_bdev_nvme_start_discovery_decoders,
SPDK_COUNTOF(rpc_bdev_nvme_start_discovery_decoders),
&ctx->req)) {
SPDK_ERRLOG("spdk_json_decode_object failed\n");
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
"spdk_json_decode_object failed");
goto cleanup;
}
/* Parse trstring */
rc = spdk_nvme_transport_id_populate_trstring(&trid, ctx->req.trtype);
if (rc < 0) {
SPDK_ERRLOG("Failed to parse trtype: %s\n", ctx->req.trtype);
spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse trtype: %s",
ctx->req.trtype);
goto cleanup;
}
/* Parse trtype */
rc = spdk_nvme_transport_id_parse_trtype(&trid.trtype, ctx->req.trtype);
assert(rc == 0);
/* Parse traddr */
maxlen = sizeof(trid.traddr);
len = strnlen(ctx->req.traddr, maxlen);
if (len == maxlen) {
spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "traddr too long: %s",
ctx->req.traddr);
goto cleanup;
}
memcpy(trid.traddr, ctx->req.traddr, len + 1);
/* Parse adrfam */
if (ctx->req.adrfam) {
rc = spdk_nvme_transport_id_parse_adrfam(&trid.adrfam, ctx->req.adrfam);
if (rc < 0) {
SPDK_ERRLOG("Failed to parse adrfam: %s\n", ctx->req.adrfam);
spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "Failed to parse adrfam: %s",
ctx->req.adrfam);
goto cleanup;
}
}
/* Parse trsvcid */
if (ctx->req.trsvcid) {
maxlen = sizeof(trid.trsvcid);
len = strnlen(ctx->req.trsvcid, maxlen);
if (len == maxlen) {
spdk_jsonrpc_send_error_response_fmt(request, -EINVAL, "trsvcid too long: %s",
ctx->req.trsvcid);
goto cleanup;
}
memcpy(trid.trsvcid, ctx->req.trsvcid, len + 1);
}
if (ctx->req.hostnqn) {
snprintf(ctx->req.opts.hostnqn, sizeof(ctx->req.opts.hostnqn), "%s",
ctx->req.hostnqn);
}
ctx->request = request;
rc = bdev_nvme_start_discovery(&trid, ctx->req.name, &ctx->req.opts,
rpc_bdev_nvme_start_discovery_done, ctx);
if (rc) {
spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
goto cleanup;
}
return;
cleanup:
free_rpc_bdev_nvme_start_discovery(&ctx->req);
free(ctx);
}
/* Register the handler under the method name "bdev_nvme_start_discovery",
 * with the SPDK_RPC_RUNTIME state mask.
 */
SPDK_RPC_REGISTER("bdev_nvme_start_discovery", rpc_bdev_nvme_start_discovery,
SPDK_RPC_RUNTIME)

View File

@ -619,6 +619,28 @@ if __name__ == "__main__":
p.add_argument('name', help="Name of the NVMe controller")
p.set_defaults(func=bdev_nvme_reset_controller)
def bdev_nvme_start_discovery(args):
    """Command-line entry point: forward parsed arguments to the RPC client helper."""
    discovery_kwargs = dict(
        name=args.name,
        trtype=args.trtype,
        traddr=args.traddr,
        adrfam=args.adrfam,
        trsvcid=args.trsvcid,
        hostnqn=args.hostnqn,
    )
    rpc.bdev.bdev_nvme_start_discovery(args.client, **discovery_kwargs)
# Register the 'bdev_nvme_start_discovery' sub-command. Discovery targets an
# NVMe-oF discovery service, so the help text names fabrics transports and an
# IP address rather than PCIe/BDF, in line with the JSON-RPC documentation.
p = subparsers.add_parser('bdev_nvme_start_discovery', help='Start automatic discovery')
p.add_argument('-b', '--name', help="Name of the NVMe controller prefix for each bdev name", required=True)
p.add_argument('-t', '--trtype',
               help='NVMe-oF target trtype: e.g., rdma, tcp', required=True)
p.add_argument('-a', '--traddr',
               help='NVMe-oF target address: e.g., an ip address', required=True)
p.add_argument('-f', '--adrfam',
               help='NVMe-oF target adrfam: e.g., ipv4, ipv6, ib, fc, intra_host')
p.add_argument('-s', '--trsvcid',
               help='NVMe-oF target trsvcid: e.g., a port number')
# The value is the NQN of the connecting host, not a subsystem NQN.
p.add_argument('-q', '--hostnqn', help='NVMe-oF host NQN')
p.set_defaults(func=bdev_nvme_start_discovery)
def bdev_nvme_cuse_register(args):
rpc.bdev.bdev_nvme_cuse_register(args.client,
name=args.name)

View File

@ -656,6 +656,34 @@ def bdev_nvme_reset_controller(client, name):
return client.call('bdev_nvme_reset_controller', params)
def bdev_nvme_start_discovery(client, name, trtype, traddr, adrfam=None, trsvcid=None,
                              hostnqn=None):
    """Start a discovery service for the given discovery subsystem.

    Args:
        client: RPC client; its call() method sends the request
        name: prefix for the names of NVMe controllers created by the service
        trtype: transport type of the discovery service
        traddr: transport address of the discovery service
        adrfam: address family (optional)
        trsvcid: transport service ID, e.g. a port number (optional)
        hostnqn: NQN to connect from (optional)

    Returns:
        Whatever client.call() returns for 'bdev_nvme_start_discovery'.
    """
    params = {'name': name, 'trtype': trtype, 'traddr': traddr}

    # Optional parameters are sent only when they hold a truthy value.
    optional = (('hostnqn', hostnqn), ('adrfam', adrfam), ('trsvcid', trsvcid))
    params.update({key: value for key, value in optional if value})

    return client.call('bdev_nvme_start_discovery', params)
def bdev_nvme_cuse_register(client, name):
"""Register CUSE devices on NVMe controller.

View File

@ -72,6 +72,9 @@ DEFINE_STUB_V(accel_engine_destroy_cb, (void *io_device, void *ctx_buf));
DEFINE_RETURN_MOCK(spdk_nvme_ctrlr_get_memory_domain, int);
/* Unit-test stub for the discovery log page request; configured with a
 * return value of 0 (presumably never invokes cb_fn - confirm against the
 * DEFINE_STUB definition).
 */
DEFINE_STUB(spdk_nvme_ctrlr_get_discovery_log_page, int,
(struct spdk_nvme_ctrlr *ctrlr, spdk_nvme_discovery_cb cb_fn, void *cb_arg), 0);
int spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
struct spdk_memory_domain **domains, int array_size)
{