nvmf: Update controller desctruction process

There is a race condition between controller destruction and
subsystem state change, e.g. admin qpair may already be freed
when a namespace is added or removed. As result in function
poll_group_update_subsystem we may get heap-use-after-free error

Another problem is that some qpair's live time may exceed controller's
life time. To avoid it, start controller destruction process when the last
qpair finished the disconnect process (previously controller started
the descruction process before the last qpair starts to disconnect
and it could lead to raise conditions)

Fixes #2055

Change-Id: I11612979eb914a5fdcc7b9e3c812bf1e450b6120
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8963
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Matt Dumm <matt.dumm@hpe.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
Alexey Marchuk 2021-07-28 17:21:24 +03:00 committed by Jim Harris
parent 077d0f3eac
commit 3b1f13ef29
3 changed files with 50 additions and 34 deletions

View File

@ -921,10 +921,12 @@ nvmf_ctrlr_association_remove(void *ctx)
SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n",
ctrlr->subsys->subnqn);
rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
if (rc < 0) {
SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
assert(false);
if (ctrlr->admin_qpair) {
rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
if (rc < 0) {
SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
assert(false);
}
}
return SPDK_POLLER_BUSY;
@ -2390,6 +2392,7 @@ spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
assert(ctrlr->admin_qpair);
/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
(1U << nsdata->lbaf[nsdata->flbas.format].lbads);
@ -2440,11 +2443,13 @@ int
spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
{
struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport;
struct spdk_nvmf_transport *transport;
/*
* Common fields for discovery and NVM subsystems
*/
assert(ctrlr->admin_qpair);
transport = ctrlr->admin_qpair->transport;
spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
assert((transport->opts.max_io_size % 4096) == 0);
cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);

View File

@ -930,26 +930,6 @@ _nvmf_ctrlr_destruct(void *ctx)
nvmf_ctrlr_destruct(ctrlr);
}
static void
_nvmf_transport_qpair_fini_complete(void *cb_ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
if (qpair_ctx->cb_fn) {
spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
}
free(qpair_ctx);
}
static void
_nvmf_transport_qpair_fini(void *ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
spdk_nvmf_poll_group_remove(qpair_ctx->qpair);
nvmf_transport_qpair_fini(qpair_ctx->qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
}
static void
_nvmf_ctrlr_free_from_qpair(void *ctx)
{
@ -961,11 +941,47 @@ _nvmf_ctrlr_free_from_qpair(void *ctx)
count = spdk_bit_array_count_set(ctrlr->qpair_mask);
if (count == 0) {
assert(!ctrlr->in_destruct);
SPDK_DEBUGLOG(nvmf, "Last qpair %u, destroy ctrlr %hx\n", qpair_ctx->qid, ctrlr->cntlid);
ctrlr->in_destruct = true;
spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
}
free(qpair_ctx);
}
spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx);
static void
_nvmf_transport_qpair_fini_complete(void *cb_ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
struct spdk_nvmf_ctrlr *ctrlr;
/* Store cb args since cb_ctx can be freed in _nvmf_ctrlr_free_from_qpair */
nvmf_qpair_disconnect_cb cb_fn = qpair_ctx->cb_fn;
void *cb_arg = qpair_ctx->ctx;
struct spdk_thread *cb_thread = qpair_ctx->thread;
ctrlr = qpair_ctx->ctrlr;
SPDK_DEBUGLOG(nvmf, "Finish destroying qid %u\n", qpair_ctx->qid);
if (ctrlr) {
if (qpair_ctx->qid == 0) {
/* Admin qpair is removed, so set the pointer to NULL.
* This operation is safe since we are on ctrlr thread now, admin qpair's thread is the same
* as controller's thread */
ctrlr->admin_qpair = NULL;
}
/* Free qpair id from controller's bit mask and destroy the controller if it is the last qpair */
if (ctrlr->thread) {
spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
} else {
_nvmf_ctrlr_free_from_qpair(qpair_ctx);
}
} else {
free(qpair_ctx);
}
if (cb_fn) {
spdk_thread_send_msg(cb_thread, cb_fn, cb_arg);
}
}
void
@ -1026,14 +1042,9 @@ _nvmf_qpair_destroy(void *ctx, int status)
}
}
if (!ctrlr || !ctrlr->thread) {
spdk_nvmf_poll_group_remove(qpair);
nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
return;
}
qpair_ctx->ctrlr = ctrlr;
spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
spdk_nvmf_poll_group_remove(qpair);
nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
}
static void

View File

@ -3088,7 +3088,7 @@ subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
if (ctrlr->admin_qpair && ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
}
}