nvmf: Update controller desctruction process

There is a race condition between controller destruction and
subsystem state change, e.g. admin qpair may already be freed
when a namespace is added or removed. As result in function
poll_group_update_subsystem we may get heap-use-after-free error

Another problem is that some qpair's live time may exceed controller's
life time. To avoid it, start controller destruction process when the last
qpair finished the disconnect process (previously controller started
the descruction process before the last qpair starts to disconnect
and it could lead to raise conditions)

Fixes #2055

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: Ibc99b1d840e4796e1588cc217d65834bb556b909
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9995
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Alexey Marchuk 2021-10-26 10:45:34 +03:00 committed by Jim Harris
parent c144d3669c
commit 478f652436
3 changed files with 53 additions and 35 deletions

View File

@ -497,6 +497,7 @@ _nvmf_ctrlr_destruct(void *ctx)
assert(spdk_get_thread() == ctrlr->thread);
assert(ctrlr->in_destruct);
SPDK_DEBUGLOG(nvmf, "Destroy ctrlr 0x%hx\n", ctrlr->cntlid);
if (ctrlr->disconnect_in_progress) {
SPDK_ERRLOG("freeing ctrlr with disconnect in progress\n");
spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_destruct, ctrlr);
@ -938,10 +939,12 @@ nvmf_ctrlr_association_remove(void *ctx)
SPDK_DEBUGLOG(nvmf, "Disconnecting host from subsystem %s due to association timeout.\n",
ctrlr->subsys->subnqn);
rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
if (rc < 0) {
SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
assert(false);
if (ctrlr->admin_qpair) {
rc = spdk_nvmf_qpair_disconnect(ctrlr->admin_qpair, NULL, NULL);
if (rc < 0) {
SPDK_ERRLOG("Fail to disconnect admin ctrlr qpair\n");
assert(false);
}
}
return SPDK_POLLER_BUSY;
@ -2501,6 +2504,7 @@ spdk_nvmf_ctrlr_identify_ns(struct spdk_nvmf_ctrlr *ctrlr,
nvmf_bdev_ctrlr_identify_ns(ns, nsdata, ctrlr->dif_insert_or_strip);
assert(ctrlr->admin_qpair);
/* Due to bug in the Linux kernel NVMe driver we have to set noiob no larger than mdts */
max_num_blocks = ctrlr->admin_qpair->transport->opts.max_io_size /
(1U << nsdata->lbaf[nsdata->flbas.format].lbads);
@ -2556,11 +2560,13 @@ int
spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_ctrlr_data *cdata)
{
struct spdk_nvmf_subsystem *subsystem = ctrlr->subsys;
struct spdk_nvmf_transport *transport = ctrlr->admin_qpair->transport;
struct spdk_nvmf_transport *transport;
/*
* Common fields for discovery and NVM subsystems
*/
assert(ctrlr->admin_qpair);
transport = ctrlr->admin_qpair->transport;
spdk_strcpy_pad(cdata->fr, FW_VERSION, sizeof(cdata->fr), ' ');
assert((transport->opts.max_io_size % 4096) == 0);
cdata->mdts = spdk_u32log2(transport->opts.max_io_size / 4096);

View File

@ -938,26 +938,6 @@ _nvmf_ctrlr_destruct(void *ctx)
nvmf_ctrlr_destruct(ctrlr);
}
static void
_nvmf_transport_qpair_fini_complete(void *cb_ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
if (qpair_ctx->cb_fn) {
spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
}
free(qpair_ctx);
}
static void
_nvmf_transport_qpair_fini(void *ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
spdk_nvmf_poll_group_remove(qpair_ctx->qpair);
nvmf_transport_qpair_fini(qpair_ctx->qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
}
static void
_nvmf_ctrlr_free_from_qpair(void *ctx)
{
@ -969,11 +949,47 @@ _nvmf_ctrlr_free_from_qpair(void *ctx)
count = spdk_bit_array_count_set(ctrlr->qpair_mask);
if (count == 0) {
assert(!ctrlr->in_destruct);
SPDK_DEBUGLOG(nvmf, "Last qpair %u, destroy ctrlr 0x%hx\n", qpair_ctx->qid, ctrlr->cntlid);
ctrlr->in_destruct = true;
spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
}
free(qpair_ctx);
}
spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx);
static void
_nvmf_transport_qpair_fini_complete(void *cb_ctx)
{
struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
struct spdk_nvmf_ctrlr *ctrlr;
/* Store cb args since cb_ctx can be freed in _nvmf_ctrlr_free_from_qpair */
nvmf_qpair_disconnect_cb cb_fn = qpair_ctx->cb_fn;
void *cb_arg = qpair_ctx->ctx;
struct spdk_thread *cb_thread = qpair_ctx->thread;
ctrlr = qpair_ctx->ctrlr;
SPDK_DEBUGLOG(nvmf, "Finish destroying qid %u\n", qpair_ctx->qid);
if (ctrlr) {
if (qpair_ctx->qid == 0) {
/* Admin qpair is removed, so set the pointer to NULL.
* This operation is safe since we are on ctrlr thread now, admin qpair's thread is the same
* as controller's thread */
assert(ctrlr->thread == spdk_get_thread());
ctrlr->admin_qpair = NULL;
}
/* Free qpair id from controller's bit mask and destroy the controller if it is the last qpair */
if (ctrlr->thread) {
spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
} else {
_nvmf_ctrlr_free_from_qpair(qpair_ctx);
}
} else {
free(qpair_ctx);
}
if (cb_fn) {
spdk_thread_send_msg(cb_thread, cb_fn, cb_arg);
}
}
void
@ -1034,14 +1050,9 @@ _nvmf_qpair_destroy(void *ctx, int status)
}
}
if (!ctrlr || !ctrlr->thread) {
spdk_nvmf_poll_group_remove(qpair);
nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
return;
}
qpair_ctx->ctrlr = ctrlr;
spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
spdk_nvmf_poll_group_remove(qpair);
nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
}
static void

View File

@ -1974,7 +1974,8 @@ nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
assert(spdk_get_thread() == subsystem->thread);
assert(subsystem == ctrlr->subsys);
SPDK_DEBUGLOG(nvmf, "remove ctrlr %p from subsys %p %s\n", ctrlr, subsystem, subsystem->subnqn);
SPDK_DEBUGLOG(nvmf, "remove ctrlr %p id 0x%x from subsys %p %s\n", ctrlr, ctrlr->cntlid, subsystem,
subsystem->subnqn);
TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
}
@ -3154,7 +3155,7 @@ subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
if (ctrlr->admin_qpair && ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
}
}