nvmf: Destroy subsystems before destroying poll groups

When the nvmf_tgt application shuts down, it stops all
subsystems, then destroys the poll groups and then
destroys nvmf_tgt. Part of nvmf_tgt destruction is the
destruction of subsystems, and this process may require
cross-thread communication. Since the poll groups and
threads are already destroyed, we may get segfaults.

One possible solution is to change the order and destroy
nvmf_tgt before destroying the poll groups, but that doesn't
work: nvmf_tgt is registered as an io_device and the
poll groups hold its channel, so it can't be destroyed
while poll groups exist.

This patch adds a new state to nvmf_tgt state machine
which destroys all subsystems before destroying poll
groups and nvmf_tgt. It guarantees that all threads
exist when subsystems are destroyed.

Also rename state NVMF_TGT_FINI_FREE_RESOURCES to
NVMF_TGT_FINI_DESTROY_TARGET, the new name better
reflects the purpose of this state.

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: I08971d78cc9ad70d43cd43c346fd74d35c8bda60
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9668
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Alexey Marchuk 2021-10-26 10:45:56 +03:00 committed by Jim Harris
parent 478f652436
commit 11f0a6ec0f

View File

@ -48,8 +48,9 @@ enum nvmf_tgt_state {
NVMF_TGT_INIT_START_SUBSYSTEMS,
NVMF_TGT_RUNNING,
NVMF_TGT_FINI_STOP_SUBSYSTEMS,
NVMF_TGT_FINI_DESTROY_SUBSYSTEMS,
NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
NVMF_TGT_FINI_FREE_RESOURCES,
NVMF_TGT_FINI_DESTROY_TARGET,
NVMF_TGT_STOPPED,
NVMF_TGT_ERROR,
};
@ -93,7 +94,7 @@ nvmf_shutdown_cb(void *arg1)
if (g_tgt_state == NVMF_TGT_ERROR) {
/* Parse configuration error */
g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
} else {
g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
}
@ -112,7 +113,7 @@ _nvmf_tgt_destroy_poll_group_done(void *ctx)
assert(g_num_poll_groups > 0);
if (--g_num_poll_groups == 0) {
g_tgt_state = NVMF_TGT_FINI_FREE_RESOURCES;
g_tgt_state = NVMF_TGT_FINI_DESTROY_TARGET;
nvmf_tgt_advance_state();
}
}
@ -265,6 +266,33 @@ nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
return;
}
g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
nvmf_tgt_advance_state();
}
/*
 * Destroy the subsystems of g_spdk_nvmf_tgt one by one and, once none is left
 * to process, move the target state machine to
 * NVMF_TGT_FINI_DESTROY_POLL_GROUPS.
 *
 * This function is also passed to spdk_nvmf_subsystem_destroy() as the
 * completion callback, so an asynchronous destruction re-enters it and the
 * walk restarts from the first remaining subsystem.
 *
 * cb_arg is unused; it exists only to match the callback signature.
 */
static void
_nvmf_tgt_subsystem_destroy(void *cb_arg)
{
	struct spdk_nvmf_subsystem *cur, *following;
	int status;

	for (cur = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt); cur != NULL; cur = following) {
		/* Look up the successor before destroying the current entry
		 * (presumably because cur is no longer valid after a successful destroy). */
		following = spdk_nvmf_subsystem_get_next(cur);

		status = spdk_nvmf_subsystem_destroy(cur, _nvmf_tgt_subsystem_destroy, NULL);
		if (status == 0) {
			continue;
		}

		if (status == -EINPROGRESS) {
			/* Destruction completes asynchronously: this function is invoked
			 * again as the callback once the subsystem is gone and carries on
			 * with whatever subsystems remain. */
			return;
		}

		/* Any other failure is logged and the remaining subsystems are still attempted. */
		SPDK_ERRLOG("Unable to destroy NVMe-oF subsystem, rc %d. Trying others.\n", status);
	}

	g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
	nvmf_tgt_advance_state();
}
@ -455,15 +483,20 @@ nvmf_tgt_advance_state(void)
nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
}
} else {
g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
g_tgt_state = NVMF_TGT_FINI_DESTROY_SUBSYSTEMS;
}
break;
}
case NVMF_TGT_FINI_DESTROY_SUBSYSTEMS:
_nvmf_tgt_subsystem_destroy(NULL);
/* Function above can be asynchronous, it will call nvmf_tgt_advance_state() once done.
* So just return here */
return;
case NVMF_TGT_FINI_DESTROY_POLL_GROUPS:
/* Send a message to each poll group thread, and terminate the thread */
nvmf_tgt_destroy_poll_groups();
break;
case NVMF_TGT_FINI_FREE_RESOURCES:
case NVMF_TGT_FINI_DESTROY_TARGET:
spdk_nvmf_tgt_destroy(g_spdk_nvmf_tgt, nvmf_tgt_destroy_done, NULL);
break;
case NVMF_TGT_STOPPED: