nvme: continue probing ctrlrs even if one fails

It is possible that a single probe_ctx could be used
to probe multiple newly attached nvme controllers.  If
one of those controllers is removed during this process,
the rest of the controllers do not get probed and can
even get stuck in a zombie state.

It is better to just continue with probing the rest of
the controllers.

Fixes issue #1611.

Signed-off-by: Jim Harris <james.r.harris@intel.com>

Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4945 (master)

(cherry picked from commit ddf86600bb)
Change-Id: I4156ee8b50e8d52cfeee7224f210a58bb773e939
Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4958
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Jim Harris 2020-10-27 11:08:26 -07:00 committed by Tomasz Zawadzki
parent c64c931c52
commit d3b788e9ab
3 changed files with 17 additions and 19 deletions

View File

@ -773,9 +773,16 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo
spdk_nvme_remove_cb remove_cb);
/**
* Start controllers in the context list.
* Proceed with attaching contollers associated with the probe context.
*
* Users may call the function util it returns True.
* The probe context is one returned from a previous call to
* spdk_nvme_probe_async(). Users must call this function on the
* probe context until it returns 0.
*
* If any controllers fail to attach, there is no explicit notification.
* Users can detect attachment failure by comparing attach_cb invocations
* with the number of times where the user returned true for the
* probe_cb.
*
* \param probe_ctx Context used to track probe actions.
*
@ -783,7 +790,6 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo
* is also freed and no longer valid.
* \return -EAGAIN if there are still pending probe operations; user must call
* spdk_nvme_probe_poll_async again to continue progress.
* \return value other than 0 and -EAGAIN probe error with one controller.
*/
int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx);

View File

@ -688,11 +688,11 @@ nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
return 1;
}
static int
static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_probe_ctx *probe_ctx)
{
int rc = 0;
int rc = 0;
rc = nvme_ctrlr_process_init(ctrlr);
@ -704,11 +704,11 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
nvme_ctrlr_fail(ctrlr, false);
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
nvme_ctrlr_destruct(ctrlr);
return rc;
return;
}
if (ctrlr->state != NVME_CTRLR_STATE_READY) {
return 0;
return;
}
STAILQ_INIT(&ctrlr->io_producers);
@ -735,10 +735,7 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
if (probe_ctx->attach_cb) {
probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
return 0;
}
return 0;
}
static int
@ -1545,7 +1542,6 @@ spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
int rc = 0;
struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
@ -1554,19 +1550,15 @@ spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
}
TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
if (rc != 0) {
rc = -EIO;
break;
}
nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
}
if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
g_spdk_nvme_driver->initialized = true;
nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
free(probe_ctx);
return rc;
return 0;
}
return -EAGAIN;

View File

@ -382,7 +382,7 @@ test_nvme_init_controllers(void)
probe_ctx->attach_cb = attach_cb;
probe_ctx->trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
rc = nvme_init_controllers(probe_ctx);
CU_ASSERT(rc != 0);
CU_ASSERT(rc == 0);
CU_ASSERT(g_spdk_nvme_driver->initialized == true);
CU_ASSERT(ut_destruct_called == true);