nvmf: retry connect commands internally when subsys not ready

It is better to not fail connect commands when a subsystem
is not ready.  The host will not be expecting that and will
typically treat it as a catastrophic failure (i.e. it won't
retry the connect).

So instead, when this situation occurs, start a poller for
the connect request.  We will keep retrying it until the
subsystem is ready to handle it, or fail it with a
Controller Busy status if the subsystem is still not ready
after one second.

Fixes issue #1985.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Id8835df8f0edf1e889fdd7e754e261c2a880cbb6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8571
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Jim Harris 2021-06-30 20:30:13 +00:00 committed by Tomasz Zawadzki
parent 65ef1f32a6
commit ac3a42b15c

View File

@ -801,6 +801,25 @@ out:
return status;
}
static int nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req);
/*
 * Poller callback that re-runs a connect command which was deferred because
 * its subsystem was not ready.
 *
 * arg is the struct spdk_nvmf_request being retried.
 * Always returns SPDK_POLLER_BUSY.
 */
static int
retry_connect(void *arg)
{
	struct spdk_nvmf_request *connect_req = arg;
	struct spdk_nvmf_subsystem_poll_group *pg = nvmf_subsystem_pg_from_connect_cmd(connect_req);

	/* Re-take the mgmt_io_outstanding count that was dropped when the
	 * connect was deferred, since the command is about to be processed again.
	 */
	pg->mgmt_io_outstanding++;

	/* Drop this poller before retrying; nvmf_ctrlr_cmd_connect() registers
	 * a new one on the request if it has to defer the command again.
	 */
	spdk_poller_unregister(&connect_req->poller);

	if (nvmf_ctrlr_cmd_connect(connect_req) == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
		_nvmf_request_complete(connect_req);
	}

	return SPDK_POLLER_BUSY;
}
static int
nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
{
@ -825,10 +844,27 @@ nvmf_ctrlr_cmd_connect(struct spdk_nvmf_request *req)
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
(subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
SPDK_ERRLOG("Subsystem '%s' is not ready\n", subsystem->subnqn);
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
struct spdk_nvmf_subsystem_poll_group *sgroup;
if (req->timeout_tsc == 0) {
/* We will only retry the request up to 1 second. */
req->timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz();
} else if (spdk_get_ticks() > req->timeout_tsc) {
SPDK_ERRLOG("Subsystem '%s' was not ready for 1 second\n", subsystem->subnqn);
rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
rsp->status.sc = SPDK_NVMF_FABRIC_SC_CONTROLLER_BUSY;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
/* Subsystem is not ready to handle a connect. Use a poller to retry it
* again later. Decrement the mgmt_io_outstanding to avoid the
* subsystem waiting for this command to complete before unpausing.
*/
sgroup = nvmf_subsystem_pg_from_connect_cmd(req);
sgroup->mgmt_io_outstanding--;
SPDK_DEBUGLOG(nvmf, "Subsystem '%s' is not ready for connect, retrying...\n", subsystem->subnqn);
req->poller = SPDK_POLLER_REGISTER(retry_connect, req, 100);
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
/* Ensure that hostnqn is null terminated */