nvme: add an enum for why a qpair disconnected

Change-Id: I1a9517d9673051615942c873416505704740691a
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/475805
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Seth Howell 2019-11-25 16:17:50 -07:00 committed by Tomasz Zawadzki
parent 3911922005
commit 24bca2eadd
6 changed files with 69 additions and 1 deletion

View File

@ -235,6 +235,21 @@ bool spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr);
void spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts,
size_t opts_size);
/**
 * Reason for qpair disconnect at the transport layer.
 *
 * NONE implies that the qpair is still connected while UNKNOWN means that the
 * qpair is disconnected, but the cause was not apparent.
 */
enum spdk_nvme_qp_failure_reason {
	/** The qpair is still connected; no failure has been recorded. */
	SPDK_NVME_QPAIR_FAILURE_NONE = 0,
	/** Disconnect originated locally (e.g. controller reset, local device
	 *  removal, or an address change on the local RDMA CM id). */
	SPDK_NVME_QPAIR_FAILURE_LOCAL,
	/** Disconnect originated at the remote side (e.g. an RDMA CM disconnect
	 *  event or the transport's retry count being exceeded). */
	SPDK_NVME_QPAIR_FAILURE_REMOTE,
	/** The qpair is disconnected, but the cause was not apparent. */
	SPDK_NVME_QPAIR_FAILURE_UNKNOWN,
};

/* Shorthand so API consumers can use the type without the enum keyword. */
typedef enum spdk_nvme_qp_failure_reason spdk_nvme_qp_failure_reason;
/**
* NVMe library transports
*
@ -1106,6 +1121,16 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c
*/
int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);
/**
* Returns the reason the admin qpair for a given controller is disconnected.
*
* \param ctrlr The controller to check.
*
* \return a valid spdk_nvme_qp_failure_reason.
*/
spdk_nvme_qp_failure_reason spdk_nvme_ctrlr_get_admin_qp_failure_reason(
struct spdk_nvme_ctrlr *ctrlr);
/**
* Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
*
@ -1252,6 +1277,15 @@ int spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr,
int32_t spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair,
uint32_t max_completions);
/**
* Returns the reason the qpair is disconnected.
*
* \param qpair The qpair to check.
*
* \return a valid spdk_nvme_qp_failure_reason.
*/
spdk_nvme_qp_failure_reason spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair);
/**
* Send the given admin command to the NVMe controller.
*

View File

@ -422,6 +422,7 @@ spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
rc = -EAGAIN;
goto out;
}
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
out:
@ -429,6 +430,12 @@ out:
return rc;
}
spdk_nvme_qp_failure_reason
spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
{
return ctrlr->adminq->transport_failure_reason;
}
/*
* This internal function will attempt to take the controller
* lock before calling disconnect on a controller qpair.
@ -1076,11 +1083,13 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
/* Disable all queues before disabling the controller hardware. */
TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
}
nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_DISABLED);
nvme_qpair_complete_error_reqs(ctrlr->adminq);
nvme_transport_qpair_abort_reqs(ctrlr->adminq, 0 /* retry */);
ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
if (nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq) != 0) {
SPDK_ERRLOG("Controller reinitialization failed.\n");
@ -1088,6 +1097,7 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
rc = -1;
goto out;
}
ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_CONNECTED);
/* Doorbell buffer config is invalid during reset */
@ -1116,10 +1126,12 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
/* Reinitialize qpairs */
TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
if (nvme_transport_ctrlr_connect_qpair(ctrlr, qpair) != 0) {
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
rc = -1;
continue;
}
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
}
}

View File

@ -381,6 +381,8 @@ struct spdk_nvme_qpair {
struct spdk_nvme_ctrlr_process *active_proc;
void *req_buf;
uint8_t transport_failure_reason: 2;
};
struct spdk_nvme_ns {

View File

@ -506,6 +506,12 @@ spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
return ret;
}
/*
 * Report why the given qpair is disconnected.
 *
 * Returns the transport_failure_reason field maintained by the transport
 * layer; SPDK_NVME_QPAIR_FAILURE_NONE means the qpair is still connected.
 */
spdk_nvme_qp_failure_reason
spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair)
{
	spdk_nvme_qp_failure_reason reason;

	reason = qpair->transport_failure_reason;
	return reason;
}
int
nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
struct spdk_nvme_ctrlr *ctrlr,

View File

@ -287,13 +287,18 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
}
break;
case RDMA_CM_EVENT_DISCONNECTED:
rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE;
nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
break;
case RDMA_CM_EVENT_MULTICAST_JOIN:
case RDMA_CM_EVENT_MULTICAST_ERROR:
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
@ -1060,6 +1065,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
if (rc < 0) {
rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
return -1;
@ -1876,7 +1882,7 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
{
struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
struct ibv_wc wc[MAX_COMPLETIONS_PER_POLL];
int i, rc, batch_size;
int i, rc = 0, batch_size;
uint32_t reaped;
struct ibv_cq *cq;
struct spdk_nvme_rdma_req *rdma_req;
@ -1967,6 +1973,12 @@ fail:
* we can call nvme_rdma_qpair_disconnect. For other qpairs we need
* to call the generic function which will take the lock for us.
*/
if (rc == IBV_WC_RETRY_EXC_ERR) {
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE;
} else if (qpair->transport_failure_reason == SPDK_NVME_QPAIR_FAILURE_NONE) {
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
}
if (nvme_qpair_is_admin_queue(qpair)) {
nvme_rdma_qpair_disconnect(qpair);
} else {

View File

@ -1511,6 +1511,8 @@ fail:
* we can call nvme_tcp_qpair_disconnect. For other qpairs we need
* to call the generic function which will take the lock for us.
*/
qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
if (nvme_qpair_is_admin_queue(qpair)) {
nvme_tcp_qpair_disconnect(qpair);
} else {