Revert "nvme/rdma: Correct qpair disconnect process"

This reverts commit eb09178a59.

Reason for revert:

This caused a regression for the adminq.
For the adminq, ctrlr_delete_io_qpair() is not called until the ctrlr is
destructed, so the necessary delete operations are never performed for the
adminq.

Reverting the patch is the practical option for now.
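
For context, a minimal standalone sketch of the lifecycle mismatch described above (this is not SPDK code; the struct and function names below are illustrative only): the admin qpair is disconnected on reset or failure, but its delete callback runs only when the controller itself is destructed, so cleanup that lives exclusively in the delete path never runs in between.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins, not the real SPDK transport callbacks. */
struct qpair {
	bool connected;
	bool resources_freed;
};

static void transport_disconnect(struct qpair *q) { q->connected = false; }
static void transport_delete(struct qpair *q) { q->resources_freed = true; }

int main(void)
{
	struct qpair adminq = { .connected = true, .resources_freed = false };

	/* Controller reset path: the adminq is only disconnected here. */
	transport_disconnect(&adminq);

	/* With cleanup moved from the disconnect path into the delete path,
	 * nothing has been freed at this point, because delete is not called
	 * for the adminq until the ctrlr is destructed. */
	printf("adminq resources freed after disconnect: %s\n",
	       adminq.resources_freed ? "yes" : "no"); /* prints "no" */

	/* Only at ctrlr destruct time does the delete callback finally run. */
	transport_delete(&adminq);
	return 0;
}

The revert keeps the resource cleanup in the disconnect path, which also runs for the adminq.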

Change-Id: Ib55ff81dfe97ee1e2c83876912e851c61f20e354
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10878
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Author: Shuhei Matsumoto, 2022-01-24 03:43:59 +00:00
Committed by: Tomasz Zawadzki
parent 194dc9e2f9
commit c8f986c7ee
2 changed files with 46 additions and 77 deletions


@@ -308,7 +308,6 @@ static const char *rdma_cm_event_str[] = {
 
 struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
 		uint32_t qp_num);
-static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
 
 static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
 			g_memory_domains);
@@ -1794,33 +1793,12 @@ nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
 {
 	struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
 	struct nvme_rdma_ctrlr *rctrlr = NULL;
+	struct nvme_rdma_cm_event_entry *entry, *tmp;
 	int rc;
 
-	if (rqpair->rdma_qp) {
-		rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
-		rctrlr = nvme_rdma_ctrlr(qpair->ctrlr);
-		if (rctrlr != NULL && rc == 0) {
-			if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
-				SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
-			}
-		}
-	}
-}
-
-static int
-nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
-{
-	struct nvme_rdma_qpair *rqpair;
-	struct nvme_rdma_ctrlr *rctrlr = NULL;
-	struct nvme_rdma_cm_event_entry *entry, *tmp;
-
-	assert(qpair != NULL);
-	rqpair = nvme_rdma_qpair(qpair);
-
-	if (rqpair->defer_deletion_to_pg) {
-		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
-		return 0;
-	}
+	spdk_rdma_free_mem_map(&rqpair->mr_map);
+	nvme_rdma_unregister_reqs(rqpair);
+	nvme_rdma_unregister_rsps(rqpair);
 
 	if (rqpair->evt) {
 		rdma_ack_cm_event(rqpair->evt);
@@ -1842,24 +1820,18 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
 		}
 	}
 
-	spdk_rdma_free_mem_map(&rqpair->mr_map);
-	nvme_rdma_unregister_reqs(rqpair);
-	nvme_rdma_unregister_rsps(rqpair);
-
-	nvme_rdma_qpair_abort_reqs(qpair, 0);
-	nvme_qpair_deinit(qpair);
-
-	nvme_rdma_put_memory_domain(rqpair->memory_domain);
-
-	nvme_rdma_free_reqs(rqpair);
-	nvme_rdma_free_rsps(rqpair);
-
 	if (rqpair->cm_id) {
 		if (rqpair->rdma_qp) {
-			/* qpair is already disconnected at this stage */
+			rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
+			if ((rctrlr != NULL) && (rc == 0)) {
+				if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
+					SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
+				}
+			}
 			spdk_rdma_qp_destroy(rqpair->rdma_qp);
 			rqpair->rdma_qp = NULL;
 		}
 
 		rdma_destroy_id(rqpair->cm_id);
 		rqpair->cm_id = NULL;
 	}
@@ -1868,7 +1840,30 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
 		ibv_destroy_cq(rqpair->cq);
 		rqpair->cq = NULL;
 	}
-
+}
+
+static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
+
+static int
+nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
+{
+	struct nvme_rdma_qpair *rqpair;
+
+	assert(qpair != NULL);
+	rqpair = nvme_rdma_qpair(qpair);
+
+	if (rqpair->defer_deletion_to_pg) {
+		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
+		return 0;
+	}
+
+	nvme_rdma_qpair_abort_reqs(qpair, 1);
+	nvme_qpair_deinit(qpair);
+
+	nvme_rdma_put_memory_domain(rqpair->memory_domain);
+
+	nvme_rdma_free_reqs(rqpair);
+	nvme_rdma_free_rsps(rqpair);
 	nvme_rdma_free(rqpair);
 
 	return 0;
@@ -2180,24 +2175,16 @@ nvme_rdma_log_wc_status(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc)
 	if (wc->status == IBV_WC_WR_FLUSH_ERR) {
 		/* If qpair is in ERR state, we will receive completions for all posted and not completed
 		 * Work Requests with IBV_WC_WR_FLUSH_ERR status. Don't log an error in that case */
-		SPDK_DEBUGLOG(nvme,
-			      "WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
+		SPDK_DEBUGLOG(nvme, "WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
 			      rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
 			      ibv_wc_status_str(wc->status));
 	} else {
-		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
+		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
 			    rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
 			    ibv_wc_status_str(wc->status));
 	}
 }
 
-static inline bool
-nvme_rdma_is_rxe_device(struct ibv_device_attr *dev_attr)
-{
-	return dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
-	       dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
-}
-
 static int
 nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
 				 struct nvme_rdma_poll_group *group,
@@ -2273,7 +2260,12 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
 							wc[i].qp_num);
 				}
 				if (!rqpair) {
-					assert(0);
+					/* When poll_group is used, several qpairs share the same CQ and it is possible to
+					 * receive a completion with error (e.g. IBV_WC_WR_FLUSH_ERR) for already disconnected qpair
+					 * That happens due to qpair is destroyed while there are submitted but not completed send/receive
+					 * Work Requests
+					 * TODO: ibv qpair must be destroyed only when all submitted Work Requests are completed */
+					assert(group);
 					continue;
 				}
 				assert(rqpair->current_num_sends > 0);
@@ -2284,26 +2276,6 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
 				continue;
 			}
 
-			if (spdk_unlikely(rdma_req->req == NULL)) {
-				struct ibv_device_attr dev_attr;
-				int query_status;
-
-				/* Bug in Soft Roce - we may receive a completion without error status when qpair is disconnected/destroyed.
-				 * As sanity check - log an error if we use a real HW (it should never happen) */
-				query_status = ibv_query_device(cq->context, &dev_attr);
-				if (query_status == 0) {
-					if (!nvme_rdma_is_rxe_device(&dev_attr)) {
-						SPDK_ERRLOG("Received malformed completion: request 0x%"PRIx64" type %d\n", wc->wr_id,
-							    rdma_wr->type);
-						assert(0);
-					}
-				} else {
-					SPDK_ERRLOG("Failed to query ib device\n");
-					assert(0);
-				}
-
-				continue;
-			}
-
 			rqpair = nvme_rdma_qpair(rdma_req->req->qpair);
 			rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED;
 			rqpair->current_num_sends--;
@@ -2647,14 +2619,12 @@ nvme_rdma_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
 	rqpair->cq = NULL;
 
-	/* qpair has requests submitted to HW, need to wait for their completion.
-	 * Allocate a tracker and attach it to poll group */
+	/*
+	 * If this fails, the system is in serious trouble,
+	 * just let the qpair get cleaned up immediately.
+	 */
 	destroyed_qpair = calloc(1, sizeof(*destroyed_qpair));
 	if (destroyed_qpair == NULL) {
-		/*
-		 * If this fails, the system is in serious trouble,
-		 * just let the qpair get cleaned up immediately.
-		 */
 		return 0;
 	}


@@ -58,7 +58,6 @@ DEFINE_STUB(rdma_ack_cm_event, int, (struct rdma_cm_event *event), 0);
 DEFINE_STUB_V(rdma_free_devices, (struct ibv_context **list));
 DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
 DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));
-DEFINE_STUB(rdma_destroy_id, int, (struct rdma_cm_id *id), 0);
 DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
 DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);