diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c
index 7ea5b0a090..9d64f71282 100644
--- a/lib/nvme/nvme_rdma.c
+++ b/lib/nvme/nvme_rdma.c
@@ -308,7 +308,6 @@ static const char *rdma_cm_event_str[] = {
 
 struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
 		uint32_t qp_num);
-static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
 
 static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
 			g_memory_domains);
@@ -1794,33 +1793,12 @@ nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
 {
 	struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
 	struct nvme_rdma_ctrlr *rctrlr = NULL;
+	struct nvme_rdma_cm_event_entry *entry, *tmp;
 	int rc;
 
-	if (rqpair->rdma_qp) {
-		rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
-		rctrlr = nvme_rdma_ctrlr(qpair->ctrlr);
-		if (rctrlr != NULL && rc == 0) {
-			if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
-				SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
-			}
-		}
-	}
-}
-
-static int
-nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
-{
-	struct nvme_rdma_qpair *rqpair;
-	struct nvme_rdma_ctrlr *rctrlr = NULL;
-	struct nvme_rdma_cm_event_entry *entry, *tmp;
-
-	assert(qpair != NULL);
-	rqpair = nvme_rdma_qpair(qpair);
-
-	if (rqpair->defer_deletion_to_pg) {
-		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
-		return 0;
-	}
+	spdk_rdma_free_mem_map(&rqpair->mr_map);
+	nvme_rdma_unregister_reqs(rqpair);
+	nvme_rdma_unregister_rsps(rqpair);
 
 	if (rqpair->evt) {
 		rdma_ack_cm_event(rqpair->evt);
@@ -1842,24 +1820,18 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
 		}
 	}
 
-	spdk_rdma_free_mem_map(&rqpair->mr_map);
-	nvme_rdma_unregister_reqs(rqpair);
-	nvme_rdma_unregister_rsps(rqpair);
-
-	nvme_rdma_qpair_abort_reqs(qpair, 0);
-	nvme_qpair_deinit(qpair);
-
-	nvme_rdma_put_memory_domain(rqpair->memory_domain);
-
-	nvme_rdma_free_reqs(rqpair);
-	nvme_rdma_free_rsps(rqpair);
-
 	if (rqpair->cm_id) {
 		if (rqpair->rdma_qp) {
-			/* qpair is already disconnected at this stage */
+			rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
+			if ((rctrlr != NULL) && (rc == 0)) {
+				if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
+					SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
+				}
+			}
 			spdk_rdma_qp_destroy(rqpair->rdma_qp);
 			rqpair->rdma_qp = NULL;
 		}
+
 		rdma_destroy_id(rqpair->cm_id);
 		rqpair->cm_id = NULL;
 	}
@@ -1868,7 +1840,30 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
 		ibv_destroy_cq(rqpair->cq);
 		rqpair->cq = NULL;
 	}
+}
 
+static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
+
+static int
+nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
+{
+	struct nvme_rdma_qpair *rqpair;
+
+	assert(qpair != NULL);
+	rqpair = nvme_rdma_qpair(qpair);
+
+	if (rqpair->defer_deletion_to_pg) {
+		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
+		return 0;
+	}
+
+	nvme_rdma_qpair_abort_reqs(qpair, 1);
+	nvme_qpair_deinit(qpair);
+
+	nvme_rdma_put_memory_domain(rqpair->memory_domain);
+
+	nvme_rdma_free_reqs(rqpair);
+	nvme_rdma_free_rsps(rqpair);
 	nvme_rdma_free(rqpair);
 
 	return 0;
@@ -2180,24 +2175,16 @@ nvme_rdma_log_wc_status(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc)
 	if (wc->status == IBV_WC_WR_FLUSH_ERR) {
 		/* If qpair is in ERR state, we will receive completions for all posted and not completed
 		 * Work Requests with IBV_WC_WR_FLUSH_ERR status. Don't log an error in that case */
-		SPDK_DEBUGLOG(nvme,
-			      "WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
+		SPDK_DEBUGLOG(nvme, "WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
 			      rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
 			      ibv_wc_status_str(wc->status));
 	} else {
-		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
+		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
 			    rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
 			    ibv_wc_status_str(wc->status));
 	}
 }
 
-static inline bool
-nvme_rdma_is_rxe_device(struct ibv_device_attr *dev_attr)
-{
-	return dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
-	       dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
-}
-
 static int
 nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
				 struct nvme_rdma_poll_group *group,
@@ -2273,7 +2260,12 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
							   wc[i].qp_num);
			}
			if (!rqpair) {
-				assert(0);
+				/* When a poll group is used, several qpairs share the same CQ, so it is possible to
+				 * receive an error completion (e.g. IBV_WC_WR_FLUSH_ERR) for an already disconnected
+				 * qpair. That happens when a qpair is destroyed while it still has submitted but not
+				 * yet completed send/receive Work Requests.
+				 * TODO: destroy the ibv qpair only after all submitted Work Requests have completed */
+				assert(group);
				continue;
			}
			assert(rqpair->current_num_sends > 0);
@@ -2284,26 +2276,6 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
				continue;
			}
 
-			if (spdk_unlikely(rdma_req->req == NULL)) {
-				struct ibv_device_attr dev_attr;
-				int query_status;
-
-				/* Bug in Soft Roce - we may receive a completion without error status when qpair is disconnected/destroyed.
-				 * As sanity check - log an error if we use a real HW (it should never happen) */
-				query_status = ibv_query_device(cq->context, &dev_attr);
-				if (query_status == 0) {
-					if (!nvme_rdma_is_rxe_device(&dev_attr)) {
-						SPDK_ERRLOG("Received malformed completion: request 0x%"PRIx64" type %d\n", wc->wr_id,
-							    rdma_wr->type);
-						assert(0);
-					}
-				} else {
-					SPDK_ERRLOG("Failed to query ib device\n");
-					assert(0);
-				}
-				continue;
-			}
-
			rqpair = nvme_rdma_qpair(rdma_req->req->qpair);
			rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED;
			rqpair->current_num_sends--;
@@ -2647,14 +2619,12 @@ nvme_rdma_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
 
	rqpair->cq = NULL;
 
-	/* qpair has requests submitted to HW, need to wait for their completion.
-	 * Allocate a tracker and attach it to poll group */
+	/*
+	 * If this fails, the system is in serious trouble,
+	 * just let the qpair get cleaned up immediately.
+	 */
	destroyed_qpair = calloc(1, sizeof(*destroyed_qpair));
	if (destroyed_qpair == NULL) {
-		/*
-		 * If this fails, the system is in serious trouble,
-		 * just let the qpair get cleaned up immediately.
-		 */
		return 0;
	}
 
diff --git a/test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c b/test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c
index 253a82a1c5..859c06d770 100644
--- a/test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c
+++ b/test/unit/lib/nvme/nvme_rdma.c/nvme_rdma_ut.c
@@ -58,7 +58,6 @@ DEFINE_STUB(rdma_ack_cm_event, int, (struct rdma_cm_event *event), 0);
 DEFINE_STUB_V(rdma_free_devices, (struct ibv_context **list));
 DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
 DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));
-DEFINE_STUB(rdma_destroy_id, int, (struct rdma_cm_id *id), 0);
 DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
 DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);
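
The teardown ordering the patch consolidates into nvme_rdma_ctrlr_disconnect_qpair() — disconnect the QP, wait for the DISCONNECTED event, then destroy the QP, the cm_id, and finally the CQ — can be sketched outside of SPDK. The snippet below is a minimal illustration, not SPDK code: it uses plain librdmacm/libibverbs calls instead of the spdk_rdma wrappers, the helper name teardown_qpair is made up, and the blocking rdma_get_cm_event() stands in for SPDK's non-blocking nvme_rdma_process_event() polling.

/*
 * Hypothetical sketch of the qpair teardown ordering; assumes the cm_id,
 * its CQ and its event channel were created via the usual rdma_cm flow.
 */
#include <stdio.h>
#include <rdma/rdma_cma.h>

static void
teardown_qpair(struct rdma_cm_id *cm_id, struct ibv_cq *cq,
	       struct rdma_event_channel *channel)
{
	struct rdma_cm_event *event;

	if (cm_id->qp != NULL) {
		/* 1. Initiate the disconnect and wait for the peer's answer,
		 *    so no further completions are generated for this QP. */
		if (rdma_disconnect(cm_id) == 0 &&
		    rdma_get_cm_event(channel, &event) == 0) {
			if (event->event != RDMA_CM_EVENT_DISCONNECTED) {
				fprintf(stderr, "Target did not respond to qpair disconnect.\n");
			}
			rdma_ack_cm_event(event);
		}

		/* 2. Destroy the QP only after the disconnect handshake. */
		rdma_destroy_qp(cm_id);
	}

	/* 3. The cm_id can go once its QP is gone... */
	rdma_destroy_id(cm_id);

	/* 4. ...and the CQ last, because the QP referenced it. */
	if (cq != NULL) {
		ibv_destroy_cq(cq);
	}
}

Destroying in any other order risks exactly the situation the new comment in nvme_rdma_cq_process_completions() describes: a shared CQ handing back flushed completions for a QP that no longer exists.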