From b38d67d0811f8ce085c5585cd3193e46abb9f4a9 Mon Sep 17 00:00:00 2001
From: Shuhei Matsumoto
Date: Tue, 25 Jan 2022 07:13:13 +0900
Subject: [PATCH] nvme_rdma: Check only if Soft RoCE receive normal completion after disconnect

We saw this unexpected behavior by the current SPDK master.
Add the check to clarify this behavior occurs only when we use
Soft RoCE.

Signed-off-by: Alexey Marchuk
Signed-off-by: Shuhei Matsumoto
Change-Id: I3a5eaa9064a0601c65139e7868898545926d0dbf
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11229
Tested-by: SPDK CI Jenkins
Reviewed-by: Tomasz Zawadzki
---
 lib/nvme/nvme_rdma.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c
index 9d64f71282..c1e6887758 100644
--- a/lib/nvme/nvme_rdma.c
+++ b/lib/nvme/nvme_rdma.c
@@ -2185,6 +2185,13 @@ nvme_rdma_log_wc_status(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc)
 	}
 }
 
+static inline bool
+nvme_rdma_is_rxe_device(struct ibv_device_attr *dev_attr)
+{
+	return dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
+	       dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
+}
+
 static int
 nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
 				 struct nvme_rdma_poll_group *group,
@@ -2276,6 +2283,26 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
 				continue;
 			}
 
+			if (spdk_unlikely(rdma_req->req == NULL)) {
+				struct ibv_device_attr dev_attr;
+				int query_status;
+
+				/* Bug in Soft Roce - we may receive a completion without error status when qpair is disconnected/destroyed.
+				 * As sanity check - log an error if we use a real HW (it should never happen) */
+				query_status = ibv_query_device(cq->context, &dev_attr);
+				if (query_status == 0) {
+					if (!nvme_rdma_is_rxe_device(&dev_attr)) {
+						SPDK_ERRLOG("Received malformed completion: request 0x%"PRIx64" type %d\n", wc->wr_id,
+							    rdma_wr->type);
+						assert(0);
+					}
+				} else {
+					SPDK_ERRLOG("Failed to query ib device\n");
+					assert(0);
+				}
+				continue;
+			}
+
 			rqpair = nvme_rdma_qpair(rdma_req->req->qpair);
 			rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED;
 			rqpair->current_num_sends--;