nvme_rdma: Check only if Soft RoCE receive normal completion after disconnect

We saw this unexpected behavior by the current SPDK master.
Add the check to clarify this behavior occurs only when we use
Soft RoCE.

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I3a5eaa9064a0601c65139e7868898545926d0dbf
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11229
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Shuhei Matsumoto 2022-01-25 07:13:13 +09:00 committed by Tomasz Zawadzki
parent 5c36e1758c
commit b38d67d081

View File

@ -2185,6 +2185,13 @@ nvme_rdma_log_wc_status(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc)
}
}
static inline bool
nvme_rdma_is_rxe_device(struct ibv_device_attr *dev_attr)
{
return dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
}
static int
nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
struct nvme_rdma_poll_group *group,
@ -2276,6 +2283,26 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
continue;
}
if (spdk_unlikely(rdma_req->req == NULL)) {
struct ibv_device_attr dev_attr;
int query_status;
/* Bug in Soft Roce - we may receive a completion without error status when qpair is disconnected/destroyed.
* As sanity check - log an error if we use a real HW (it should never happen) */
query_status = ibv_query_device(cq->context, &dev_attr);
if (query_status == 0) {
if (!nvme_rdma_is_rxe_device(&dev_attr)) {
SPDK_ERRLOG("Received malformed completion: request 0x%"PRIx64" type %d\n", wc->wr_id,
rdma_wr->type);
assert(0);
}
} else {
SPDK_ERRLOG("Failed to query ib device\n");
assert(0);
}
continue;
}
rqpair = nvme_rdma_qpair(rdma_req->req->qpair);
rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED;
rqpair->current_num_sends--;