From ca5ce67f6ed461ddd5a34cb949ba0c41f6e279ce Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Tue, 3 Aug 2021 18:01:07 +0300 Subject: [PATCH] nvme/rdma: Ignore completion when we can't find qpair When poll_group is used, several qpairs share the same CQ and it is possible to receive a completion with error (e.g. IBV_WC_WR_FLUSH_ERR) for an already disconnected qpair. That happens because the qpair is destroyed while there are submitted but not completed send/receive Work Requests. To avoid such a situation, we should not destroy the ibv qpair until we reap completions for all submitted send/receive work requests. That requires some rework in the rdma transport and will be implemented later. Signed-off-by: Alexey Marchuk Change-Id: Idb6213d45c2a7954b9ab280f5eb5e021be00505f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9056 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Changpeng Liu Reviewed-by: Jim Harris --- lib/nvme/nvme_rdma.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c index d6d36f9f61..1a1ee850ca 100644 --- a/lib/nvme/nvme_rdma.c +++ b/lib/nvme/nvme_rdma.c @@ -2240,7 +2240,15 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size, rqpair = rdma_qpair != NULL ? rdma_qpair : nvme_rdma_poll_group_get_qpair_by_id(group, wc[i].qp_num); } - assert(rqpair); + if (!rqpair) { + /* When poll_group is used, several qpairs share the same CQ and it is possible to + * receive a completion with error (e.g. IBV_WC_WR_FLUSH_ERR) for already disconnected qpair + * That happens due to qpair is destroyed while there are submitted but not completed send/receive + * Work Requests + * TODO: ibv qpair must be destroyed only when all submitted Work Requests are completed */ + assert(group); + continue; + } assert(rqpair->current_num_sends > 0); rqpair->current_num_sends--; nvme_rdma_conditional_fail_qpair(rqpair, group);