From c3884f943c3060a4cd11591ed6c0dd0dadc807d5 Mon Sep 17 00:00:00 2001 From: Seth Howell Date: Tue, 26 Mar 2019 13:30:48 -0700 Subject: [PATCH] rdma: batch rdma recvs per poll. This will help save MMIO overhead, especially in the SRQ case. Change-Id: I6fb70cf6de4763450f97961f41ccdce3acec2e63 Signed-off-by: Seth Howell Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/449265 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris --- lib/nvmf/rdma.c | 109 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 14 deletions(-) diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index b48ead9320..0d8d304782 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -291,6 +291,11 @@ struct spdk_nvmf_send_wr_list { struct ibv_send_wr *last; }; +struct spdk_nvmf_recv_wr_list { + struct ibv_recv_wr *first; + struct ibv_recv_wr *last; +}; + struct spdk_nvmf_rdma_resources { /* Array of size "max_queue_depth" containing RDMA requests. */ struct spdk_nvmf_rdma_request *reqs; @@ -316,6 +321,9 @@ struct spdk_nvmf_rdma_resources { void *bufs; struct ibv_mr *bufs_mr; + /* The list of pending recvs to transfer */ + struct spdk_nvmf_recv_wr_list recvs_to_post; + /* Receives that are waiting for a request object */ STAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue; @@ -1029,6 +1037,26 @@ error: return -1; } +/* Append the given recv wr structure to the resource struct's outstanding recvs list. */ +/* This function accepts either a single wr or the first wr in a linked list. 
*/ +static void +nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first) +{ + struct ibv_recv_wr *last; + + last = first; + while (last->next != NULL) { + last = last->next; + } + + if (rqpair->resources->recvs_to_post.first == NULL) { + rqpair->resources->recvs_to_post.first = first; + rqpair->resources->recvs_to_post.last = last; + } else { + rqpair->resources->recvs_to_post.last->next = first; + rqpair->resources->recvs_to_post.last = last; + } +} /* Append the given send wr structure to the qpair's outstanding sends list. */ /* This function accepts either a single wr or the first wr in a linked list. */ @@ -1075,13 +1103,11 @@ request_transfer_in(struct spdk_nvmf_request *req) static int request_transfer_out(struct spdk_nvmf_request *req, int *data_posted) { - int rc; int num_outstanding_data_wr = 0; struct spdk_nvmf_rdma_request *rdma_req; struct spdk_nvmf_qpair *qpair; struct spdk_nvmf_rdma_qpair *rqpair; struct spdk_nvme_cpl *rsp; - struct ibv_recv_wr *bad_recv_wr = NULL; struct ibv_send_wr *first = NULL; *data_posted = 0; @@ -1098,20 +1124,11 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted) } rsp->sqhd = qpair->sq_head; - /* Post the capsule to the recv buffer */ + /* queue the capsule for the recv buffer */ assert(rdma_req->recv != NULL); - SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. 
Recv: %p Connection: %p\n", rdma_req->recv, - rqpair); - if (rqpair->srq == NULL) { - rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr); - } else { - rc = ibv_post_srq_recv(rqpair->srq, &rdma_req->recv->wr, &bad_recv_wr); - } - if (rc) { - SPDK_ERRLOG("Unable to re-post rx descriptor\n"); - return rc; - } + nvmf_rdma_qpair_queue_recv_wrs(rqpair, &rdma_req->recv->wr); + rdma_req->recv = NULL; assert(rqpair->current_recv_depth > 0); rqpair->current_recv_depth--; @@ -3214,6 +3231,68 @@ spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req) } #endif +static void +_poller_reset_failed_recvs(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_recv_wr *bad_recv_wr, + int rc) +{ + struct spdk_nvmf_rdma_recv *rdma_recv; + struct spdk_nvmf_rdma_wr *bad_rdma_wr; + + SPDK_ERRLOG("Failed to post a recv for the poller %p with errno %d\n", rpoller, -rc); + while (bad_recv_wr != NULL) { + bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_recv_wr->wr_id; + rdma_recv = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr); + + rdma_recv->qpair->current_recv_depth++; + bad_recv_wr = bad_recv_wr->next; + SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rdma_recv->qpair, -rc); + spdk_nvmf_rdma_start_disconnect(rdma_recv->qpair); + } +} + +static void +_qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *bad_recv_wr, int rc) +{ + SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rqpair, -rc); + while (bad_recv_wr != NULL) { + bad_recv_wr = bad_recv_wr->next; + rqpair->current_recv_depth++; + } + spdk_nvmf_rdma_start_disconnect(rqpair); +} + +static void +_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller) +{ + struct spdk_nvmf_rdma_qpair *rqpair; + struct ibv_recv_wr *bad_recv_wr; + int rc; + + if (rpoller->srq) { + if (rpoller->resources->recvs_to_post.first != NULL) { + rc = ibv_post_srq_recv(rpoller->srq, 
rpoller->resources->recvs_to_post.first, &bad_recv_wr); + if (rc) { + _poller_reset_failed_recvs(rpoller, bad_recv_wr, rc); + } + rpoller->resources->recvs_to_post.first = NULL; + rpoller->resources->recvs_to_post.last = NULL; + } + } else { + TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) { + if (!rqpair->resources->recvs_to_post.first) { + continue; + } + rc = ibv_post_recv(rqpair->cm_id->qp, rqpair->resources->recvs_to_post.first, &bad_recv_wr); + if (rc) { + _qp_reset_failed_recvs(rqpair, bad_recv_wr, rc); + } + rqpair->resources->recvs_to_post.first = NULL; + rqpair->resources->recvs_to_post.last = NULL; + } + } +} + static void _qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *bad_wr, int rc) @@ -3359,6 +3438,7 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, } } + rdma_recv->wr.next = NULL; rqpair->current_recv_depth++; STAILQ_INSERT_TAIL(&rqpair->resources->incoming_queue, rdma_recv, link); break; @@ -3428,6 +3508,7 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport, } /* submit outstanding work requests. */ + _poller_submit_recvs(rtransport, rpoller); _poller_submit_sends(rtransport, rpoller); return count;