nvmf: Separate the send and recv completion queues.

This allows the target to poll for internal completions
at higher priority.

Change-Id: I895c33a594a7d7c0545aa3a8405a296be3c106fb
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Author: Ben Walker <benjamin.walker@intel.com>
Date: 2016-07-27 09:38:34 -07:00
parent 04beb5661e
commit 7e23841d28

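Why separate queues help: with a single shared CQ, a burst of new commands (RECV completions) can crowd out the completions for the SENDs and RDMA READ/WRITEs that the target itself issued, and it is those internal completions that free up queue slots. With one CQ per direction, the poller can drain the send CQ first. Below is a minimal sketch of that polling discipline, not code from this commit; poll_priority() and its structure are illustrative only, assuming an established connection:

	#include <infiniband/verbs.h>
	#include <rdma/rdma_cma.h>

	static int
	poll_priority(struct rdma_cm_id *id)
	{
		struct ibv_wc wc;
		int rc;

		/* Drain target-initiated completions first; they free queue slots. */
		while ((rc = ibv_poll_cq(id->send_cq, 1, &wc)) > 0) {
			/* handle SEND / RDMA_READ / RDMA_WRITE completion ... */
		}
		if (rc < 0) {
			return -1;
		}

		/* Only then admit new commands from the host. */
		while ((rc = ibv_poll_cq(id->recv_cq, 1, &wc)) > 0) {
			/* handle RECV completion (new command capsule) ... */
		}

		return (rc < 0) ? -1 : 0;
	}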

@@ -77,10 +77,6 @@ struct spdk_nvmf_rdma_conn {
 	struct spdk_nvmf_conn conn;
 	struct rdma_cm_id *cm_id;
-	struct ibv_context *ctx;
-	struct ibv_comp_channel *comp_channel;
-	struct ibv_cq *cq;
-	struct ibv_qp *qp;
 	/* The maximum number of I/O outstanding on this connection at one time */
 	uint16_t queue_depth;
@@ -169,14 +165,6 @@ spdk_nvmf_rdma_conn_destroy(struct spdk_nvmf_rdma_conn *rdma_conn)
 		rdma_destroy_qp(rdma_conn->cm_id);
 	}
-	if (rdma_conn->cq) {
-		ibv_destroy_cq(rdma_conn->cq);
-	}
-	if (rdma_conn->comp_channel) {
-		ibv_destroy_comp_channel(rdma_conn->comp_channel);
-	}
 	if (rdma_conn->cm_id) {
 		rdma_destroy_id(rdma_conn->cm_id);
 	}
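Note on the deleted teardown: once rdma_create_qp() allocates the CQs itself (see the next hunk), librdmacm also owns their destruction, so the explicit ibv_destroy_cq()/ibv_destroy_comp_channel() calls become unnecessary. The exact release point is a librdmacm implementation detail (rdma_destroy_qp() or rdma_destroy_id(), depending on the version), so the remaining teardown reduces to:

	if (rdma_conn->cm_id) {
		if (rdma_conn->cm_id->qp) {
			/* Destroys the QP; cm-owned CQs are released by librdmacm. */
			rdma_destroy_qp(rdma_conn->cm_id);
		}
		rdma_destroy_id(rdma_conn->cm_id);
	}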
@@ -206,38 +194,10 @@ spdk_nvmf_rdma_conn_create(struct rdma_cm_id *id, uint16_t queue_depth, uint16_t
 	rdma_conn->queue_depth = queue_depth;
 	rdma_conn->rw_depth = rw_depth;
-	rdma_conn->ctx = id->verbs;
 	rdma_conn->cm_id = id;
-	rdma_conn->comp_channel = ibv_create_comp_channel(id->verbs);
-	if (!rdma_conn->comp_channel) {
-		SPDK_ERRLOG("create completion channel error!\n");
-		spdk_nvmf_rdma_conn_destroy(rdma_conn);
-		return NULL;
-	}
-	rc = fcntl(rdma_conn->comp_channel->fd, F_SETFL, O_NONBLOCK);
-	if (rc < 0) {
-		SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n");
-		spdk_nvmf_rdma_conn_destroy(rdma_conn);
-		return NULL;
-	}
-	/*
-	 * Size the CQ to handle completions for RECV, SEND, and either READ or WRITE.
-	 */
-	rdma_conn->cq = ibv_create_cq(id->verbs, (queue_depth * 3), rdma_conn, rdma_conn->comp_channel,
-				      0);
-	if (!rdma_conn->cq) {
-		SPDK_ERRLOG("create cq error!\n");
-		spdk_nvmf_rdma_conn_destroy(rdma_conn);
-		return NULL;
-	}
 	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
 	attr.qp_type = IBV_QPT_RC;
-	attr.send_cq = rdma_conn->cq;
-	attr.recv_cq = rdma_conn->cq;
 	attr.cap.max_send_wr = rdma_conn->queue_depth * 2; /* SEND, READ, and WRITE operations */
 	attr.cap.max_recv_wr = rdma_conn->queue_depth; /* RECV operations */
 	attr.cap.max_send_sge = NVMF_DEFAULT_TX_SGE;
@@ -249,7 +209,24 @@ spdk_nvmf_rdma_conn_create(struct rdma_cm_id *id, uint16_t queue_depth, uint16_t
 		spdk_nvmf_rdma_conn_destroy(rdma_conn);
 		return NULL;
 	}
-	rdma_conn->qp = rdma_conn->cm_id->qp;
+	SPDK_TRACELOG(SPDK_TRACE_RDMA,
+		      "New RDMA QP created. Send CQ: %p Recv CQ: %p Send Queue Depth: %d Recv Queue Depth: %d\n",
+		      rdma_conn->cm_id->send_cq, rdma_conn->cm_id->recv_cq, attr.cap.max_send_wr, attr.cap.max_recv_wr);
+	rc = fcntl(rdma_conn->cm_id->send_cq_channel->fd, F_SETFL, O_NONBLOCK);
+	if (rc < 0) {
+		SPDK_ERRLOG("fcntl to set send CQ channel to non-blocking failed\n");
+		spdk_nvmf_rdma_conn_destroy(rdma_conn);
+		return NULL;
+	}
+	rc = fcntl(rdma_conn->cm_id->recv_cq_channel->fd, F_SETFL, O_NONBLOCK);
+	if (rc < 0) {
+		SPDK_ERRLOG("fcntl to set recv CQ channel to non-blocking failed\n");
+		spdk_nvmf_rdma_conn_destroy(rdma_conn);
+		return NULL;
+	}
 	conn = &rdma_conn->conn;
 	conn->transport = &spdk_nvmf_transport_rdma;
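The hunk above works because rdma_create_qp() fills in what the deleted code used to build by hand: when qp_init_attr.send_cq and recv_cq are left NULL, librdmacm allocates a separate CQ and completion channel per direction and stores them on the cm_id. A condensed sketch (error handling omitted; pd is assumed to be an already-allocated protection domain, or NULL for the device default):

	struct ibv_qp_init_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.qp_type = IBV_QPT_RC;
	attr.cap.max_send_wr = 128;
	attr.cap.max_recv_wr = 64;
	attr.cap.max_send_sge = 1;
	attr.cap.max_recv_sge = 1;
	/* attr.send_cq and attr.recv_cq deliberately stay NULL. */

	if (rdma_create_qp(id, pd, &attr) == 0) {
		/* id->send_cq, id->recv_cq, id->send_cq_channel and
		 * id->recv_cq_channel are now valid and can be polled or
		 * fcntl()'d independently, as done in the hunk above. */
	}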
@@ -363,7 +340,7 @@ nvmf_post_rdma_read(struct spdk_nvmf_request *req)
 	nvmf_ibv_send_wr_set_rkey(&wr, req);
 	spdk_trace_record(TRACE_RDMA_READ_START, 0, 0, (uintptr_t)req, 0);
-	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
+	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
 	if (rc) {
 		SPDK_ERRLOG("Failure posting rdma read send, rc = 0x%x\n", rc);
 	}
@@ -392,7 +369,7 @@ nvmf_post_rdma_write(struct spdk_nvmf_request *req)
 	nvmf_ibv_send_wr_set_rkey(&wr, req);
 	spdk_trace_record(TRACE_RDMA_WRITE_START, 0, 0, (uintptr_t)req, 0);
-	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
+	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
 	if (rc) {
 		SPDK_ERRLOG("Failure posting rdma write send, rc = 0x%x\n", rc);
 	}
@@ -428,7 +405,7 @@ nvmf_post_rdma_recv(struct spdk_nvmf_request *req)
 	wr.sg_list = sg_list;
 	wr.num_sge = 2;
-	rc = ibv_post_recv(rdma_conn->qp, &wr, &bad_wr);
+	rc = ibv_post_recv(rdma_conn->cm_id->qp, &wr, &bad_wr);
 	if (rc) {
 		SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc);
 	}
@@ -455,7 +432,7 @@ nvmf_post_rdma_send(struct spdk_nvmf_request *req)
 	nvmf_ibv_send_wr_init(&wr, req, &sge, IBV_WR_SEND, IBV_SEND_SIGNALED);
 	spdk_trace_record(TRACE_NVMF_IO_COMPLETE, 0, 0, (uintptr_t)req, 0);
-	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
+	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
 	if (rc) {
 		SPDK_ERRLOG("Failure posting rdma send for NVMf completion, rc = 0x%x\n", rc);
 	}
@@ -1072,30 +1049,30 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
 	struct spdk_nvmf_rdma_conn *rdma_conn = get_rdma_conn(conn);
 	struct spdk_nvmf_rdma_request *rdma_req;
 	struct spdk_nvmf_request *req;
-	int rc, count;
+	int rc;
+	int count = 0;
-	count = 0;
+	/* Poll the send completion queue to check for completing
+	 * operations that the target initiated. */
 	while (true) {
-		rc = ibv_poll_cq(rdma_conn->cq, 1, &wc);
-		if (rc == 0) // No completions at this time
+		rc = ibv_poll_cq(rdma_conn->cm_id->send_cq, 1, &wc);
+		if (rc == 0) {
 			break;
-		if (rc < 0) {
-			SPDK_ERRLOG("Poll CQ error!(%d): %s\n",
+		} else if (rc < 0) {
+			SPDK_ERRLOG("Error polling Send CQ! (%d): %s\n",
 				    errno, strerror(errno));
 			return -1;
 		}
-		/* OK, process the single successful cq event */
 		if (wc.status) {
-			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d (%s), exiting handler\n",
-				      wc.status, ibv_wc_status_str(wc.status));
+			SPDK_ERRLOG("Send CQ error (%d): %s\n",
+				    wc.status, ibv_wc_status_str(wc.status));
 			return -1;
 		}
 		rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
 		if (rdma_req == NULL) {
-			SPDK_ERRLOG("Got CQ completion for NULL rdma_req\n");
+			SPDK_ERRLOG("NULL wr_id in RDMA work completion\n");
 			return -1;
 		}
@@ -1136,6 +1113,43 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
 			count++;
 			break;
+		case IBV_WC_RECV:
+			SPDK_ERRLOG("Unexpectedly received a RECV completion on the Send CQ\n");
+			return -1;
+		default:
+			SPDK_ERRLOG("Received an unknown opcode on the Send CQ: %d\n", wc.opcode);
+			return -1;
+		}
+	}
+
+	/* Poll the recv completion queue for incoming requests */
+	while (rdma_conn->num_outstanding_reqs < rdma_conn->queue_depth) {
+		rc = ibv_poll_cq(rdma_conn->cm_id->recv_cq, 1, &wc);
+		if (rc == 0) {
+			break;
+		} else if (rc < 0) {
+			SPDK_ERRLOG("Error polling Recv CQ! (%d): %s\n",
+				    errno, strerror(errno));
+			return -1;
+		}
+
+		if (wc.status) {
+			SPDK_ERRLOG("Recv CQ error (%d): %s\n",
+				    wc.status, ibv_wc_status_str(wc.status));
+			return -1;
+		}
+
+		rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
+		if (rdma_req == NULL) {
+			SPDK_ERRLOG("NULL wr_id in RDMA work completion\n");
+			return -1;
+		}
+
+		req = &rdma_req->req;
+
+		switch (wc.opcode) {
 		case IBV_WC_RECV:
 			if (wc.byte_len < sizeof(struct spdk_nvmf_capsule_cmd)) {
 				SPDK_ERRLOG("recv length %u less than capsule header\n", wc.byte_len);
@ -1171,8 +1185,15 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
}
break;
case IBV_WC_SEND:
case IBV_WC_RDMA_WRITE:
case IBV_WC_RDMA_READ:
SPDK_ERRLOG("Unexpectedly received a Send/Write/Read completion on the Recv CQ\n");
return -1;
break;
default:
SPDK_ERRLOG("Poll cq opcode type unknown!!!!! completion\n");
SPDK_ERRLOG("Received an unknown opcode on the Recv CQ: %d\n", wc.opcode);
return -1;
}
}
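The guard on the second loop doubles as flow control: the send CQ is always drained in full, while new commands are accepted from the recv CQ only while the connection has free queue slots. Condensed shape of the poller after this change (field and CQ names taken from the diff; completion handling elided):

	/* Pass 1: internal completions, polled unconditionally. */
	while (true) {
		rc = ibv_poll_cq(rdma_conn->cm_id->send_cq, 1, &wc);
		if (rc <= 0)
			break;  /* 0 = empty; rc < 0 is reported as an error above */
		/* SEND / RDMA_READ / RDMA_WRITE handling ... */
	}

	/* Pass 2: new commands, only while queue slots remain. */
	while (rdma_conn->num_outstanding_reqs < rdma_conn->queue_depth) {
		rc = ibv_poll_cq(rdma_conn->cm_id->recv_cq, 1, &wc);
		if (rc <= 0)
			break;
		/* RECV handling: a new command capsule arrives ... */
	}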