RDMA: Remove the state_queues

Since we no longer rely on the state queues for draining qpairs, we can
get rid of most of them. We can keep just a few, and since we don't ever
remove arbitrary elements, we can use stailqs to perform those
operations. Operations on stailqs carry about half the overhead of
operations on tailqs.

Change-Id: I8f184e6269db853619a3581d387d97a795034798
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/445332
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Seth Howell 2019-02-19 09:56:31 -07:00 committed by Jim Harris
parent 51f411c213
commit 04ebc6ea28

View File

@ -249,7 +249,7 @@ struct spdk_nvmf_rdma_request {
uint32_t num_outstanding_data_wr; uint32_t num_outstanding_data_wr;
TAILQ_ENTRY(spdk_nvmf_rdma_request) link; TAILQ_ENTRY(spdk_nvmf_rdma_request) link;
TAILQ_ENTRY(spdk_nvmf_rdma_request) state_link; STAILQ_ENTRY(spdk_nvmf_rdma_request) state_link;
}; };
enum spdk_nvmf_rdma_qpair_disconnect_flags { enum spdk_nvmf_rdma_qpair_disconnect_flags {
@ -298,8 +298,12 @@ struct spdk_nvmf_rdma_qpair {
/* Receives that are waiting for a request object */ /* Receives that are waiting for a request object */
TAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue; TAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue;
/* Queues to track the requests in all states */ /* Queues to track requests in critical states */
TAILQ_HEAD(, spdk_nvmf_rdma_request) state_queue[RDMA_REQUEST_NUM_STATES]; STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue;
STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_read_queue;
STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_write_queue;
/* Number of requests in each state */ /* Number of requests in each state */
uint32_t state_cntr[RDMA_REQUEST_NUM_STATES]; uint32_t state_cntr[RDMA_REQUEST_NUM_STATES];
@ -560,12 +564,10 @@ spdk_nvmf_rdma_request_set_state(struct spdk_nvmf_rdma_request *rdma_req,
qpair = rdma_req->req.qpair; qpair = rdma_req->req.qpair;
rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair); rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
TAILQ_REMOVE(&rqpair->state_queue[rdma_req->state], rdma_req, state_link);
rqpair->state_cntr[rdma_req->state]--; rqpair->state_cntr[rdma_req->state]--;
rdma_req->state = state; rdma_req->state = state;
TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link);
rqpair->state_cntr[rdma_req->state]++; rqpair->state_cntr[rdma_req->state]++;
} }
@ -592,12 +594,11 @@ static void
nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair) nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
{ {
int i; int i;
struct spdk_nvmf_rdma_request *req;
SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid); SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid);
for (i = 1; i < RDMA_REQUEST_NUM_STATES; i++) { for (i = 0; i < rqpair->max_queue_depth; i++) {
SPDK_ERRLOG("\tdumping requests in state %d\n", i); if (rqpair->reqs[i].state != RDMA_REQUEST_STATE_FREE) {
TAILQ_FOREACH(req, &rqpair->state_queue[i], state_link) { nvmf_rdma_dump_request(&rqpair->reqs[i]);
nvmf_rdma_dump_request(req);
} }
} }
} }
@ -782,10 +783,13 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
/* Initialise request state queues and counters of the queue pair */ /* Initialise request state queues and counters of the queue pair */
for (i = RDMA_REQUEST_STATE_FREE; i < RDMA_REQUEST_NUM_STATES; i++) { for (i = RDMA_REQUEST_STATE_FREE; i < RDMA_REQUEST_NUM_STATES; i++) {
TAILQ_INIT(&rqpair->state_queue[i]);
rqpair->state_cntr[i] = 0; rqpair->state_cntr[i] = 0;
} }
STAILQ_INIT(&rqpair->free_queue);
STAILQ_INIT(&rqpair->pending_rdma_read_queue);
STAILQ_INIT(&rqpair->pending_rdma_write_queue);
rqpair->current_recv_depth = rqpair->max_queue_depth; rqpair->current_recv_depth = rqpair->max_queue_depth;
for (i = 0; i < rqpair->max_queue_depth; i++) { for (i = 0; i < rqpair->max_queue_depth; i++) {
struct ibv_recv_wr *bad_wr = NULL; struct ibv_recv_wr *bad_wr = NULL;
@ -858,7 +862,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
/* Initialize request state to FREE */ /* Initialize request state to FREE */
rdma_req->state = RDMA_REQUEST_STATE_FREE; rdma_req->state = RDMA_REQUEST_STATE_FREE;
TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link); STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link);
rqpair->state_cntr[rdma_req->state]++; rqpair->state_cntr[rdma_req->state]++;
} }
@ -1410,8 +1414,8 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
struct spdk_nvmf_rdma_qpair *rqpair; struct spdk_nvmf_rdma_qpair *rqpair;
struct spdk_nvmf_rdma_poll_group *rgroup; struct spdk_nvmf_rdma_poll_group *rgroup;
rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
if (rdma_req->data_from_pool) { if (rdma_req->data_from_pool) {
rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
rgroup = rqpair->poller->group; rgroup = rqpair->poller->group;
spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport); spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
@ -1420,6 +1424,7 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
rdma_req->req.length = 0; rdma_req->req.length = 0;
rdma_req->req.iovcnt = 0; rdma_req->req.iovcnt = 0;
rdma_req->req.data = NULL; rdma_req->req.data = NULL;
STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link);
spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE); spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_FREE);
} }
@ -1522,6 +1527,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
* arrive using in capsule data, we need to do a transfer from the host. * arrive using in capsule data, we need to do a transfer from the host.
*/ */
if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) { if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER && rdma_req->data_from_pool) {
STAILQ_INSERT_TAIL(&rqpair->pending_rdma_read_queue, rdma_req, state_link);
spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING); spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING);
break; break;
} }
@ -1532,8 +1538,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0, spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0,
(uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
if (rdma_req != TAILQ_FIRST( if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_read_queue)) {
&rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING])) {
/* This request needs to wait in line to perform RDMA */ /* This request needs to wait in line to perform RDMA */
break; break;
} }
@ -1542,6 +1547,10 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
/* We can only have so many WRs outstanding. we have to wait until some finish. */ /* We can only have so many WRs outstanding. we have to wait until some finish. */
break; break;
} }
/* We have already verified that this request is the head of the queue. */
STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_read_queue, state_link);
rc = request_transfer_in(&rdma_req->req); rc = request_transfer_in(&rdma_req->req);
if (!rc) { if (!rc) {
spdk_nvmf_rdma_request_set_state(rdma_req, spdk_nvmf_rdma_request_set_state(rdma_req,
@ -1574,6 +1583,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0, spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0,
(uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) { if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
STAILQ_INSERT_TAIL(&rqpair->pending_rdma_write_queue, rdma_req, state_link);
spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING); spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING);
} else { } else {
spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE); spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_READY_TO_COMPLETE);
@ -1583,8 +1593,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0, spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0,
(uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id); (uintptr_t)rdma_req, (uintptr_t)rqpair->cm_id);
if (rdma_req != TAILQ_FIRST( if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_write_queue)) {
&rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING])) {
/* This request needs to wait in line to perform RDMA */ /* This request needs to wait in line to perform RDMA */
break; break;
} }
@ -1594,6 +1603,10 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
* +1 since each request has an additional wr in the resp. */ * +1 since each request has an additional wr in the resp. */
break; break;
} }
/* We have already verified that this request is the head of the queue. */
STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_write_queue, state_link);
/* The data transfer will be kicked off from /* The data transfer will be kicked off from
* RDMA_REQUEST_STATE_READY_TO_COMPLETE state. * RDMA_REQUEST_STATE_READY_TO_COMPLETE state.
*/ */
@ -2175,17 +2188,14 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
struct spdk_nvmf_rdma_request *rdma_req, *req_tmp; struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
/* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */ /* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */
TAILQ_FOREACH_SAFE(rdma_req, STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) {
&rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING],
state_link, req_tmp) {
if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
break; break;
} }
} }
/* Then RDMA writes sincereads have stronger restrictions than writes */ /* Then RDMA writes since reads have stronger restrictions than writes */
TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING], STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_write_queue, state_link, req_tmp) {
state_link, req_tmp) {
if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) { if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
break; break;
} }
@ -2201,12 +2211,14 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
/* The lowest priority is processing newly received commands */ /* The lowest priority is processing newly received commands */
TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) { TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) {
if (TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE])) { if (STAILQ_EMPTY(&rqpair->free_queue)) {
break; break;
} }
rdma_req = TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE]); rdma_req = STAILQ_FIRST(&rqpair->free_queue);
rdma_req->recv = rdma_recv; rdma_req->recv = rdma_recv;
STAILQ_REMOVE_HEAD(&rqpair->free_queue, state_link);
spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW); spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW);
if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) { if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
break; break;