nvmf/rdma: Add support for multiple sgl descriptors to sgl parser

Enable parsing of an nvmf request that carries an inline (in-capsule)
nvme_sgl_last_segment_descriptor element, i.e. a list of keyed data
block descriptors embedded in the command capsule. This is the next
step towards full SGL support in the NVMe-oF target.

Change-Id: Ia2f1f7054e0de8a9e2bfe4dabe6af4085e3f12c4
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/428745
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Seth Howell 2018-10-10 14:24:17 -07:00 committed by Jim Harris
parent 1e6cdc190d
commit 62700dac2e
2 changed files with 177 additions and 3 deletions
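
For context, the capsule layout this series teaches the target to parse looks
roughly like the sketch below: dptr.sgl1 is a last-segment descriptor with the
Offset subtype whose address/length point at an in-capsule array of keyed data
block descriptors, one per RDMA data transfer. This is a hedged, host-side
illustration only; the helper name build_multi_sgl_command and NUM_DESCRIPTORS
are made up for the example, while the field accessors and type/subtype
constants are the same ones used by the parser in the diff below.

#include <string.h>
#include "spdk/nvme_spec.h"

#define NUM_DESCRIPTORS 2	/* illustrative descriptor count */

static void
build_multi_sgl_command(struct spdk_nvme_cmd *cmd,
			struct spdk_nvme_sgl_descriptor *in_capsule_descs,
			uint64_t in_capsule_offset,
			const uint64_t remote_addrs[NUM_DESCRIPTORS],
			const uint32_t rkeys[NUM_DESCRIPTORS],
			const uint32_t lengths[NUM_DESCRIPTORS])
{
	int i;

	/* sgl1 says "the real SGL is a descriptor list inside this capsule". */
	memset(&cmd->dptr.sgl1, 0, sizeof(cmd->dptr.sgl1));
	cmd->dptr.sgl1.generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
	cmd->dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	cmd->dptr.sgl1.unkeyed.length = NUM_DESCRIPTORS * sizeof(struct spdk_nvme_sgl_descriptor);
	cmd->dptr.sgl1.address = in_capsule_offset;

	/* Each entry is a keyed data block the target will RDMA READ or WRITE. */
	for (i = 0; i < NUM_DESCRIPTORS; i++) {
		memset(&in_capsule_descs[i], 0, sizeof(in_capsule_descs[i]));
		in_capsule_descs[i].generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		in_capsule_descs[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		in_capsule_descs[i].keyed.length = lengths[i];
		in_capsule_descs[i].keyed.key = rkeys[i];
		in_capsule_descs[i].address = remote_addrs[i];
	}
}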


@@ -584,6 +584,46 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
	return 0;
}

static void
nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
			    struct spdk_nvmf_rdma_transport *rtransport)
{
	struct spdk_nvmf_rdma_request_data *current_data_wr = NULL, *next_data_wr = NULL;
	struct ibv_send_wr *send_wr;
	int i;

	rdma_req->num_outstanding_data_wr = 0;
	current_data_wr = &rdma_req->data;
	for (i = 0; i < current_data_wr->wr.num_sge; i++) {
		current_data_wr->wr.sg_list[i].addr = 0;
		current_data_wr->wr.sg_list[i].length = 0;
		current_data_wr->wr.sg_list[i].lkey = 0;
	}
	current_data_wr->wr.num_sge = 0;

	send_wr = current_data_wr->wr.next;
	if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
		next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
	}
	while (next_data_wr) {
		current_data_wr = next_data_wr;
		send_wr = current_data_wr->wr.next;
		if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
			next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
		} else {
			next_data_wr = NULL;
		}
		for (i = 0; i < current_data_wr->wr.num_sge; i++) {
			current_data_wr->wr.sg_list[i].addr = 0;
			current_data_wr->wr.sg_list[i].length = 0;
			current_data_wr->wr.sg_list[i].lkey = 0;
		}
		current_data_wr->wr.num_sge = 0;
		spdk_mempool_put(rtransport->data_wr_pool, current_data_wr);
	}
}

static void
nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
{
@@ -1032,8 +1072,8 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
	assert(rqpair->current_recv_depth > 0);
	rqpair->current_recv_depth--;

-	/* Build the response which consists of an optional
-	 * RDMA WRITE to transfer data, plus an RDMA SEND
+	/* Build the response which consists of optional
+	 * RDMA WRITEs to transfer data, plus an RDMA SEND
	 * containing the response.
	 */
	send_wr = &rdma_req->rsp.wr;
@@ -1326,6 +1366,46 @@ spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req)
	return xfer;
}

static int
nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
		       struct spdk_nvmf_rdma_request *rdma_req,
		       uint32_t num_sgl_descriptors)
{
	struct spdk_nvmf_rdma_request_data *work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
	struct spdk_nvmf_rdma_request_data *current_data_wr;
	uint32_t i;

	if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
		return -ENOMEM;
	}

	current_data_wr = &rdma_req->data;
	for (i = 0; i < num_sgl_descriptors; i++) {
		if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
			current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
		} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
			current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
		} else {
			assert(false);
		}
		work_requests[i]->wr.send_flags = IBV_SEND_SIGNALED;
		work_requests[i]->wr.sg_list = work_requests[i]->sgl;
		work_requests[i]->wr.wr_id = rdma_req->data.wr.wr_id;
		current_data_wr->wr.next = &work_requests[i]->wr;
		current_data_wr = work_requests[i];
	}

	if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
		current_data_wr->wr.next = &rdma_req->rsp.wr;
	} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
		current_data_wr->wr.next = NULL;
	}
	return 0;
}

static int
nvmf_rdma_fill_buffers(struct spdk_nvmf_rdma_transport *rtransport,
		       struct spdk_nvmf_rdma_poll_group *rgroup,
@@ -1423,6 +1503,83 @@ err_exit:
	return rc;
}

static int
nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport,
				      struct spdk_nvmf_rdma_device *device,
				      struct spdk_nvmf_rdma_request *rdma_req)
{
	struct spdk_nvmf_rdma_qpair *rqpair;
	struct spdk_nvmf_rdma_poll_group *rgroup;
	struct ibv_send_wr *current_wr;
	struct spdk_nvmf_request *req = &rdma_req->req;
	struct spdk_nvme_sgl_descriptor *inline_segment, *desc;
	uint32_t num_sgl_descriptors;
	uint32_t i;
	int rc;

	rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
	rgroup = rqpair->poller->group;

	inline_segment = &req->cmd->nvme_cmd.dptr.sgl1;
	assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
	assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);

	desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
	num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
	assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES);

	if (nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) {
		return -ENOMEM;
	}

	/* The first WR must always be the embedded data WR. This is how we unwind them later. */
	current_wr = &rdma_req->data.wr;

	req->iovcnt = 0;
	for (i = 0; i < num_sgl_descriptors; i++) {
		/* The descriptors must be keyed data block descriptors with an address, not an offset. */
		if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK ||
				  desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) {
			rc = -EINVAL;
			goto err_exit;
		}

		current_wr->num_sge = 0;
		req->length += desc->keyed.length;

		rc = nvmf_rdma_fill_buffers(rtransport, rgroup, device, rdma_req, current_wr,
					    desc->keyed.length);
		if (rc != 0) {
			rc = -ENOMEM;
			goto err_exit;
		}

		current_wr->wr.rdma.rkey = desc->keyed.key;
		current_wr->wr.rdma.remote_addr = desc->address;
		current_wr = current_wr->next;
		desc++;
	}

#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
	if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
		if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
			rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
			rdma_req->rsp.wr.imm_data = desc->keyed.key;
		}
	}
#endif

	rdma_req->num_outstanding_data_wr = num_sgl_descriptors;
	rdma_req->data_from_pool = true;

	return 0;

err_exit:
	spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
	nvmf_rdma_request_free_data(rdma_req, rtransport);
	return rc;
}

static int
spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
				 struct spdk_nvmf_rdma_device *device,
@@ -1517,6 +1674,20 @@ spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
		rdma_req->req.iov[0].iov_len = rdma_req->req.length;
		rdma_req->req.iovcnt = 1;

		return 0;
	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		if (nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req) < 0) {
			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
			return 0;
		}

		/* backward compatible */
		rdma_req->req.data = rdma_req->req.iov[0].iov_base;

		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
			      rdma_req->req.iovcnt);

		return 0;
	}
@@ -1539,10 +1710,11 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
		spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
	}

	rdma_req->num_outstanding_data_wr = 0;
	nvmf_rdma_request_free_data(rdma_req, rtransport);
	rdma_req->req.length = 0;
	rdma_req->req.iovcnt = 0;
	rdma_req->req.data = NULL;
	rdma_req->data.wr.next = NULL;
	rqpair->qd--;

	STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link);


@@ -139,6 +139,8 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
	rdma_req.req.xfer = SPDK_NVME_DATA_CONTROLLER_TO_HOST;
	rtransport.transport.opts = g_rdma_ut_transport_opts;
	rtransport.data_wr_pool = NULL;
	rtransport.transport.data_buf_pool = NULL;

	device.attr.device_cap_flags = 0;
	g_rdma_mr.lkey = 0xABCD;
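
The unit-test hunk above only shows the new fixture fields; a test of the
multi-SGL path would additionally have to stage an in-capsule descriptor list
before invoking the parser. The fragment below is a hedged sketch of what that
could look like, not code from this commit: the local names (multi_cmd,
multi_recv, descs) are invented, and it assumes the usual rdma_ut fixture
(stubbed mempools, registered g_rdma_mr) lets the buffer fills succeed.

	/* Hedged sketch only -- stage a two-descriptor in-capsule SGL and parse it. */
	union nvmf_h2c_msg multi_cmd = {0};
	struct spdk_nvmf_rdma_recv multi_recv = {0};
	struct spdk_nvme_sgl_descriptor descs[2] = {{0}};
	struct spdk_nvme_sgl_descriptor *sgl1 = &multi_cmd.nvme_cmd.dptr.sgl1;
	int rc;

	rdma_req.req.cmd = &multi_cmd;
	rdma_req.recv = &multi_recv;
	rdma_req.req.length = 0;
	multi_recv.buf = (void *)descs;	/* descriptor list lives in the capsule */

	sgl1->generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
	sgl1->unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	sgl1->address = 0;	/* offset of descs[] within recv->buf */
	sgl1->unkeyed.length = 2 * sizeof(struct spdk_nvme_sgl_descriptor);

	descs[0].generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	descs[0].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	descs[0].keyed.length = 0x1000;
	descs[0].keyed.key = 0x1234;
	descs[0].address = 0x4000;
	descs[1] = descs[0];

	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.length == 2 * 0x1000);
	CU_ASSERT(rdma_req.num_outstanding_data_wr == 2);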