nvmf/rdma: Add support for multiple sgl descriptors to sgl parser
Enable parsing an nvmf request that contains an inline
nvme_sgl_last_segment_descriptor element. This is the next step towards
NVMe-oF SGL support in the NVMe-oF target.

Change-Id: Ia2f1f7054e0de8a9e2bfe4dabe6af4085e3f12c4
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/428745
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
commit 62700dac2e (parent 1e6cdc190d)

 lib/nvmf/rdma.c | 178
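For orientation, this is roughly the capsule shape the new parser accepts: the command's sgl1 is an inline "last segment" descriptor whose address is an offset into the in-capsule data, and the bytes at that offset hold an array of keyed data block descriptors. The sketch below is an illustration only, not code from this commit; the function name and the offset handling are assumptions, while the descriptor types, subtypes, and fields mirror the checks in nvmf_rdma_request_fill_iovs_multi_sgl further down.

/* Illustration (not part of the commit): encode a multi-SGL command the way
 * the new target-side parser expects it. How the offset maps into the
 * in-capsule buffer is an assumption of this sketch. */
#include <stdint.h>
#include <string.h>
#include "spdk/nvme_spec.h"

static void
encode_multi_sgl_capsule(struct spdk_nvme_cmd *cmd, uint8_t *in_capsule_data,
			 uint64_t in_capsule_offset,
			 const struct spdk_nvme_sgl_descriptor *keyed_blocks,
			 uint32_t num_blocks)
{
	/* Inline last-segment descriptor: points at the descriptor array. */
	cmd->dptr.sgl1.generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
	cmd->dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	cmd->dptr.sgl1.address = in_capsule_offset;
	cmd->dptr.sgl1.unkeyed.length = num_blocks * sizeof(struct spdk_nvme_sgl_descriptor);

	/* Each entry is a keyed data block descriptor: remote address, length,
	 * and rkey for one RDMA READ/WRITE the target will issue. */
	memcpy(in_capsule_data + in_capsule_offset, keyed_blocks,
	       num_blocks * sizeof(struct spdk_nvme_sgl_descriptor));
}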
@@ -584,6 +584,46 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
 	return 0;
 }
 
+static void
+nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
+			    struct spdk_nvmf_rdma_transport *rtransport)
+{
+	struct spdk_nvmf_rdma_request_data	*current_data_wr = NULL, *next_data_wr = NULL;
+	struct ibv_send_wr			*send_wr;
+	int					i;
+
+	rdma_req->num_outstanding_data_wr = 0;
+	current_data_wr = &rdma_req->data;
+	for (i = 0; i < current_data_wr->wr.num_sge; i++) {
+		current_data_wr->wr.sg_list[i].addr = 0;
+		current_data_wr->wr.sg_list[i].length = 0;
+		current_data_wr->wr.sg_list[i].lkey = 0;
+	}
+	current_data_wr->wr.num_sge = 0;
+
+	send_wr = current_data_wr->wr.next;
+	if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
+		next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
+	}
+	while (next_data_wr) {
+		current_data_wr = next_data_wr;
+		send_wr = current_data_wr->wr.next;
+		if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
+			next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
+		} else {
+			next_data_wr = NULL;
+		}
+
+		for (i = 0; i < current_data_wr->wr.num_sge; i++) {
+			current_data_wr->wr.sg_list[i].addr = 0;
+			current_data_wr->wr.sg_list[i].length = 0;
+			current_data_wr->wr.sg_list[i].lkey = 0;
+		}
+		current_data_wr->wr.num_sge = 0;
+		spdk_mempool_put(rtransport->data_wr_pool, current_data_wr);
+	}
+}
+
 static void
 nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
 {
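A note on the pool discipline this routine relies on: the first data WR is embedded in spdk_nvmf_rdma_request itself, so the walk starts at &rdma_req->data, but only the chained entries that were taken from rtransport->data_wr_pool are handed back with spdk_mempool_put(). A minimal, hypothetical sketch of that pairing (the function name and element count are placeholders, not taken from the commit):

/* Hypothetical sketch: extra data WRs are taken in one bulk call per request
 * (see nvmf_request_alloc_wrs below) and returned one by one on free. */
#include <errno.h>
#include "spdk/env.h"

#define EXTRA_WRS 15 /* placeholder; the diff bounds this by SPDK_NVMF_MAX_SGL_ENTRIES - 1 */

static int
exercise_data_wr_pool(struct spdk_mempool *data_wr_pool)
{
	void *extras[EXTRA_WRS];
	int i;

	/* Like nvmf_request_alloc_wrs(): grab all extra WRs for a request at once. */
	if (spdk_mempool_get_bulk(data_wr_pool, extras, EXTRA_WRS) != 0) {
		return -ENOMEM;
	}

	/* Like nvmf_rdma_request_free_data(): each chained WR goes back
	 * individually; the WR embedded in the request is never put back. */
	for (i = 0; i < EXTRA_WRS; i++) {
		spdk_mempool_put(data_wr_pool, extras[i]);
	}
	return 0;
}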
@@ -1032,8 +1072,8 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
 	assert(rqpair->current_recv_depth > 0);
 	rqpair->current_recv_depth--;
 
-	/* Build the response which consists of an optional
-	 * RDMA WRITE to transfer data, plus an RDMA SEND
+	/* Build the response which consists of optional
+	 * RDMA WRITEs to transfer data, plus an RDMA SEND
 	 * containing the response.
 	 */
 	send_wr = &rdma_req->rsp.wr;
@@ -1326,6 +1366,46 @@ spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req)
 	return xfer;
 }
 
+static int
+nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
+		       struct spdk_nvmf_rdma_request *rdma_req,
+		       uint32_t num_sgl_descriptors)
+{
+	struct spdk_nvmf_rdma_request_data	*work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
+	struct spdk_nvmf_rdma_request_data	*current_data_wr;
+	uint32_t				i;
+
+	if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
+		return -ENOMEM;
+	}
+
+	current_data_wr = &rdma_req->data;
+
+	for (i = 0; i < num_sgl_descriptors; i++) {
+		if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+			current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
+		} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+			current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
+		} else {
+			assert(false);
+		}
+		work_requests[i]->wr.send_flags = IBV_SEND_SIGNALED;
+		work_requests[i]->wr.sg_list = work_requests[i]->sgl;
+		work_requests[i]->wr.wr_id = rdma_req->data.wr.wr_id;
+		current_data_wr->wr.next = &work_requests[i]->wr;
+		current_data_wr = work_requests[i];
+	}
+
+	if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+		current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
+		current_data_wr->wr.next = &rdma_req->rsp.wr;
+	} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
+		current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
+		current_data_wr->wr.next = NULL;
+	}
+	return 0;
+}
+
 static int
 nvmf_rdma_fill_buffers(struct spdk_nvmf_rdma_transport *rtransport,
 		       struct spdk_nvmf_rdma_poll_group *rgroup,
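The effect of nvmf_request_alloc_wrs() is easiest to see as the resulting send WR chain. The sketch below shows a three-descriptor example and is an illustration derived from the code above, not text from the commit:

/* Controller-to-host (RDMA WRITE) request with three descriptors:
 *
 *   rdma_req->data.wr -> work_requests[0]->wr -> work_requests[1]->wr -> rdma_req->rsp.wr
 *     (RDMA_WRITE)         (RDMA_WRITE)            (RDMA_WRITE)           (SEND: completion)
 *
 * Host-to-controller requests use IBV_WR_RDMA_READ opcodes and the chain ends
 * with next == NULL, because the completion SEND is only posted after the
 * READ data has arrived. */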
@@ -1423,6 +1503,83 @@ err_exit:
 	return rc;
 }
 
+static int
+nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport,
+				      struct spdk_nvmf_rdma_device *device,
+				      struct spdk_nvmf_rdma_request *rdma_req)
+{
+	struct spdk_nvmf_rdma_qpair		*rqpair;
+	struct spdk_nvmf_rdma_poll_group	*rgroup;
+	struct ibv_send_wr			*current_wr;
+	struct spdk_nvmf_request		*req = &rdma_req->req;
+	struct spdk_nvme_sgl_descriptor		*inline_segment, *desc;
+	uint32_t				num_sgl_descriptors;
+	uint32_t				i;
+	int					rc;
+
+	rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+	rgroup = rqpair->poller->group;
+
+	inline_segment = &req->cmd->nvme_cmd.dptr.sgl1;
+	assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
+	assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);
+
+	desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
+	num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
+	assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES);
+
+	if (nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) {
+		return -ENOMEM;
+	}
+
+	/* The first WR must always be the embedded data WR. This is how we unwind them later. */
+	current_wr = &rdma_req->data.wr;
+
+	req->iovcnt = 0;
+	for (i = 0; i < num_sgl_descriptors; i++) {
+		/* The descriptors must be keyed data block descriptors with an address, not an offset. */
+		if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK ||
+				  desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) {
+			rc = -EINVAL;
+			goto err_exit;
+		}
+
+		current_wr->num_sge = 0;
+		req->length += desc->keyed.length;
+
+		rc = nvmf_rdma_fill_buffers(rtransport, rgroup, device, rdma_req, current_wr,
+					    desc->keyed.length);
+		if (rc != 0) {
+			rc = -ENOMEM;
+			goto err_exit;
+		}
+
+		current_wr->wr.rdma.rkey = desc->keyed.key;
+		current_wr->wr.rdma.remote_addr = desc->address;
+		current_wr = current_wr->next;
+		desc++;
+	}
+
+#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
+	if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+		if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
+			rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
+			rdma_req->rsp.wr.imm_data = desc->keyed.key;
+		}
+	}
+#endif
+
+	rdma_req->num_outstanding_data_wr = num_sgl_descriptors;
+	rdma_req->data_from_pool = true;
+
+	return 0;
+
+err_exit:
+	spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
+	nvmf_rdma_request_free_data(rdma_req, rtransport);
+	return rc;
+}
+
 static int
 spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 				 struct spdk_nvmf_rdma_device *device,
@@ -1517,6 +1674,20 @@ spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 		rdma_req->req.iov[0].iov_len = rdma_req->req.length;
 		rdma_req->req.iovcnt = 1;
 
 		return 0;
+	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
+		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+		if (nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req) < 0) {
+			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
+			return 0;
+		}
+
+		/* backward compatible */
+		rdma_req->req.data = rdma_req->req.iov[0].iov_base;
+
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
+			      rdma_req->req.iovcnt);
+
+		return 0;
 	}
 
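With this hunk, spdk_nvmf_rdma_request_parse_sgl() distinguishes three SGL shapes. The summary below reflects the surrounding code; the first two branches predate this commit:

/* SGL dispatch in spdk_nvmf_rdma_request_parse_sgl() after this change:
 *
 *   keyed data block, address subtype -> single remote region, existing single-WR path
 *   data block, offset subtype        -> in-capsule data, no RDMA data transfer needed
 *   last segment, offset subtype      -> inline descriptor list, handled by
 *                                        nvmf_rdma_request_fill_iovs_multi_sgl()
 */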
@@ -1539,10 +1710,11 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
 
 		spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
 	}
-	rdma_req->num_outstanding_data_wr = 0;
+	nvmf_rdma_request_free_data(rdma_req, rtransport);
 	rdma_req->req.length = 0;
 	rdma_req->req.iovcnt = 0;
 	rdma_req->req.data = NULL;
+	rdma_req->data.wr.next = NULL;
 	rqpair->qd--;
 
 	STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link);
@@ -139,6 +139,8 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	rdma_req.req.xfer = SPDK_NVME_DATA_CONTROLLER_TO_HOST;
 
 	rtransport.transport.opts = g_rdma_ut_transport_opts;
+	rtransport.data_wr_pool = NULL;
 	rtransport.transport.data_buf_pool = NULL;
 
 	device.attr.device_cap_flags = 0;
+	g_rdma_mr.lkey = 0xABCD;