nvmf/rdma: Use RDMA provider memory translation

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: I78420118d3ec1bf78c0301a51ab320503b6f2fca
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5123
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Alexey Marchuk 2020-11-11 16:20:36 +03:00 committed by Tomasz Zawadzki
parent b6efb964cd
commit 01f2d0bd2d
3 changed files with 30 additions and 124 deletions

View File

@@ -462,7 +462,7 @@ struct spdk_nvmf_rdma_device {
struct ibv_device_attr attr;
struct ibv_context *context;
struct spdk_mem_map *map;
struct spdk_rdma_mem_map *map;
struct ibv_pd *pd;
int num_srq;
@@ -1331,56 +1331,6 @@ nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *e
return 0;
}
static int
nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
enum spdk_mem_map_notify_action action,
void *vaddr, size_t size)
{
struct ibv_pd *pd = cb_ctx;
struct ibv_mr *mr;
int rc;
switch (action) {
case SPDK_MEM_MAP_NOTIFY_REGISTER:
if (!g_nvmf_hooks.get_rkey) {
mr = ibv_reg_mr(pd, vaddr, size,
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_WRITE);
if (mr == NULL) {
SPDK_ERRLOG("ibv_reg_mr() failed\n");
return -1;
} else {
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
}
} else {
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
g_nvmf_hooks.get_rkey(pd, vaddr, size));
}
break;
case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
if (!g_nvmf_hooks.get_rkey) {
mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
if (mr) {
ibv_dereg_mr(mr);
}
}
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
break;
default:
SPDK_UNREACHABLE();
}
return rc;
}
static int
nvmf_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
{
/* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
return addr_1 == addr_2;
}
static inline void
nvmf_rdma_setup_wr(struct ibv_send_wr *wr, struct ibv_send_wr *next,
enum spdk_nvme_data_transfer xfer)
@@ -1490,31 +1440,6 @@ nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
return 0;
}
static bool
nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
uint32_t *_lkey)
{
uint64_t translation_len;
uint32_t lkey;
translation_len = iov->iov_len;
if (!g_nvmf_hooks.get_rkey) {
lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
(uint64_t)iov->iov_base, &translation_len))->lkey;
} else {
lkey = spdk_mem_map_translate(device->map,
(uint64_t)iov->iov_base, &translation_len);
}
if (spdk_unlikely(translation_len < iov->iov_len)) {
return false;
}
*_lkey = lkey;
return true;
}
static bool
nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
struct iovec *iov, struct ibv_send_wr **_wr,
@@ -1524,13 +1449,18 @@ nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
{
struct ibv_send_wr *wr = *_wr;
struct ibv_sge *sg_ele = &wr->sg_list[wr->num_sge];
struct spdk_rdma_memory_translation mem_translation;
int rc;
uint32_t lkey = 0;
uint32_t remaining, data_block_size, md_size, sge_len;
if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &lkey))) {
rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation);
if (spdk_unlikely(rc)) {
return false;
}
lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation);
if (spdk_likely(!dif_ctx)) {
sg_ele->lkey = lkey;
sg_ele->addr = (uintptr_t)(iov->iov_base);
@@ -2285,11 +2215,6 @@ nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
opts->transport_specific = NULL;
}
const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = {
.notify_cb = nvmf_rdma_mem_notify,
.are_contiguous = nvmf_rdma_check_contiguous_entries
};
static int nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg);
@@ -2513,7 +2438,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
assert(device->map == NULL);
device->map = spdk_mem_map_alloc(0, &g_nvmf_rdma_map_ops, device->pd);
device->map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks);
if (!device->map) {
SPDK_ERRLOG("Unable to allocate memory map for listen address\n");
rc = -ENOMEM;
@@ -2604,9 +2529,7 @@ nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
TAILQ_REMOVE(&rtransport->devices, device, link);
if (device->map) {
spdk_mem_map_free(&device->map);
}
spdk_rdma_free_mem_map(&device->map);
if (device->pd) {
if (!g_nvmf_hooks.get_ibv_pd) {
ibv_dealloc_pd(device->pd);

View File

@@ -56,6 +56,7 @@ DEFINE_STUB_V(spdk_rdma_free_mem_map, (struct spdk_rdma_mem_map **map));
/* used to mock out having to split an SGL over a memory region */
size_t g_mr_size;
uint64_t g_mr_next_size;
struct ibv_mr g_rdma_mr = {
.addr = (void *)0xC0FFEE,
.lkey = RDMA_UT_LKEY,
@@ -72,6 +73,9 @@ spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address,
HANDLE_RETURN_MOCK(spdk_rdma_get_translation);
if (g_mr_size && length > g_mr_size) {
if (g_mr_next_size) {
g_mr_size = g_mr_next_size;
}
return -ERANGE;
}

View File

@@ -38,10 +38,6 @@
#include "nvmf/rdma.c"
#include "nvmf/transport.c"
uint64_t g_mr_size;
uint64_t g_mr_next_size;
struct ibv_mr g_rdma_mr;
#define RDMA_UT_UNITS_IN_MAX_IO 16
struct spdk_nvmf_transport_opts g_rdma_ut_transport_opts = {
@@ -126,19 +122,6 @@ spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, co
return 0;
}
uint64_t
spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
{
if (g_mr_size != 0) {
*(uint32_t *)size = g_mr_size;
if (g_mr_next_size != 0) {
g_mr_size = g_mr_next_size;
}
}
return (uint64_t)&g_rdma_mr;
}
static void reset_nvmf_rdma_request(struct spdk_nvmf_rdma_request *rdma_req)
{
int i;
@@ -205,7 +188,6 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
rtransport.transport.data_buf_pool = NULL;
device.attr.device_cap_flags = 0;
g_rdma_mr.lkey = 0xABCD;
sgl->keyed.key = 0xEEEE;
sgl->address = 0xFFFF;
rdma_req.recv->buf = (void *)0xDDDD;
@@ -231,7 +213,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
/* Part 2: simple I/O, one SGL larger than the transport io unit size (equal to the max io size) */
reset_nvmf_rdma_request(&rdma_req);
@@ -248,7 +230,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
/* Part 3: simple I/O one SGL larger than the transport max io size */
@@ -404,7 +386,6 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
sgl->address = 0xFFFF;
rdma_req.recv->buf = (void *)0xDDDD;
g_rdma_mr.lkey = 0xABCD;
sgl->keyed.key = 0xEEEE;
for (i = 0; i < 4; i++) {
@@ -508,7 +489,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == (((uint64_t)&buffer + NVMF_DATA_BUFFER_MASK) &
~NVMF_DATA_BUFFER_MASK));
CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
CU_ASSERT(buffer_ptr == &buffer);
STAILQ_REMOVE(&group.retired_bufs, buffer_ptr, spdk_nvmf_transport_pg_cache_buf, link);
@@ -637,7 +618,6 @@ test_spdk_nvmf_rdma_request_process(void)
device.attr.device_cap_flags = 0;
device.map = (void *)0x0;
g_rdma_mr.lkey = 0xABCD;
/* Test 1: single SGL READ request */
rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_READ);
@@ -923,7 +903,6 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
device.attr.device_cap_flags = 0;
device.map = NULL;
g_rdma_mr.lkey = 0xABCD;
sgl->keyed.key = 0xEEEE;
sgl->address = 0xFFFF;
rdma_req.recv->buf = (void *)0xDDDD;
@@ -958,7 +937,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
for (i = 0; i < 4; ++i) {
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
/* Part 2: simple I/O, one SGL equal to io unit size, io_unit_size is not aligned with md_size,
@@ -988,16 +967,16 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
for (i = 0; i < 3; ++i) {
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == RDMA_UT_LKEY);
/* 2nd buffer consumed */
CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == RDMA_UT_LKEY);
/* Part 3: simple I/O, one SGL equal io unit size, io_unit_size is equal to block size 512 bytes */
MOCK_SET(spdk_mempool_get, (void *)0x2000);
@@ -1024,7 +1003,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[0].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
CU_ASSERT(rdma_req.req.iovcnt == 2);
CU_ASSERT(rdma_req.req.iov[0].iov_base == (void *)((unsigned long)0x2000));
@@ -1060,7 +1039,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
for (i = 0; i < 4; ++i) {
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
/* Part 5: simple I/O, one SGL equal to 2x io unit size, io_unit_size is aligned with md_size,
@@ -1123,24 +1102,24 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
for (i = 0; i < 3; ++i) {
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == RDMA_UT_LKEY);
/* 2nd IO buffer consumed */
CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == RDMA_UT_LKEY);
CU_ASSERT(rdma_req.data.wr.sg_list[5].addr == 0x2000 + 24 + md_size);
CU_ASSERT(rdma_req.data.wr.sg_list[5].length == 512);
CU_ASSERT(rdma_req.data.wr.sg_list[5].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[5].lkey == RDMA_UT_LKEY);
CU_ASSERT(rdma_req.data.wr.sg_list[6].addr == 0x2000 + 24 + 512 + md_size * 2);
CU_ASSERT(rdma_req.data.wr.sg_list[6].length == 512);
CU_ASSERT(rdma_req.data.wr.sg_list[6].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[6].lkey == RDMA_UT_LKEY);
/* Part 7: simple I/O, number of SGL entries exceeds the number of entries
one WR can hold. Additional WR is chained */
@@ -1197,13 +1176,13 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
CU_ASSERT(rdma_req.data.wr.sg_list[0].length == 512);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
/* 2nd IO buffer consumed, offset 4 bytes due to part of the metadata
is located at the beginning of that buffer */
CU_ASSERT(rdma_req.data.wr.sg_list[1].addr == 0x2000 + 4);
CU_ASSERT(rdma_req.data.wr.sg_list[1].length == 512);
CU_ASSERT(rdma_req.data.wr.sg_list[1].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[1].lkey == RDMA_UT_LKEY);
/* Test 9 dealing with a buffer split over two Memory Regions */
MOCK_SET(spdk_mempool_get, (void *)&buffer);
@@ -1231,7 +1210,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (uint64_t)rdma_req.req.data + i *
(data_bs + md_size));
CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
}
buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
CU_ASSERT(buffer_ptr == &buffer);