nvmf/rdma: Use RDMA provider memory translation
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: I78420118d3ec1bf78c0301a51ab320503b6f2fca
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5123
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent b6efb964cd
commit 01f2d0bd2d
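
This commit replaces the NVMe-oF RDMA transport's private memory registration (a spdk_mem_map driven by an ibv_reg_mr()/ibv_dereg_mr() notify callback) with the common RDMA provider memory-map API, so memory translation is implemented once for all RDMA consumers. A minimal sketch of the new flow, using only the calls that appear in the diff below (error handling abbreviated, surrounding variables assumed):

	/* Create one provider-managed map per device; the hooks let a vendor
	 * supply rkeys instead of the default ibv_reg_mr() registration. */
	struct spdk_rdma_mem_map *map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks);
	struct spdk_rdma_memory_translation translation;
	uint32_t lkey;

	if (map == NULL) {
		return -ENOMEM;
	}

	/* Translate a data buffer before filling an SGE of a work request. */
	if (spdk_rdma_get_translation(map, iov->iov_base, iov->iov_len, &translation) == 0) {
		lkey = spdk_rdma_memory_translation_get_lkey(&translation);
	}

	/* On device teardown. */
	spdk_rdma_free_mem_map(&map);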
@@ -462,7 +462,7 @@ struct spdk_nvmf_rdma_device {
 	struct ibv_device_attr		attr;
 	struct ibv_context		*context;
 
-	struct spdk_mem_map		*map;
+	struct spdk_rdma_mem_map	*map;
 	struct ibv_pd			*pd;
 
 	int				num_srq;
@@ -1331,56 +1331,6 @@ nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *e
 	return 0;
 }
 
-static int
-nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
-		     enum spdk_mem_map_notify_action action,
-		     void *vaddr, size_t size)
-{
-	struct ibv_pd *pd = cb_ctx;
-	struct ibv_mr *mr;
-	int rc;
-
-	switch (action) {
-	case SPDK_MEM_MAP_NOTIFY_REGISTER:
-		if (!g_nvmf_hooks.get_rkey) {
-			mr = ibv_reg_mr(pd, vaddr, size,
-					IBV_ACCESS_LOCAL_WRITE |
-					IBV_ACCESS_REMOTE_READ |
-					IBV_ACCESS_REMOTE_WRITE);
-			if (mr == NULL) {
-				SPDK_ERRLOG("ibv_reg_mr() failed\n");
-				return -1;
-			} else {
-				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
-			}
-		} else {
-			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
-							  g_nvmf_hooks.get_rkey(pd, vaddr, size));
-		}
-		break;
-	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
-		if (!g_nvmf_hooks.get_rkey) {
-			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
-			if (mr) {
-				ibv_dereg_mr(mr);
-			}
-		}
-		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
-		break;
-	default:
-		SPDK_UNREACHABLE();
-	}
-
-	return rc;
-}
-
-static int
-nvmf_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
-{
-	/* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
-	return addr_1 == addr_2;
-}
-
 static inline void
 nvmf_rdma_setup_wr(struct ibv_send_wr *wr, struct ibv_send_wr *next,
 		   enum spdk_nvme_data_transfer xfer)
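
The registration logic deleted above does not disappear: spdk_rdma_create_mem_map() (used further down in nvmf_rdma_create()) presumably installs an equivalent notify callback inside the shared RDMA provider library, doing ibv_reg_mr() on SPDK_MEM_MAP_NOTIFY_REGISTER, ibv_dereg_mr() on SPDK_MEM_MAP_NOTIFY_UNREGISTER, and deferring to the get_rkey hook when one is supplied, so every RDMA transport shares a single implementation instead of each carrying its own copy.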
@@ -1490,31 +1440,6 @@ nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
 	return 0;
 }
 
-static bool
-nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
-		   uint32_t *_lkey)
-{
-	uint64_t translation_len;
-	uint32_t lkey;
-
-	translation_len = iov->iov_len;
-
-	if (!g_nvmf_hooks.get_rkey) {
-		lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
-				(uint64_t)iov->iov_base, &translation_len))->lkey;
-	} else {
-		lkey = spdk_mem_map_translate(device->map,
-					      (uint64_t)iov->iov_base, &translation_len);
-	}
-
-	if (spdk_unlikely(translation_len < iov->iov_len)) {
-		return false;
-	}
-
-	*_lkey = lkey;
-	return true;
-}
-
 static bool
 nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
 		      struct iovec *iov, struct ibv_send_wr **_wr,
@@ -1524,13 +1449,18 @@ nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
 {
 	struct ibv_send_wr *wr = *_wr;
 	struct ibv_sge *sg_ele = &wr->sg_list[wr->num_sge];
+	struct spdk_rdma_memory_translation mem_translation;
+	int rc;
 	uint32_t lkey = 0;
 	uint32_t remaining, data_block_size, md_size, sge_len;
 
-	if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &lkey))) {
+	rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation);
+	if (spdk_unlikely(rc)) {
 		return false;
 	}
 
+	lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation);
+
 	if (spdk_likely(!dif_ctx)) {
 		sg_ele->lkey = lkey;
 		sg_ele->addr = (uintptr_t)(iov->iov_base);
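
Judging by the two calls introduced above, spdk_rdma_memory_translation hides whether the lookup produced a full struct ibv_mr (the default registration path) or a bare key from g_nvmf_hooks.get_rkey. A hypothetical sketch of the shape this implies; the authoritative definition lives in the RDMA provider headers, not in this diff:

	/* Assumed layout, inferred from spdk_rdma_memory_translation_get_lkey()
	 * working for both the ibv_mr path and the get_rkey hook path. */
	struct spdk_rdma_memory_translation {
		union {
			struct ibv_mr *mr;	/* default ibv_reg_mr() registration */
			uint64_t key;		/* key returned by a vendor hook */
		} mr_or_key;
		uint8_t translation_type;	/* selects the valid union member */
	};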
@@ -2285,11 +2215,6 @@ nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
 	opts->transport_specific = NULL;
 }
 
-const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = {
-	.notify_cb = nvmf_rdma_mem_notify,
-	.are_contiguous = nvmf_rdma_check_contiguous_entries
-};
-
 static int nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
 			     spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg);
 
@@ -2513,7 +2438,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
 
 		assert(device->map == NULL);
 
-		device->map = spdk_mem_map_alloc(0, &g_nvmf_rdma_map_ops, device->pd);
+		device->map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks);
 		if (!device->map) {
 			SPDK_ERRLOG("Unable to allocate memory map for listen address\n");
 			rc = -ENOMEM;
@@ -2604,9 +2529,7 @@ nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
 
 	TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
 		TAILQ_REMOVE(&rtransport->devices, device, link);
-		if (device->map) {
-			spdk_mem_map_free(&device->map);
-		}
+		spdk_rdma_free_mem_map(&device->map);
 		if (device->pd) {
 			if (!g_nvmf_hooks.get_ibv_pd) {
 				ibv_dealloc_pd(device->pd);
@@ -56,6 +56,7 @@ DEFINE_STUB_V(spdk_rdma_free_mem_map, (struct spdk_rdma_mem_map **map));
 
 /* used to mock out having to split an SGL over a memory region */
 size_t g_mr_size;
+uint64_t g_mr_next_size;
 struct ibv_mr g_rdma_mr = {
 	.addr = (void *)0xC0FFEE,
 	.lkey = RDMA_UT_LKEY,
@@ -72,6 +73,9 @@ spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address,
 	HANDLE_RETURN_MOCK(spdk_rdma_get_translation);
 
 	if (g_mr_size && length > g_mr_size) {
+		if (g_mr_next_size) {
+			g_mr_size = g_mr_next_size;
+		}
 		return -ERANGE;
 	}
 
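
The shared stub above lets a test simulate a buffer that cannot be translated in one piece: any translation longer than g_mr_size fails with -ERANGE, and if g_mr_next_size is set the limit is raised before returning, so the retry after the transport replaces the buffer succeeds. Hypothetical setup inside a test body:

	g_mr_size = 0x1000;		/* first translation of a larger buffer fails */
	g_mr_next_size = 0x10000;	/* retry succeeds against the raised limit */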
@@ -38,10 +38,6 @@
 #include "nvmf/rdma.c"
 #include "nvmf/transport.c"
 
-uint64_t g_mr_size;
-uint64_t g_mr_next_size;
-struct ibv_mr g_rdma_mr;
-
 #define RDMA_UT_UNITS_IN_MAX_IO 16
 
 struct spdk_nvmf_transport_opts g_rdma_ut_transport_opts = {
@@ -126,19 +122,6 @@ spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, co
 	return 0;
 }
 
-uint64_t
-spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
-{
-	if (g_mr_size != 0) {
-		*(uint32_t *)size = g_mr_size;
-		if (g_mr_next_size != 0) {
-			g_mr_size = g_mr_next_size;
-		}
-	}
-
-	return (uint64_t)&g_rdma_mr;
-}
-
 static void reset_nvmf_rdma_request(struct spdk_nvmf_rdma_request *rdma_req)
 {
 	int i;
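
With the local spdk_mem_map_translate() mock removed, the test now links against the shared spdk_rdma_get_translation() stub, whose g_rdma_mr carries the fixed RDMA_UT_LKEY; the assertions below accordingly compare against that constant rather than the locally mutated g_rdma_mr.lkey.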
@@ -205,7 +188,6 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	rtransport.transport.data_buf_pool = NULL;
 
 	device.attr.device_cap_flags = 0;
-	g_rdma_mr.lkey = 0xABCD;
 	sgl->keyed.key = 0xEEEE;
 	sgl->address = 0xFFFF;
 	rdma_req.recv->buf = (void *)0xDDDD;
@@ -231,7 +213,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
-	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
 
 	/* Part 2: simple I/O, one SGL larger than the transport io unit size (equal to the max io size) */
 	reset_nvmf_rdma_request(&rdma_req);
@@ -248,7 +230,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == 0x2000);
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000);
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 
 	/* Part 3: simple I/O one SGL larger than the transport max io size */
@@ -404,7 +386,6 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 	sgl->address = 0xFFFF;
 	rdma_req.recv->buf = (void *)0xDDDD;
-	g_rdma_mr.lkey = 0xABCD;
 	sgl->keyed.key = 0xEEEE;
 
 	for (i = 0; i < 4; i++) {
@@ -508,7 +489,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == (((uint64_t)&buffer + NVMF_DATA_BUFFER_MASK) &
 			~NVMF_DATA_BUFFER_MASK));
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
-	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
 	buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
 	CU_ASSERT(buffer_ptr == &buffer);
 	STAILQ_REMOVE(&group.retired_bufs, buffer_ptr, spdk_nvmf_transport_pg_cache_buf, link);
@@ -637,7 +618,6 @@ test_spdk_nvmf_rdma_request_process(void)
 
 	device.attr.device_cap_flags = 0;
 	device.map = (void *)0x0;
-	g_rdma_mr.lkey = 0xABCD;
 
 	/* Test 1: single SGL READ request */
 	rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_READ);
@@ -923,7 +903,6 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 
 	device.attr.device_cap_flags = 0;
 	device.map = NULL;
-	g_rdma_mr.lkey = 0xABCD;
 	sgl->keyed.key = 0xEEEE;
 	sgl->address = 0xFFFF;
 	rdma_req.recv->buf = (void *)0xDDDD;
@@ -958,7 +937,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 	for (i = 0; i < 4; ++i) {
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 
 	/* Part 2: simple I/O, one SGL equal to io unit size, io_unit_size is not aligned with md_size,
@@ -988,16 +967,16 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 	for (i = 0; i < 3; ++i) {
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 	CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
 	CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
-	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == RDMA_UT_LKEY);
 
 	/* 2nd buffer consumed */
 	CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
-	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == RDMA_UT_LKEY);
 
 	/* Part 3: simple I/O, one SGL equal io unit size, io_unit_size is equal to block size 512 bytes */
 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
@@ -1024,7 +1003,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == data_bs);
-	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
 
 	CU_ASSERT(rdma_req.req.iovcnt == 2);
 	CU_ASSERT(rdma_req.req.iov[0].iov_base == (void *)((unsigned long)0x2000));
@@ -1060,7 +1039,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 	for (i = 0; i < 4; ++i) {
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 
 	/* Part 5: simple I/O, one SGL equal to 2x io unit size, io_unit_size is aligned with md_size,
@@ -1123,24 +1102,24 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 	for (i = 0; i < 3; ++i) {
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 	CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
 	CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
-	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == RDMA_UT_LKEY);
 
 	/* 2nd IO buffer consumed */
 	CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
-	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == RDMA_UT_LKEY);
 
 	CU_ASSERT(rdma_req.data.wr.sg_list[5].addr == 0x2000 + 24 + md_size);
 	CU_ASSERT(rdma_req.data.wr.sg_list[5].length == 512);
-	CU_ASSERT(rdma_req.data.wr.sg_list[5].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[5].lkey == RDMA_UT_LKEY);
 
 	CU_ASSERT(rdma_req.data.wr.sg_list[6].addr == 0x2000 + 24 + 512 + md_size * 2);
 	CU_ASSERT(rdma_req.data.wr.sg_list[6].length == 512);
-	CU_ASSERT(rdma_req.data.wr.sg_list[6].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[6].lkey == RDMA_UT_LKEY);
 
 	/* Part 7: simple I/O, number of SGL entries exceeds the number of entries
 	   one WR can hold. Additional WR is chained */
@@ -1197,13 +1176,13 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == 512);
-	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == RDMA_UT_LKEY);
 
 	/* 2nd IO buffer consumed, offset 4 bytes due to part of the metadata
 	   is located at the beginning of that buffer */
 	CU_ASSERT(rdma_req.data.wr.sg_list[1].addr == 0x2000 + 4);
 	CU_ASSERT(rdma_req.data.wr.sg_list[1].length == 512);
-	CU_ASSERT(rdma_req.data.wr.sg_list[1].lkey == g_rdma_mr.lkey);
+	CU_ASSERT(rdma_req.data.wr.sg_list[1].lkey == RDMA_UT_LKEY);
 
 	/* Test 9 dealing with a buffer split over two Memory Regions */
 	MOCK_SET(spdk_mempool_get, (void *)&buffer);
@@ -1231,7 +1210,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (uint64_t)rdma_req.req.data + i *
 			  (data_bs + md_size));
 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
-		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
+		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == RDMA_UT_LKEY);
 	}
 	buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
 	CU_ASSERT(buffer_ptr == &buffer);