memory: forbid registering a memory region more than once
Removed the reference count from the registrations map. Although technically supported, registering a single memory region more than once had a lot of unhandled cases and could easily lead to a segfault. RDMA maps require all memory to be unregistered in the same chunks the memory was registered, which is often impossible to achieve if a region was registered more than once: 1. register region 0x0 - 0x3 -> it gets mapped to a single ibv_mr 2. register region 0x1 - 0x2 -> nothing happens, this region is already registered 3. unregister region 0x0 - 0x3 -> 0x0-0x1 gets unregistered as one region. 0x2-0x3 gets unregistered as another (leading to segfault in the the current RDMA implementation) The problem is that the last two regions share the same ibv_mr, which SPDK tries to free twice. The second free causes a segfault. vtophys map handles this case by registering each 2MB chunk separately, but this solution cannot be applied for RDMA, as NICs put a limitation (~2048) on the number of regions registered. Another option is to keep a refcount of each ibv_mr allocated, and free it only when the entire region was unregistered from the SPDK mem map. This is however very tricky and RDMAmojo mentions that freeing a memory buffer before unregistering its ibv_mr may lead to a segfault. Change-Id: I545c56e24ffa55bda211dea22aeb8a55d9631fe5 Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-on: https://review.gerrithub.io/426085 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
This commit is contained in:
parent
6bcd929500
commit
a33e0943b0
@ -57,8 +57,8 @@
|
||||
#define MAP_256TB_IDX(vfn_2mb) ((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
|
||||
#define MAP_1GB_IDX(vfn_2mb) ((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))
|
||||
|
||||
/* Low 16 bits for the reference count */
|
||||
#define REG_MAP_REF_MASK 0xffff
|
||||
/* Page is registered */
|
||||
#define REG_MAP_REGISTERED (1ULL << 62)
|
||||
|
||||
/* A notification region barrier. The 2MB translation entry that's marked
|
||||
* with this flag must be unregistered separately. This allows contiguous
|
||||
@ -98,9 +98,8 @@ struct spdk_mem_map {
|
||||
|
||||
/* Registrations map. The 64 bit translations are bit fields with the
|
||||
* following layout (starting with the low bits):
|
||||
* 0 - 15 : reference count
|
||||
* 16 - 62 : reserved
|
||||
* 63 : flags
|
||||
* 0 - 61 : reserved
|
||||
* 62 - 63 : flags
|
||||
*/
|
||||
static struct spdk_mem_map *g_mem_reg_map;
|
||||
static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
|
||||
@ -148,7 +147,7 @@ spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_acti
|
||||
}
|
||||
|
||||
for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
|
||||
if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
|
||||
if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
|
||||
(contig_start == UINT64_MAX ||
|
||||
(map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
|
||||
/* Rebuild the virtual address from the indexes */
|
||||
@ -209,7 +208,7 @@ err_unregister:
|
||||
}
|
||||
|
||||
for (; idx_1gb < UINT64_MAX; idx_1gb--) {
|
||||
if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
|
||||
if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
|
||||
(contig_end == UINT64_MAX || (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
|
||||
/* Rebuild the virtual address from the indexes */
|
||||
uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);
|
||||
@ -318,6 +317,7 @@ spdk_mem_register(void *vaddr, size_t len)
|
||||
int rc;
|
||||
void *seg_vaddr;
|
||||
size_t seg_len;
|
||||
uint64_t reg;
|
||||
|
||||
if ((uintptr_t)vaddr & ~MASK_256TB) {
|
||||
DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
|
||||
@ -330,45 +330,39 @@ spdk_mem_register(void *vaddr, size_t len)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_spdk_mem_map_mutex);
|
||||
|
||||
seg_vaddr = vaddr;
|
||||
seg_len = len;
|
||||
while (seg_len > 0) {
|
||||
reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
|
||||
if (reg & REG_MAP_REGISTERED) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
seg_vaddr += VALUE_2MB;
|
||||
seg_len -= VALUE_2MB;
|
||||
}
|
||||
|
||||
seg_vaddr = vaddr;
|
||||
seg_len = 0;
|
||||
while (len > 0) {
|
||||
uint64_t reg;
|
||||
|
||||
reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
|
||||
spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
|
||||
seg_len == 0 ? (reg + 1) | REG_MAP_NOTIFY_START : reg + 1);
|
||||
|
||||
if ((reg & REG_MAP_REF_MASK) > 0 || (reg & REG_MAP_NOTIFY_START)) {
|
||||
if (seg_len > 0) {
|
||||
TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
|
||||
rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
|
||||
if (rc != 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
seg_vaddr = vaddr + VALUE_2MB;
|
||||
seg_len = 0;
|
||||
} else {
|
||||
seg_len += VALUE_2MB;
|
||||
}
|
||||
|
||||
seg_len == 0 ? REG_MAP_REGISTERED | REG_MAP_NOTIFY_START : REG_MAP_REGISTERED);
|
||||
seg_len += VALUE_2MB;
|
||||
vaddr += VALUE_2MB;
|
||||
len -= VALUE_2MB;
|
||||
}
|
||||
|
||||
if (seg_len > 0) {
|
||||
TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
|
||||
rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
|
||||
if (rc != 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return rc;
|
||||
}
|
||||
TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
|
||||
rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
|
||||
if (rc != 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
@ -402,7 +396,7 @@ spdk_mem_unregister(void *vaddr, size_t len)
|
||||
seg_len = len;
|
||||
while (seg_len > 0) {
|
||||
reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
|
||||
if ((reg & REG_MAP_REF_MASK) == 0) {
|
||||
if ((reg & REG_MAP_REGISTERED) == 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -414,20 +408,14 @@ spdk_mem_unregister(void *vaddr, size_t len)
|
||||
seg_len = 0;
|
||||
while (len > 0) {
|
||||
reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
|
||||
if ((reg & REG_MAP_REF_MASK) == 1) {
|
||||
/* clear any flags */
|
||||
reg = 1;
|
||||
}
|
||||
spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, reg - 1);
|
||||
spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, 0);
|
||||
|
||||
if ((reg & REG_MAP_REF_MASK) > 1 || (reg & REG_MAP_NOTIFY_START)) {
|
||||
if (seg_len > 0) {
|
||||
TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
|
||||
rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
|
||||
if (rc != 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return rc;
|
||||
}
|
||||
if (seg_len > 0 && (reg & REG_MAP_NOTIFY_START)) {
|
||||
TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
|
||||
rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
|
||||
if (rc != 0) {
|
||||
pthread_mutex_unlock(&g_spdk_mem_map_mutex);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
|
13
test/env/memory/memory_ut.c
vendored
13
test/env/memory/memory_ut.c
vendored
@ -373,14 +373,17 @@ test_mem_map_registration(void)
|
||||
|
||||
/* Register an overlapping address range */
|
||||
rc = spdk_mem_register((void *)0, 3 * VALUE_2MB);
|
||||
CU_ASSERT(rc == 0);
|
||||
CU_ASSERT(rc == -EBUSY);
|
||||
|
||||
/*
|
||||
* Unregister the middle page of the larger region.
|
||||
* It was set twice, so unregister it twice.
|
||||
*/
|
||||
/* Unregister a 2MB page */
|
||||
rc = spdk_mem_unregister((void *)VALUE_2MB, VALUE_2MB);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
/* Register non overlapping address range */
|
||||
rc = spdk_mem_register((void *)0, 3 * VALUE_2MB);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
/* Unregister the middle page of the larger region. */
|
||||
rc = spdk_mem_unregister((void *)VALUE_2MB, VALUE_2MB);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user