memory: do not merge adjacent registrations
Prevents us from unregistering two or more separately registered
regions with a single notification. This fixes an ibv_mr leak in
RDMA: when multiple registrations were unregistered with a single
notification, only the first ibv_mr would be freed and the remaining
memory could still be DMA-able.

As of now, unregistering multiple complete regions with a single
unregister call is not possible. It will be implemented later, after
the rest of the code is cleaned up.

Change-Id: I7d61867fa61fd7a4a8a644ff45cab17125d63e1b
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/425555
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 6c30e783c1
commit 6bcd929500
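Why the leak happened: RDMA-style mem maps create one ibv_mr per REGISTER notification and free it on UNREGISTER, keyed by the region's start address. The sketch below is illustrative only, not SPDK's actual RDMA code (store_mr/take_mr are stand-in bookkeeping helpers), but it shows why a single UNREGISTER notification spanning two separately registered regions frees only the first region's ibv_mr:

	#include <stddef.h>
	#include <infiniband/verbs.h>
	#include <spdk/env.h>

	#define MAX_REGIONS 64

	/* One ibv_mr per separately registered region, keyed by start address. */
	static struct mr_entry {
		void *vaddr;
		struct ibv_mr *mr;
	} g_mrs[MAX_REGIONS];

	static void
	store_mr(void *vaddr, struct ibv_mr *mr)
	{
		int i;

		for (i = 0; i < MAX_REGIONS; i++) {
			if (g_mrs[i].mr == NULL) {
				g_mrs[i].vaddr = vaddr;
				g_mrs[i].mr = mr;
				return;
			}
		}
	}

	static struct ibv_mr *
	take_mr(void *vaddr)
	{
		int i;

		for (i = 0; i < MAX_REGIONS; i++) {
			if (g_mrs[i].mr != NULL && g_mrs[i].vaddr == vaddr) {
				struct ibv_mr *mr = g_mrs[i].mr;

				g_mrs[i].mr = NULL;
				return mr;
			}
		}

		return NULL;
	}

	static int
	rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
			enum spdk_mem_map_notify_action action, void *vaddr, size_t size)
	{
		struct ibv_pd *pd = cb_ctx;
		struct ibv_mr *mr;

		switch (action) {
		case SPDK_MEM_MAP_NOTIFY_REGISTER:
			mr = ibv_reg_mr(pd, vaddr, size, IBV_ACCESS_LOCAL_WRITE);
			if (mr == NULL) {
				return -1;
			}
			store_mr(vaddr, mr);
			break;
		case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
			/* Only the mr whose region starts at vaddr is freed. If two
			 * separately registered regions arrived merged into one
			 * notification, the second region's mr would never be found
			 * here and would leak: the bug this commit prevents. */
			mr = take_mr(vaddr);
			if (mr != NULL) {
				ibv_dereg_mr(mr);
			}
			break;
		}

		return 0;
	}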
lib/env_dpdk/memory.c

@@ -57,6 +57,15 @@
 #define MAP_256TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
 #define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))
 
+/* Low 16 bits for the reference count */
+#define REG_MAP_REF_MASK	0xffff
+
+/* A notification region barrier. The 2MB translation entry that's marked
+ * with this flag must be unregistered separately. This allows contiguous
+ * regions to be unregistered in the same chunks they were registered.
+ */
+#define REG_MAP_NOTIFY_START	(1ULL << 63)
+
 /* Translation of a single 2MB page. */
 struct map_2mb {
 	uint64_t translation_2mb;
@@ -87,6 +96,12 @@ struct spdk_mem_map {
 	TAILQ_ENTRY(spdk_mem_map) tailq;
 };
 
+/* Registrations map. The 64 bit translations are bit fields with the
+ * following layout (starting with the low bits):
+ * 0 - 15 : reference count
+ * 16 - 62 : reserved
+ * 63 : flags
+ */
 static struct spdk_mem_map *g_mem_reg_map;
 static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
 static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER;
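The layout comment above fully determines how a registration entry is decoded. A minimal sketch, assuming the macros from the hunk above (the helper names are hypothetical, not part of this patch):

	#include <stdint.h>

	/* Hypothetical decode helpers mirroring the documented layout:
	 * bits 0-15 hold the reference count, bit 63 the NOTIFY_START flag. */
	static inline uint16_t
	reg_ref_count(uint64_t reg)
	{
		return (uint16_t)(reg & REG_MAP_REF_MASK);
	}

	static inline int
	reg_is_notify_start(uint64_t reg)
	{
		return (reg & REG_MAP_NOTIFY_START) != 0;
	}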
@@ -133,7 +148,9 @@ spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action
 	}
 
 	for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
-		if (map_1gb->map[idx_1gb].translation_2mb != 0) {
+		if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
+		    (contig_start == UINT64_MAX ||
+		     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
 			/* Rebuild the virtual address from the indexes */
 			uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);
 
@@ -152,6 +169,11 @@ spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action
 			if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
 				goto err_unregister;
 			}
+
+			/* This page might be a part of a neighbour region, so process
+			 * it again. The idx_1gb will be incremented immediately.
+			 */
+			idx_1gb--;
 		}
 		contig_start = UINT64_MAX;
 	}
@@ -187,7 +209,8 @@ err_unregister:
 	}
 
 	for (; idx_1gb < UINT64_MAX; idx_1gb--) {
-		if (map_1gb->map[idx_1gb].translation_2mb != 0) {
+		if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
+		    (contig_end == UINT64_MAX || (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
 			/* Rebuild the virtual address from the indexes */
 			uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);
 
@@ -202,6 +225,7 @@ err_unregister:
 						SPDK_MEM_MAP_NOTIFY_UNREGISTER,
 						(void *)contig_start,
 						contig_end - contig_start + VALUE_2MB);
+			idx_1gb++;
 		}
 		contig_end = UINT64_MAX;
 	}
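To see the barrier in action, here is a standalone toy version of the forward walk above (assumed values only, not the SPDK function itself): two adjacent registrations of 2 pages and 1 page produce separate 4MB and 2MB notifications instead of one merged 6MB one.

	#include <stdint.h>
	#include <stdio.h>

	#define REF_MASK     0xffffULL
	#define NOTIFY_START (1ULL << 63)
	#define PAGE_SZ      (2ULL * 1024 * 1024)

	int main(void)
	{
		/* Two adjacent registrations: pages {0,1} and page {2}.
		 * The first page of each carries the NOTIFY_START barrier. */
		uint64_t map[4] = { NOTIFY_START | 1, 1, NOTIFY_START | 1, 0 };
		uint64_t contig_start = UINT64_MAX, contig_end = 0;
		uint64_t i;

		for (i = 0; i < 4; i++) {
			if ((map[i] & REF_MASK) > 0 &&
			    (contig_start == UINT64_MAX || (map[i] & NOTIFY_START) == 0)) {
				uint64_t vaddr = i * PAGE_SZ;

				if (contig_start == UINT64_MAX) {
					contig_start = vaddr;
				}
				contig_end = vaddr;
			} else {
				if (contig_start != UINT64_MAX) {
					printf("notify(%#llx, %llu MB)\n",
					       (unsigned long long)contig_start,
					       (unsigned long long)((contig_end - contig_start + PAGE_SZ) >> 20));
					i--; /* revisit: this page may start the next region */
				}
				contig_start = UINT64_MAX;
			}
		}
		return 0;
	}

Running it prints notify(0, 4 MB) followed by notify(0x400000, 2 MB), which is exactly the per-registration chunking the barrier flag exists to preserve.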
@@ -311,13 +335,14 @@ spdk_mem_register(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = 0;
 	while (len > 0) {
-		uint64_t ref_count;
+		uint64_t reg;
 
 		/* In g_mem_reg_map, the "translation" is the reference count */
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
-		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, ref_count + 1);
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
+		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
+					     seg_len == 0 ? (reg + 1) | REG_MAP_NOTIFY_START : reg + 1);
 
-		if (ref_count > 0) {
+		if ((reg & REG_MAP_REF_MASK) > 0 || (reg & REG_MAP_NOTIFY_START)) {
 			if (seg_len > 0) {
 				TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
 					rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
@@ -359,7 +383,7 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	int rc;
 	void *seg_vaddr;
 	size_t seg_len;
-	uint64_t ref_count;
+	uint64_t reg;
 
 	if ((uintptr_t)vaddr & ~MASK_256TB) {
 		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
@@ -377,8 +401,8 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = len;
 	while (seg_len > 0) {
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
-		if (ref_count == 0) {
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
+		if ((reg & REG_MAP_REF_MASK) == 0) {
 			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
 			return -EINVAL;
 		}
@@ -389,11 +413,14 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = 0;
 	while (len > 0) {
-		/* In g_mem_reg_map, the "translation" is the reference count */
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
-		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, ref_count - 1);
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
+		if ((reg & REG_MAP_REF_MASK) == 1) {
+			/* clear any flags */
+			reg = 1;
+		}
+		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, reg - 1);
 
-		if (ref_count > 1) {
+		if ((reg & REG_MAP_REF_MASK) > 1 || (reg & REG_MAP_NOTIFY_START)) {
 			if (seg_len > 0) {
 				TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
 					rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
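Taken together, the register and unregister changes give adjacent registrations independent lifetimes. A minimal usage sketch, with an illustrative buffer setup and error handling omitted:

	#include <stdint.h>
	#include <sys/mman.h>
	#include <spdk/env.h>

	#define MB (1024 * 1024)

	void
	adjacent_registration_example(void)
	{
		/* Hypothetical buffer: 6MB of anonymous hugepage memory,
		 * 2MB-aligned and pinned (setup is illustrative only). */
		uint8_t *buf = mmap(NULL, 6 * MB, PROT_READ | PROT_WRITE,
				    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

		/* Two adjacent, separately registered regions. */
		spdk_mem_register(buf, 4 * MB);
		spdk_mem_register(buf + 4 * MB, 2 * MB);

		/* After this commit each region must be unregistered on its own;
		 * every mem map (e.g. RDMA's ibv_mr map) then receives one
		 * UNREGISTER notification per region, so nothing is leaked.
		 * A single 6MB unregister spanning both regions is not
		 * supported yet, per the commit message. */
		spdk_mem_unregister(buf, 4 * MB);
		spdk_mem_unregister(buf + 4 * MB, 2 * MB);

		munmap(buf, 6 * MB);
	}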
test/env/memory/memory_ut.c (64 lines changed)
@@ -122,6 +122,30 @@ test_mem_map_notify_fail(void *cb_ctx, struct spdk_mem_map *map,
 	return 0;
 }
 
+static int
+test_mem_map_notify_checklen(void *cb_ctx, struct spdk_mem_map *map,
+			     enum spdk_mem_map_notify_action action, void *vaddr, size_t size)
+{
+	size_t *len_arr = cb_ctx;
+
+	/*
+	 * This is a test requirement - the len array we use to verify
+	 * pages are valid is only so large.
+	 */
+	SPDK_CU_ASSERT_FATAL((uintptr_t)vaddr < (VALUE_2MB * PAGE_ARRAY_SIZE));
+
+	switch (action) {
+	case SPDK_MEM_MAP_NOTIFY_REGISTER:
+		assert(size == len_arr[(uintptr_t)vaddr / VALUE_2MB]);
+		break;
+	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
+		CU_ASSERT(size == len_arr[(uintptr_t)vaddr / VALUE_2MB]);
+		break;
+	}
+
+	return 0;
+}
+
 static int
 test_check_regions_contiguous(uint64_t addr1, uint64_t addr2)
 {
@@ -143,6 +167,11 @@ struct spdk_mem_map_ops test_map_ops_notify_fail = {
 	.are_contiguous = NULL
 };
 
+struct spdk_mem_map_ops test_map_ops_notify_checklen = {
+	.notify_cb = test_mem_map_notify_checklen,
+	.are_contiguous = NULL
+};
+
 static void
 test_mem_map_alloc_free(void)
 {
@@ -367,6 +396,38 @@ test_mem_map_registration(void)
 	CU_ASSERT(map == NULL);
 }
 
+static void
+test_mem_map_registration_adjacent(void)
+{
+	struct spdk_mem_map *map;
+	uint64_t default_translation = 0xDEADBEEF0BADF00D;
+	uintptr_t vaddr;
+	unsigned i;
+	size_t notify_len[PAGE_ARRAY_SIZE] = {0};
+	size_t chunk_len[] = { 2, 1, 3, 2, 1, 1 };
+
+	vaddr = 0;
+	for (i = 0; i < SPDK_COUNTOF(chunk_len); i++) {
+		notify_len[vaddr / VALUE_2MB] = chunk_len[i] * VALUE_2MB;
+		spdk_mem_register((void *)vaddr, notify_len[vaddr / VALUE_2MB]);
+		vaddr += notify_len[vaddr / VALUE_2MB];
+	}
+
+	/* Verify the memory is translated in the same chunks it was registered */
+	map = spdk_mem_map_alloc(default_translation,
+				 &test_map_ops_notify_checklen, notify_len);
+	SPDK_CU_ASSERT_FATAL(map != NULL);
+	spdk_mem_map_free(&map);
+	CU_ASSERT(map == NULL);
+
+	vaddr = 0;
+	for (i = 0; i < SPDK_COUNTOF(chunk_len); i++) {
+		notify_len[vaddr / VALUE_2MB] = chunk_len[i] * VALUE_2MB;
+		spdk_mem_unregister((void *)vaddr, notify_len[vaddr / VALUE_2MB]);
+		vaddr += notify_len[vaddr / VALUE_2MB];
+	}
+}
+
 int
 main(int argc, char **argv)
 {
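Reading the test above: with chunk_len = { 2, 1, 3, 2, 1, 1 }, notify_len records chunk_len[i] * VALUE_2MB at each chunk's starting page, so the checklen callback should observe six notifications of 4MB, 2MB, 6MB, 4MB, 2MB and 2MB, both during the REGISTER walk triggered by spdk_mem_map_alloc() and during the explicit per-chunk unregisters.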
@@ -398,7 +459,8 @@ main(int argc, char **argv)
 	if (
 		CU_add_test(suite, "alloc and free memory map", test_mem_map_alloc_free) == NULL ||
 		CU_add_test(suite, "mem map translation", test_mem_map_translation) == NULL ||
-		CU_add_test(suite, "mem map registration", test_mem_map_registration) == NULL
+		CU_add_test(suite, "mem map registration", test_mem_map_registration) == NULL ||
+		CU_add_test(suite, "mem map adjacent registrations", test_mem_map_registration_adjacent) == NULL
 	) {
 		CU_cleanup_registry();
 		return CU_get_error();