diff --git a/lib/env_dpdk/memory.c b/lib/env_dpdk/memory.c
index 08e4f1dbc5..3b4978bde2 100644
--- a/lib/env_dpdk/memory.c
+++ b/lib/env_dpdk/memory.c
@@ -57,6 +57,15 @@
 #define MAP_256TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
 #define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))
 
+/* Low 16 bits for the reference count */
+#define REG_MAP_REF_MASK	0xffff
+
+/* A notification region barrier. The 2MB translation entry that's marked
+ * with this flag must be unregistered separately. This allows contiguous
+ * regions to be unregistered in the same chunks they were registered.
+ */
+#define REG_MAP_NOTIFY_START	(1ULL << 63)
+
 /* Translation of a single 2MB page. */
 struct map_2mb {
 	uint64_t translation_2mb;
@@ -87,6 +96,12 @@ struct spdk_mem_map {
 	TAILQ_ENTRY(spdk_mem_map) tailq;
 };
 
+/* Registrations map. The 64 bit translations are bit fields with the
+ * following layout (starting with the low bits):
+ * 0 - 15 : reference count
+ * 16 - 62 : reserved
+ * 63 : flags
+ */
 static struct spdk_mem_map *g_mem_reg_map;
 static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
 static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -133,7 +148,9 @@ spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_acti
 		}
 
 		for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
-			if (map_1gb->map[idx_1gb].translation_2mb != 0) {
+			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
+			    (contig_start == UINT64_MAX ||
+			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
 				/* Rebuild the virtual address from the indexes */
 				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);
@@ -152,6 +169,11 @@ spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_acti
 				if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
 					goto err_unregister;
 				}
+
+				/* This page might be a part of a neighbour region, so process
+				 * it again. The idx_1gb will be incremented immediately.
+				 */
+				idx_1gb--;
 			}
 			contig_start = UINT64_MAX;
 		}
@@ -187,7 +209,8 @@ err_unregister:
 		}
 
 		for (; idx_1gb < UINT64_MAX; idx_1gb--) {
-			if (map_1gb->map[idx_1gb].translation_2mb != 0) {
+			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REF_MASK) > 0 &&
+			    (contig_end == UINT64_MAX || (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
 				/* Rebuild the virtual address from the indexes */
 				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);
@@ -202,6 +225,7 @@ err_unregister:
 							       SPDK_MEM_MAP_NOTIFY_UNREGISTER,
 							       (void *)contig_start,
 							       contig_end - contig_start + VALUE_2MB);
+				idx_1gb++;
 			}
 			contig_end = UINT64_MAX;
 		}
@@ -311,13 +335,13 @@ spdk_mem_register(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = 0;
 	while (len > 0) {
-		uint64_t ref_count;
+		uint64_t reg;
 
-		/* In g_mem_reg_map, the "translation" is the reference count */
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
-		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, ref_count + 1);
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
+		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
+					     seg_len == 0 ? (reg + 1) | REG_MAP_NOTIFY_START : reg + 1);
 
-		if (ref_count > 0) {
+		if ((reg & REG_MAP_REF_MASK) > 0 || (reg & REG_MAP_NOTIFY_START)) {
 			if (seg_len > 0) {
 				TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
 					rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
@@ -359,7 +383,7 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	int rc;
 	void *seg_vaddr;
 	size_t seg_len;
-	uint64_t ref_count;
+	uint64_t reg;
 
 	if ((uintptr_t)vaddr & ~MASK_256TB) {
 		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
@@ -377,8 +401,8 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = len;
 	while (seg_len > 0) {
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
-		if (ref_count == 0) {
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
+		if ((reg & REG_MAP_REF_MASK) == 0) {
 			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
 			return -EINVAL;
 		}
@@ -389,11 +413,14 @@ spdk_mem_unregister(void *vaddr, size_t len)
 	seg_vaddr = vaddr;
 	seg_len = 0;
 	while (len > 0) {
-		/* In g_mem_reg_map, the "translation" is the reference count */
-		ref_count = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
-		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, ref_count - 1);
+		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
+		if ((reg & REG_MAP_REF_MASK) == 1) {
+			/* clear any flags */
+			reg = 1;
+		}
+		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, reg - 1);
 
-		if (ref_count > 1) {
+		if ((reg & REG_MAP_REF_MASK) > 1 || (reg & REG_MAP_NOTIFY_START)) {
 			if (seg_len > 0) {
 				TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
 					rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
diff --git a/test/env/memory/memory_ut.c b/test/env/memory/memory_ut.c
index c94f15401d..451970183d 100644
--- a/test/env/memory/memory_ut.c
+++ b/test/env/memory/memory_ut.c
@@ -122,6 +122,30 @@ test_mem_map_notify_fail(void *cb_ctx, struct spdk_mem_map *map,
 	return 0;
 }
 
+static int
+test_mem_map_notify_checklen(void *cb_ctx, struct spdk_mem_map *map,
+			     enum spdk_mem_map_notify_action action, void *vaddr, size_t size)
+{
+	size_t *len_arr = cb_ctx;
+
+	/*
+	 * This is a test requirement - the len array we use to verify
+	 * pages are valid is only so large.
+	 */
+	SPDK_CU_ASSERT_FATAL((uintptr_t)vaddr < (VALUE_2MB * PAGE_ARRAY_SIZE));
+
+	switch (action) {
+	case SPDK_MEM_MAP_NOTIFY_REGISTER:
+		CU_ASSERT(size == len_arr[(uintptr_t)vaddr / VALUE_2MB]);
+		break;
+	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
+		CU_ASSERT(size == len_arr[(uintptr_t)vaddr / VALUE_2MB]);
+		break;
+	}
+
+	return 0;
+}
+
 static int
 test_check_regions_contiguous(uint64_t addr1, uint64_t addr2)
 {
@@ -143,6 +167,11 @@ struct spdk_mem_map_ops test_map_ops_notify_fail = {
 	.are_contiguous = NULL
 };
 
+struct spdk_mem_map_ops test_map_ops_notify_checklen = {
+	.notify_cb = test_mem_map_notify_checklen,
+	.are_contiguous = NULL
+};
+
 static void
 test_mem_map_alloc_free(void)
 {
@@ -367,6 +396,38 @@ test_mem_map_registration(void)
 	CU_ASSERT(map == NULL);
 }
 
+static void
+test_mem_map_registration_adjacent(void)
+{
+	struct spdk_mem_map *map;
+	uint64_t default_translation = 0xDEADBEEF0BADF00D;
+	uintptr_t vaddr;
+	unsigned i;
+	size_t notify_len[PAGE_ARRAY_SIZE] = {0};
+	size_t chunk_len[] = { 2, 1, 3, 2, 1, 1 };
+
+	vaddr = 0;
+	for (i = 0; i < SPDK_COUNTOF(chunk_len); i++) {
+		notify_len[vaddr / VALUE_2MB] = chunk_len[i] * VALUE_2MB;
+		spdk_mem_register((void *)vaddr, notify_len[vaddr / VALUE_2MB]);
+		vaddr += notify_len[vaddr / VALUE_2MB];
+	}
+
+	/* Verify the memory is translated in the same chunks it was registered */
+	map = spdk_mem_map_alloc(default_translation,
+				 &test_map_ops_notify_checklen, notify_len);
+	SPDK_CU_ASSERT_FATAL(map != NULL);
+	spdk_mem_map_free(&map);
+	CU_ASSERT(map == NULL);
+
+	vaddr = 0;
+	for (i = 0; i < SPDK_COUNTOF(chunk_len); i++) {
+		notify_len[vaddr / VALUE_2MB] = chunk_len[i] * VALUE_2MB;
+		spdk_mem_unregister((void *)vaddr, notify_len[vaddr / VALUE_2MB]);
+		vaddr += notify_len[vaddr / VALUE_2MB];
+	}
+}
+
 int
 main(int argc, char **argv)
 {
@@ -398,7 +459,8 @@ main(int argc, char **argv)
 	if (
 		CU_add_test(suite, "alloc and free memory map", test_mem_map_alloc_free) == NULL ||
 		CU_add_test(suite, "mem map translation", test_mem_map_translation) == NULL ||
-		CU_add_test(suite, "mem map registration", test_mem_map_registration) == NULL
+		CU_add_test(suite, "mem map registration", test_mem_map_registration) == NULL ||
+		CU_add_test(suite, "mem map adjacent registrations", test_mem_map_registration_adjacent) == NULL
 	) {
 		CU_cleanup_registry();
 		return CU_get_error();
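
For readers unfamiliar with the encoding this patch introduces, below is a minimal standalone sketch (not part of the patch, and not SPDK code) that models the registration map: each 2MB page entry keeps a reference count in its low 16 bits, and bit 63 (REG_MAP_NOTIFY_START) marks the first page of every registered chunk. The walk only extends a contiguous region across pages that do not carry the barrier, which is why adjacent registrations are notified and later unregistered in their original chunks. All names here (reg_map, reg_register, reg_walk, NUM_PAGES) are made up for illustration, and the walk is a simplified flat-array stand-in for spdk_mem_map_notify_walk()'s two-level map.

/* Standalone model of the registration-map encoding above. */
#include <stdint.h>
#include <stdio.h>

#define VALUE_2MB		(2ULL * 1024 * 1024)
#define REG_MAP_REF_MASK	0xffff
#define REG_MAP_NOTIFY_START	(1ULL << 63)
#define NUM_PAGES		16

static uint64_t reg_map[NUM_PAGES]; /* one entry per 2MB page */

/* Register [page, page + npages): bump each refcount, flag the first page
 * as a notification barrier - same as spdk_mem_register() in the patch.
 */
static void
reg_register(unsigned page, unsigned npages)
{
	unsigned i;

	for (i = 0; i < npages; i++) {
		uint64_t reg = reg_map[page + i];

		reg_map[page + i] = (i == 0) ? (reg + 1) | REG_MAP_NOTIFY_START : reg + 1;
	}
}

/* Walk the map and print each contiguous region, splitting regions at
 * NOTIFY_START barriers. NUM_PAGES doubles as the "no region open" marker,
 * the way the patch uses UINT64_MAX for contig_start.
 */
static void
reg_walk(void)
{
	unsigned contig_start = NUM_PAGES;
	unsigned i;

	for (i = 0; i < NUM_PAGES; i++) {
		uint64_t reg = reg_map[i];
		int registered = (reg & REG_MAP_REF_MASK) > 0;
		int barrier = (reg & REG_MAP_NOTIFY_START) != 0;

		if (registered && (contig_start == NUM_PAGES || !barrier)) {
			if (contig_start == NUM_PAGES) {
				contig_start = i;
			}
		} else if (contig_start != NUM_PAGES) {
			printf("region: vaddr 0x%jx len 0x%jx\n",
			       (uintmax_t)(contig_start * VALUE_2MB),
			       (uintmax_t)((i - contig_start) * VALUE_2MB));
			if (registered) {
				/* This page starts the next region - revisit it,
				 * mirroring the idx_1gb-- trick in the patch.
				 */
				i--;
			}
			contig_start = NUM_PAGES;
		}
	}
	if (contig_start != NUM_PAGES) {
		printf("region: vaddr 0x%jx len 0x%jx\n",
		       (uintmax_t)(contig_start * VALUE_2MB),
		       (uintmax_t)((NUM_PAGES - contig_start) * VALUE_2MB));
	}
}

int
main(void)
{
	/* Three back-to-back registrations of 2, 1 and 3 pages at vaddr 0. */
	reg_register(0, 2);
	reg_register(2, 1);
	reg_register(3, 3);

	/* Without the barrier this would report one 12MB region; with it,
	 * the walk reports 4MB, 2MB and 6MB chunks - the same behavior
	 * test_mem_map_notify_checklen() verifies in the unit test.
	 */
	reg_walk();
	return 0;
}

Built with any C99 compiler (e.g. cc sketch.c), this prints three regions of 0x400000, 0x200000 and 0x600000 bytes, matching the chunk sizes passed to reg_register() rather than one merged region.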