memzone: improve zero-length reserve
Currently, reserving zero-length memzones is done by looking at malloc statistics and reserving the biggest element found in those statistics. This has two issues. First, there is a race condition: the heap is unlocked between the time we check the stats and the time we reserve the malloc element for the memzone. This may lead to an inability to reserve the memzone we wanted, because another allocation might have taken place in the meantime and the biggest element may no longer be available. Second, the size returned by malloc statistics does not include any alignment information, which is worked around by being conservative and subtracting the alignment length from the final result. This leads to fragmentation and to reserving memzones that could have been bigger but aren't. Fix all of this by using the earlier-introduced operation to reserve the biggest possible malloc element. This, however, comes with a trade-off, because we can only lock one heap at a time. So, if we check the first available heap and find *any* element at all, that element will be considered "the biggest", even though other heaps might have bigger elements. We cannot know what other heaps have before we try to allocate from them, and it is not a good idea to lock all of the heaps at the same time, so we will just document this limitation and encourage users to reserve memzones with the socket id properly set. Also, fix up unit tests to account for the new behavior. Fixes: fafcc11985a2 ("mem: rework memzone to be allocated by malloc") Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
This commit is contained in:
parent
68b6092bd3
commit
0b82bd7b24
@ -52,38 +52,6 @@ memzone_lookup_thread_unsafe(const char *name)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* This function will return the greatest free block if a heap has been
|
|
||||||
* specified. If no heap has been specified, it will return the heap and
|
|
||||||
* length of the greatest free block available in all heaps */
|
|
||||||
static size_t
|
|
||||||
find_heap_max_free_elem(int *s, unsigned align)
|
|
||||||
{
|
|
||||||
struct rte_mem_config *mcfg;
|
|
||||||
struct rte_malloc_socket_stats stats;
|
|
||||||
int i, socket = *s;
|
|
||||||
size_t len = 0;
|
|
||||||
|
|
||||||
/* get pointer to global configuration */
|
|
||||||
mcfg = rte_eal_get_configuration()->mem_config;
|
|
||||||
|
|
||||||
for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
|
|
||||||
if ((socket != SOCKET_ID_ANY) && (socket != i))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
|
|
||||||
if (stats.greatest_free_size > len) {
|
|
||||||
len = stats.greatest_free_size;
|
|
||||||
*s = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (len < MALLOC_ELEM_OVERHEAD + align)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return len - MALLOC_ELEM_OVERHEAD - align;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct rte_memzone *
|
static const struct rte_memzone *
|
||||||
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
||||||
int socket_id, unsigned int flags, unsigned int align,
|
int socket_id, unsigned int flags, unsigned int align,
|
||||||
@ -92,6 +60,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
|||||||
struct rte_memzone *mz;
|
struct rte_memzone *mz;
|
||||||
struct rte_mem_config *mcfg;
|
struct rte_mem_config *mcfg;
|
||||||
struct rte_fbarray *arr;
|
struct rte_fbarray *arr;
|
||||||
|
void *mz_addr;
|
||||||
size_t requested_len;
|
size_t requested_len;
|
||||||
int mz_idx;
|
int mz_idx;
|
||||||
bool contig;
|
bool contig;
|
||||||
@ -140,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
len += RTE_CACHE_LINE_MASK;
|
len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
|
||||||
len &= ~((size_t) RTE_CACHE_LINE_MASK);
|
|
||||||
|
|
||||||
/* save minimal requested length */
|
/* save minimal requested length */
|
||||||
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
|
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
|
||||||
@ -165,27 +133,18 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
|||||||
/* malloc only cares about size flags, remove contig flag from flags */
|
/* malloc only cares about size flags, remove contig flag from flags */
|
||||||
flags &= ~RTE_MEMZONE_IOVA_CONTIG;
|
flags &= ~RTE_MEMZONE_IOVA_CONTIG;
|
||||||
|
|
||||||
if (len == 0) {
|
if (len == 0 && bound == 0) {
|
||||||
/* len == 0 is only allowed for non-contiguous zones */
|
/* no size constraints were placed, so use malloc elem len */
|
||||||
if (contig) {
|
requested_len = 0;
|
||||||
RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n");
|
mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
|
||||||
rte_errno = EINVAL;
|
align, contig);
|
||||||
return NULL;
|
} else {
|
||||||
}
|
if (len == 0)
|
||||||
if (bound != 0)
|
|
||||||
requested_len = bound;
|
requested_len = bound;
|
||||||
else {
|
/* allocate memory on heap */
|
||||||
requested_len = find_heap_max_free_elem(&socket_id, align);
|
mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
|
||||||
if (requested_len == 0) {
|
flags, align, bound, contig);
|
||||||
rte_errno = ENOMEM;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* allocate memory on heap */
|
|
||||||
void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
|
|
||||||
align, bound, contig);
|
|
||||||
if (mz_addr == NULL) {
|
if (mz_addr == NULL) {
|
||||||
rte_errno = ENOMEM;
|
rte_errno = ENOMEM;
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -213,8 +172,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
|
|||||||
snprintf(mz->name, sizeof(mz->name), "%s", name);
|
snprintf(mz->name, sizeof(mz->name), "%s", name);
|
||||||
mz->iova = rte_malloc_virt2iova(mz_addr);
|
mz->iova = rte_malloc_virt2iova(mz_addr);
|
||||||
mz->addr = mz_addr;
|
mz->addr = mz_addr;
|
||||||
mz->len = (requested_len == 0 ?
|
mz->len = requested_len == 0 ?
|
||||||
(elem->size - MALLOC_ELEM_OVERHEAD) : requested_len);
|
elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
|
||||||
|
requested_len;
|
||||||
mz->hugepage_sz = elem->msl->page_sz;
|
mz->hugepage_sz = elem->msl->page_sz;
|
||||||
mz->socket_id = elem->msl->socket_id;
|
mz->socket_id = elem->msl->socket_id;
|
||||||
mz->flags = 0;
|
mz->flags = 0;
|
||||||
|
@ -81,8 +81,12 @@ struct rte_memzone {
|
|||||||
* memzones from memory that is already available. It will not trigger any
|
* memzones from memory that is already available. It will not trigger any
|
||||||
* new allocations.
|
* new allocations.
|
||||||
*
|
*
|
||||||
* @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
|
* @note: When reserving memzones with len set to 0, it is preferable to also
|
||||||
* supported.
|
* set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
|
||||||
|
* will likely not yield expected results. Specifically, the resulting memzone
|
||||||
|
* may not necessarily be the biggest memzone available, but rather the biggest
|
||||||
|
* memzone available on the socket id corresponding to the lcore from which
|
||||||
|
* reservation was called.
|
||||||
*
|
*
|
||||||
* @param name
|
* @param name
|
||||||
* The name of the memzone. If it already exists, the function will
|
* The name of the memzone. If it already exists, the function will
|
||||||
@ -141,8 +145,12 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
|
|||||||
* memzones from memory that is already available. It will not trigger any
|
* memzones from memory that is already available. It will not trigger any
|
||||||
* new allocations.
|
* new allocations.
|
||||||
*
|
*
|
||||||
* @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
|
* @note: When reserving memzones with len set to 0, it is preferable to also
|
||||||
* supported.
|
* set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
|
||||||
|
* will likely not yield expected results. Specifically, the resulting memzone
|
||||||
|
* may not necessarily be the biggest memzone available, but rather the biggest
|
||||||
|
* memzone available on the socket id corresponding to the lcore from which
|
||||||
|
* reservation was called.
|
||||||
*
|
*
|
||||||
* @param name
|
* @param name
|
||||||
* The name of the memzone. If it already exists, the function will
|
* The name of the memzone. If it already exists, the function will
|
||||||
@ -206,8 +214,12 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
|
|||||||
* memzones from memory that is already available. It will not trigger any
|
* memzones from memory that is already available. It will not trigger any
|
||||||
* new allocations.
|
* new allocations.
|
||||||
*
|
*
|
||||||
* @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
|
* @note: When reserving memzones with len set to 0, it is preferable to also
|
||||||
* supported.
|
* set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
|
||||||
|
* will likely not yield expected results. Specifically, the resulting memzone
|
||||||
|
* may not necessarily be the biggest memzone available, but rather the biggest
|
||||||
|
* memzone available on the socket id corresponding to the lcore from which
|
||||||
|
* reservation was called.
|
||||||
*
|
*
|
||||||
* @param name
|
* @param name
|
||||||
* The name of the memzone. If it already exists, the function will
|
* The name of the memzone. If it already exists, the function will
|
||||||
|
@ -467,61 +467,69 @@ test_memzone_reserve_flags(void)
|
|||||||
|
|
||||||
/* Find the heap with the greatest free block size */
|
/* Find the heap with the greatest free block size */
|
||||||
static size_t
|
static size_t
|
||||||
find_max_block_free_size(const unsigned _align)
|
find_max_block_free_size(unsigned int align, unsigned int socket_id)
|
||||||
{
|
{
|
||||||
struct rte_malloc_socket_stats stats;
|
struct rte_malloc_socket_stats stats;
|
||||||
unsigned i, align = _align;
|
size_t len, overhead;
|
||||||
size_t len = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
|
rte_malloc_get_socket_stats(socket_id, &stats);
|
||||||
rte_malloc_get_socket_stats(i, &stats);
|
|
||||||
if (stats.greatest_free_size > len)
|
|
||||||
len = stats.greatest_free_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (align < RTE_CACHE_LINE_SIZE)
|
len = stats.greatest_free_size;
|
||||||
align = RTE_CACHE_LINE_ROUNDUP(align+1);
|
overhead = MALLOC_ELEM_OVERHEAD;
|
||||||
|
|
||||||
if (len <= MALLOC_ELEM_OVERHEAD + align)
|
if (len == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return len - MALLOC_ELEM_OVERHEAD - align;
|
align = RTE_CACHE_LINE_ROUNDUP(align);
|
||||||
|
overhead += align;
|
||||||
|
|
||||||
|
if (len < overhead)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return len - overhead;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
test_memzone_reserve_max(void)
|
test_memzone_reserve_max(void)
|
||||||
{
|
{
|
||||||
const struct rte_memzone *mz;
|
unsigned int i;
|
||||||
size_t maxlen;
|
|
||||||
|
|
||||||
maxlen = find_max_block_free_size(0);
|
for (i = 0; i < rte_socket_count(); i++) {
|
||||||
|
const struct rte_memzone *mz;
|
||||||
|
size_t maxlen;
|
||||||
|
int socket;
|
||||||
|
|
||||||
if (maxlen == 0) {
|
socket = rte_socket_id_by_idx(i);
|
||||||
printf("There is no space left!\n");
|
maxlen = find_max_block_free_size(0, socket);
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
mz = rte_memzone_reserve(TEST_MEMZONE_NAME("max_zone"), 0,
|
if (maxlen == 0) {
|
||||||
SOCKET_ID_ANY, 0);
|
printf("There is no space left!\n");
|
||||||
if (mz == NULL){
|
return 0;
|
||||||
printf("Failed to reserve a big chunk of memory - %s\n",
|
}
|
||||||
rte_strerror(rte_errno));
|
|
||||||
rte_dump_physmem_layout(stdout);
|
|
||||||
rte_memzone_dump(stdout);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mz->len != maxlen) {
|
mz = rte_memzone_reserve(TEST_MEMZONE_NAME("max_zone"), 0,
|
||||||
printf("Memzone reserve with 0 size did not return bigest block\n");
|
socket, 0);
|
||||||
printf("Expected size = %zu, actual size = %zu\n", maxlen, mz->len);
|
if (mz == NULL) {
|
||||||
rte_dump_physmem_layout(stdout);
|
printf("Failed to reserve a big chunk of memory - %s\n",
|
||||||
rte_memzone_dump(stdout);
|
rte_strerror(rte_errno));
|
||||||
return -1;
|
rte_dump_physmem_layout(stdout);
|
||||||
}
|
rte_memzone_dump(stdout);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (rte_memzone_free(mz)) {
|
if (mz->len != maxlen) {
|
||||||
printf("Fail memzone free\n");
|
printf("Memzone reserve with 0 size did not return bigest block\n");
|
||||||
return -1;
|
printf("Expected size = %zu, actual size = %zu\n",
|
||||||
|
maxlen, mz->len);
|
||||||
|
rte_dump_physmem_layout(stdout);
|
||||||
|
rte_memzone_dump(stdout);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rte_memzone_free(mz)) {
|
||||||
|
printf("Fail memzone free\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -530,45 +538,62 @@ test_memzone_reserve_max(void)
|
|||||||
static int
|
static int
|
||||||
test_memzone_reserve_max_aligned(void)
|
test_memzone_reserve_max_aligned(void)
|
||||||
{
|
{
|
||||||
const struct rte_memzone *mz;
|
unsigned int i;
|
||||||
size_t maxlen = 0;
|
|
||||||
|
|
||||||
/* random alignment */
|
for (i = 0; i < rte_socket_count(); i++) {
|
||||||
rte_srand((unsigned)rte_rdtsc());
|
const struct rte_memzone *mz;
|
||||||
const unsigned align = 1 << ((rte_rand() % 8) + 5); /* from 128 up to 4k alignment */
|
size_t maxlen, minlen = 0;
|
||||||
|
int socket;
|
||||||
|
|
||||||
maxlen = find_max_block_free_size(align);
|
socket = rte_socket_id_by_idx(i);
|
||||||
|
|
||||||
if (maxlen == 0) {
|
/* random alignment */
|
||||||
printf("There is no space left for biggest %u-aligned memzone!\n", align);
|
rte_srand((unsigned int)rte_rdtsc());
|
||||||
return 0;
|
const unsigned int align = 1 << ((rte_rand() % 8) + 5); /* from 128 up to 4k alignment */
|
||||||
|
|
||||||
|
/* memzone size may be between size and size - align */
|
||||||
|
minlen = find_max_block_free_size(align, socket);
|
||||||
|
maxlen = find_max_block_free_size(0, socket);
|
||||||
|
|
||||||
|
if (minlen == 0 || maxlen == 0) {
|
||||||
|
printf("There is no space left for biggest %u-aligned memzone!\n",
|
||||||
|
align);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mz = rte_memzone_reserve_aligned(
|
||||||
|
TEST_MEMZONE_NAME("max_zone_aligned"),
|
||||||
|
0, socket, 0, align);
|
||||||
|
if (mz == NULL) {
|
||||||
|
printf("Failed to reserve a big chunk of memory - %s\n",
|
||||||
|
rte_strerror(rte_errno));
|
||||||
|
rte_dump_physmem_layout(stdout);
|
||||||
|
rte_memzone_dump(stdout);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (mz->addr != RTE_PTR_ALIGN(mz->addr, align)) {
|
||||||
|
printf("Memzone reserve with 0 size and alignment %u did not return aligned block\n",
|
||||||
|
align);
|
||||||
|
rte_dump_physmem_layout(stdout);
|
||||||
|
rte_memzone_dump(stdout);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mz->len < minlen || mz->len > maxlen) {
|
||||||
|
printf("Memzone reserve with 0 size and alignment %u did not return"
|
||||||
|
" bigest block\n", align);
|
||||||
|
printf("Expected size = %zu-%zu, actual size = %zu\n",
|
||||||
|
minlen, maxlen, mz->len);
|
||||||
|
rte_dump_physmem_layout(stdout);
|
||||||
|
rte_memzone_dump(stdout);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rte_memzone_free(mz)) {
|
||||||
|
printf("Fail memzone free\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mz = rte_memzone_reserve_aligned(TEST_MEMZONE_NAME("max_zone_aligned"),
|
|
||||||
0, SOCKET_ID_ANY, 0, align);
|
|
||||||
if (mz == NULL){
|
|
||||||
printf("Failed to reserve a big chunk of memory - %s\n",
|
|
||||||
rte_strerror(rte_errno));
|
|
||||||
rte_dump_physmem_layout(stdout);
|
|
||||||
rte_memzone_dump(stdout);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mz->len != maxlen) {
|
|
||||||
printf("Memzone reserve with 0 size and alignment %u did not return"
|
|
||||||
" bigest block\n", align);
|
|
||||||
printf("Expected size = %zu, actual size = %zu\n",
|
|
||||||
maxlen, mz->len);
|
|
||||||
rte_dump_physmem_layout(stdout);
|
|
||||||
rte_memzone_dump(stdout);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rte_memzone_free(mz)) {
|
|
||||||
printf("Fail memzone free\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user