2017-12-19 15:49:03 +00:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
* Copyright(c) 2010-2014 Intel Corporation
|
2012-09-04 13:54:00 +01:00
|
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <sys/queue.h>
|
|
|
|
|
|
|
|
#include <rte_memory.h>
|
|
|
|
#include <rte_eal.h>
|
2012-12-20 00:00:00 +01:00
|
|
|
#include <rte_eal_memconfig.h>
|
2012-09-04 13:54:00 +01:00
|
|
|
#include <rte_launch.h>
|
|
|
|
#include <rte_per_lcore.h>
|
|
|
|
#include <rte_lcore.h>
|
|
|
|
#include <rte_common.h>
|
|
|
|
#include <rte_string_fns.h>
|
|
|
|
#include <rte_spinlock.h>
|
2012-12-20 00:00:00 +01:00
|
|
|
#include <rte_memcpy.h>
|
|
|
|
#include <rte_atomic.h>
|
2012-09-04 13:54:00 +01:00
|
|
|
|
|
|
|
#include "malloc_elem.h"
|
|
|
|
#include "malloc_heap.h"
|
|
|
|
|
2015-07-15 17:32:21 +01:00
|
|
|
static unsigned
|
|
|
|
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
|
2012-09-04 13:54:00 +01:00
|
|
|
{
|
2015-07-15 17:32:21 +01:00
|
|
|
unsigned check_flag = 0;
|
|
|
|
|
|
|
|
if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
switch (hugepage_sz) {
|
|
|
|
case RTE_PGSIZE_256K:
|
|
|
|
check_flag = RTE_MEMZONE_256KB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_2M:
|
|
|
|
check_flag = RTE_MEMZONE_2MB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_16M:
|
|
|
|
check_flag = RTE_MEMZONE_16MB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_256M:
|
|
|
|
check_flag = RTE_MEMZONE_256MB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_512M:
|
|
|
|
check_flag = RTE_MEMZONE_512MB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_1G:
|
|
|
|
check_flag = RTE_MEMZONE_1GB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_4G:
|
|
|
|
check_flag = RTE_MEMZONE_4GB;
|
|
|
|
break;
|
|
|
|
case RTE_PGSIZE_16G:
|
|
|
|
check_flag = RTE_MEMZONE_16GB;
|
|
|
|
}
|
|
|
|
|
2016-01-27 21:58:30 +08:00
|
|
|
return check_flag & flags;
|
2012-09-04 13:54:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-07-15 17:32:21 +01:00
|
|
|
* Expand the heap with a memseg.
|
|
|
|
* This reserves the zone and sets a dummy malloc_elem header at the end
|
2012-09-04 13:54:00 +01:00
|
|
|
* to prevent overflow. The rest of the zone is added to free list as a single
|
|
|
|
* large free block
|
|
|
|
*/
|
2015-07-15 17:32:21 +01:00
|
|
|
static void
|
|
|
|
malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
|
2012-09-04 13:54:00 +01:00
|
|
|
{
|
|
|
|
/* allocate the memory block headers, one at end, one at start */
|
2015-07-15 17:32:21 +01:00
|
|
|
struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
|
|
|
|
struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
|
|
|
|
ms->len - MALLOC_ELEM_OVERHEAD);
|
2014-11-19 12:26:06 +00:00
|
|
|
end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
|
2015-07-15 17:32:21 +01:00
|
|
|
const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
|
2012-09-04 13:54:00 +01:00
|
|
|
|
2015-07-15 17:32:21 +01:00
|
|
|
malloc_elem_init(start_elem, heap, ms, elem_size);
|
2012-09-04 13:54:00 +01:00
|
|
|
malloc_elem_mkend(end_elem, start_elem);
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
malloc_elem_free_list_insert(start_elem);
|
2012-09-04 13:54:00 +01:00
|
|
|
|
2015-07-15 17:32:21 +01:00
|
|
|
heap->total_size += elem_size;
|
2012-09-04 13:54:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Iterates through the freelist for a heap to find a free element
|
|
|
|
* which can store data of the required size and with the requested alignment.
|
2015-07-15 17:32:21 +01:00
|
|
|
* If size is 0, find the biggest available elem.
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
* Returns null on failure, or pointer to element on success.
|
2012-09-04 13:54:00 +01:00
|
|
|
*/
|
|
|
|
static struct malloc_elem *
|
2015-07-15 17:32:21 +01:00
|
|
|
find_suitable_element(struct malloc_heap *heap, size_t size,
|
|
|
|
unsigned flags, size_t align, size_t bound)
|
2012-09-04 13:54:00 +01:00
|
|
|
{
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
size_t idx;
|
2015-07-15 17:32:21 +01:00
|
|
|
struct malloc_elem *elem, *alt_elem = NULL;
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
|
|
|
|
for (idx = malloc_elem_free_list_index(size);
|
2015-07-15 17:32:21 +01:00
|
|
|
idx < RTE_HEAP_NUM_FREELISTS; idx++) {
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
for (elem = LIST_FIRST(&heap->free_head[idx]);
|
2015-07-15 17:32:21 +01:00
|
|
|
!!elem; elem = LIST_NEXT(elem, free_list)) {
|
|
|
|
if (malloc_elem_can_hold(elem, size, align, bound)) {
|
|
|
|
if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
|
|
|
|
return elem;
|
|
|
|
if (alt_elem == NULL)
|
|
|
|
alt_elem = elem;
|
|
|
|
}
|
2013-06-03 00:00:00 +00:00
|
|
|
}
|
2012-09-04 13:54:00 +01:00
|
|
|
}
|
2015-07-15 17:32:21 +01:00
|
|
|
|
|
|
|
if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
|
|
|
|
return alt_elem;
|
|
|
|
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
return NULL;
|
2012-09-04 13:54:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-07-15 17:32:21 +01:00
|
|
|
* Main function to allocate a block of memory from the heap.
|
|
|
|
* It locks the free list, scans it, and adds a new memseg if the
|
|
|
|
* scan fails. Once the new memseg is added, it re-scans and should return
|
2012-09-04 13:54:00 +01:00
|
|
|
* the new element after releasing the lock.
|
|
|
|
*/
|
|
|
|
void *
|
|
|
|
malloc_heap_alloc(struct malloc_heap *heap,
|
2015-07-15 17:32:21 +01:00
|
|
|
const char *type __attribute__((unused)), size_t size, unsigned flags,
|
|
|
|
size_t align, size_t bound)
|
2012-09-04 13:54:00 +01:00
|
|
|
{
|
2015-07-15 17:32:21 +01:00
|
|
|
struct malloc_elem *elem;
|
|
|
|
|
2014-11-19 12:26:06 +00:00
|
|
|
size = RTE_CACHE_LINE_ROUNDUP(size);
|
|
|
|
align = RTE_CACHE_LINE_ROUNDUP(align);
|
2015-07-15 17:32:21 +01:00
|
|
|
|
2012-09-04 13:54:00 +01:00
|
|
|
rte_spinlock_lock(&heap->lock);
|
2012-12-20 00:00:00 +01:00
|
|
|
|
2015-07-15 17:32:21 +01:00
|
|
|
elem = find_suitable_element(heap, size, flags, align, bound);
|
|
|
|
if (elem != NULL) {
|
|
|
|
elem = malloc_elem_alloc(elem, size, align, bound);
|
2012-12-20 00:00:00 +01:00
|
|
|
/* increase heap's count of allocated elements */
|
|
|
|
heap->alloc_count++;
|
|
|
|
}
|
2012-09-04 13:54:00 +01:00
|
|
|
rte_spinlock_unlock(&heap->lock);
|
2012-12-20 00:00:00 +01:00
|
|
|
|
2015-07-15 17:32:21 +01:00
|
|
|
return elem == NULL ? NULL : (void *)(&elem[1]);
|
2012-12-20 00:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Function to retrieve data for heap on given socket
|
|
|
|
*/
|
|
|
|
int
|
2017-12-21 17:32:04 +00:00
|
|
|
malloc_heap_get_stats(struct malloc_heap *heap,
|
2012-12-20 00:00:00 +01:00
|
|
|
struct rte_malloc_socket_stats *socket_stats)
|
|
|
|
{
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
size_t idx;
|
|
|
|
struct malloc_elem *elem;
|
2012-12-20 00:00:00 +01:00
|
|
|
|
2017-12-21 17:32:04 +00:00
|
|
|
rte_spinlock_lock(&heap->lock);
|
|
|
|
|
2012-12-20 00:00:00 +01:00
|
|
|
/* Initialise variables for heap */
|
|
|
|
socket_stats->free_count = 0;
|
|
|
|
socket_stats->heap_freesz_bytes = 0;
|
|
|
|
socket_stats->greatest_free_size = 0;
|
|
|
|
|
|
|
|
/* Iterate through free list */
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 17:17:09 -04:00
|
|
|
for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
|
|
|
|
for (elem = LIST_FIRST(&heap->free_head[idx]);
|
|
|
|
!!elem; elem = LIST_NEXT(elem, free_list))
|
|
|
|
{
|
|
|
|
socket_stats->free_count++;
|
|
|
|
socket_stats->heap_freesz_bytes += elem->size;
|
|
|
|
if (elem->size > socket_stats->greatest_free_size)
|
|
|
|
socket_stats->greatest_free_size = elem->size;
|
|
|
|
}
|
2012-12-20 00:00:00 +01:00
|
|
|
}
|
|
|
|
/* Get stats on overall heap and allocated memory on this heap */
|
|
|
|
socket_stats->heap_totalsz_bytes = heap->total_size;
|
|
|
|
socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
|
|
|
|
socket_stats->heap_freesz_bytes);
|
|
|
|
socket_stats->alloc_count = heap->alloc_count;
|
2017-12-21 17:32:04 +00:00
|
|
|
|
|
|
|
rte_spinlock_unlock(&heap->lock);
|
2012-12-20 00:00:00 +01:00
|
|
|
return 0;
|
2012-09-04 13:54:00 +01:00
|
|
|
}
|
2015-07-15 17:32:21 +01:00
|
|
|
|
|
|
|
int
|
|
|
|
rte_eal_malloc_heap_init(void)
|
|
|
|
{
|
|
|
|
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
|
|
|
|
unsigned ms_cnt;
|
|
|
|
struct rte_memseg *ms;
|
|
|
|
|
|
|
|
if (mcfg == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
for (ms = &mcfg->memseg[0], ms_cnt = 0;
|
|
|
|
(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
|
2015-08-07 16:27:32 +01:00
|
|
|
ms_cnt++, ms++) {
|
2015-07-15 17:32:21 +01:00
|
|
|
malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
|
2015-08-07 16:27:32 +01:00
|
|
|
}
|
2015-07-15 17:32:21 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|