/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include "malloc_elem.h"
#include "malloc_heap.h"

#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
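
/*
 * Layout note: each heap element is laid out in memory as
 *   [elem header][optional pad][user data][trailer]
 * MALLOC_ELEM_HEADER_LEN, MALLOC_ELEM_TRAILER_LEN and their sum,
 * MALLOC_ELEM_OVERHEAD, are defined in malloc_elem.h.
 */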

/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem,
		struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
{
	elem->heap = heap;
	elem->ms = ms;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->size = size;
	elem->pad = 0;
	set_header(elem);
	set_trailer(elem);
}
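
/*
 * Note: set_header()/set_trailer() are declared in malloc_elem.h. When the
 * library is built with malloc debugging enabled, they write guard cookies
 * around the data region so that overruns can be detected later; otherwise
 * they are effectively no-ops.
 */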

void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}
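
/*
 * Note on malloc_elem_insert() above: the heap's element list is kept sorted
 * by address, so the insertion scan starts from whichever end of the list is
 * closer to the new element. This only halves the expected walk; the worst
 * case remains linear in the number of elements.
 */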

/*
 * Calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	const size_t bmask = ~(bound - 1);
	uintptr_t end_pt = (uintptr_t)elem +
			elem->size - MALLOC_ELEM_TRAILER_LEN;
	uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
	uintptr_t new_elem_start;

	/* check boundary */
	if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
		/* recalculate end_pt from the adjusted start; since
		 * size <= bound this cannot overrun the boundary, and it
		 * avoids wrongly rejecting bounded requests when the
		 * alignment is bigger than the boundary.
		 */
		end_pt = new_data_start + size;
		if (((end_pt - 1) & bmask) != (new_data_start & bmask))
			return NULL;
	}

	new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

	/* if the new start point is before the existing start, it won't fit */
	return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
}

/*
 * Use elem_start_pt to determine if we can meet the size and
 * alignment request from the current element.
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	return elem_start_pt(elem, size, align, bound) != NULL;
}

/*
 * Split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
}

/*
 * Our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	return elem->next == RTE_PTR_ADD(elem, elem->size);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size);
}
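
/*
 * Two elements are adjacent when the second one's header starts exactly
 * where the first one ends (header + data + trailer). Neighbours in the
 * address-sorted list are not necessarily adjacent in memory, since a heap
 * may be built from physically separate memory segments.
 */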

/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
			MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
			index : RTE_HEAP_NUM_FREELISTS - 1;
}
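
/*
 * Worked examples for the index computation above:
 *   size = 256:  256 <= 2^8, so index 0.
 *   size = 257:  next power of 2 is 2^9, log2 = 9,
 *                index = (9 - 8 + 1) / 2 = 1, i.e. range (2^8, 2^10].
 *   size = 5000: next power of 2 is 2^13, log2 = 13,
 *                index = (13 - 8 + 1) / 2 = 3, i.e. range (2^12, 2^14].
 */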

/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}

/*
 * Reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc, so parameter checking
 * is not done here, as it has already been done there.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything
				      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}
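
/*
 * Note on malloc_elem_alloc() above: elem_start_pt() places the data block
 * at the *end* of the chosen free element, so any leftover room gathers at
 * the front. Space left behind the data (trailer_size) is split off as a
 * new free element when big enough. Space left in front (old_elem_size) is
 * either recorded as padding behind a dummy ELEM_PAD header, when too small
 * to stand alone, or split off and re-inserted as a smaller free element.
 */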

/*
 * Join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;
	elem1->next = next;
}

struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if next element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header and trailer */
		memset(erase, 0, MALLOC_ELEM_OVERHEAD);
	}

	/*
	 * check if prev element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;

		/* we will want to erase trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header and trailer */
		memset(erase, 0, MALLOC_ELEM_OVERHEAD);

		elem = new_elem;
	}

	return elem;
}

/*
 * Free a malloc_elem block by adding it to the free list. If the
 * blocks either immediately before or immediately after the newly freed
 * block are also free, the blocks are merged together.
 */
int
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, sizeof(*elem));
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

	memset(ptr, 0, data_len);

	return 0;
}
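
/*
 * Note on malloc_elem_free() above: ptr and data_len are captured before
 * joining with neighbours, so the final memset() zeroes only the data area
 * of the element being freed; the headers and trailers consumed by the
 * join are erased separately inside malloc_elem_join_adjacent_free().
 */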

/*
 * Attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size)
		return 0;

	/* check if there is a next element, it's free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit, by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);

		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}
	return 0;
}
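
/*
 * Note on malloc_elem_resize() above: growth is strictly in place. If the
 * element that follows is missing, busy, not adjacent, or too small, the
 * function reports failure and it is up to the caller (e.g. rte_realloc())
 * to fall back to allocating a new element and copying the data across.
 */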

static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, "  len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, "  prev: %p next: %p\n", elem->prev, elem->next);
}