2017-12-19 15:49:03 +00:00
|
|
|
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdio.h>
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
#include <string.h>
|
2012-09-04 12:54:00 +00:00
|
|
|
#include <sys/queue.h>
|
|
|
|
|
|
|
|
#include <rte_memory.h>
|
|
|
|
#include <rte_eal.h>
|
|
|
|
#include <rte_launch.h>
|
|
|
|
#include <rte_per_lcore.h>
|
|
|
|
#include <rte_lcore.h>
|
|
|
|
#include <rte_debug.h>
|
|
|
|
#include <rte_common.h>
|
|
|
|
#include <rte_spinlock.h>
|
|
|
|
|
|
|
|
#include "malloc_elem.h"
|
|
|
|
#include "malloc_heap.h"
|
|
|
|
|
2014-11-19 12:26:06 +00:00
|
|
|
/*
 * Smallest data area considered worth a separate element: one cache line.
 * Used together with MALLOC_ELEM_OVERHEAD as the threshold when deciding
 * whether to split an element or to leave the space as padding/slack.
 */
#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
|
2012-09-04 12:54:00 +00:00
|
|
|
|
|
|
|
/*
|
2017-07-07 00:36:35 +00:00
|
|
|
* Initialize a general malloc_elem header structure
|
2012-09-04 12:54:00 +00:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
malloc_elem_init(struct malloc_elem *elem,
|
2015-07-15 16:32:21 +00:00
|
|
|
struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
|
2012-09-04 12:54:00 +00:00
|
|
|
{
|
|
|
|
elem->heap = heap;
|
2015-07-15 16:32:21 +00:00
|
|
|
elem->ms = ms;
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
elem->prev = NULL;
|
|
|
|
memset(&elem->free_list, 0, sizeof(elem->free_list));
|
2012-09-04 12:54:00 +00:00
|
|
|
elem->state = ELEM_FREE;
|
|
|
|
elem->size = size;
|
|
|
|
elem->pad = 0;
|
|
|
|
set_header(elem);
|
|
|
|
set_trailer(elem);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-07-07 00:36:35 +00:00
|
|
|
* Initialize a dummy malloc_elem header for the end-of-memseg marker
|
2012-09-04 12:54:00 +00:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
|
|
|
|
{
|
2015-07-15 16:32:21 +00:00
|
|
|
malloc_elem_init(elem, prev->heap, prev->ms, 0);
|
2012-09-04 12:54:00 +00:00
|
|
|
elem->prev = prev;
|
|
|
|
elem->state = ELEM_BUSY; /* mark busy so its never merged */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* calculate the starting point of where data of the requested size
|
|
|
|
* and alignment would fit in the current element. If the data doesn't
|
|
|
|
* fit, return NULL.
|
|
|
|
*/
|
|
|
|
static void *
|
2015-07-15 16:32:21 +00:00
|
|
|
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
|
|
|
|
size_t bound)
|
2012-09-04 12:54:00 +00:00
|
|
|
{
|
2015-07-15 16:32:21 +00:00
|
|
|
const size_t bmask = ~(bound - 1);
|
|
|
|
uintptr_t end_pt = (uintptr_t)elem +
|
2012-09-04 12:54:00 +00:00
|
|
|
elem->size - MALLOC_ELEM_TRAILER_LEN;
|
2015-07-15 16:32:21 +00:00
|
|
|
uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
|
|
|
|
uintptr_t new_elem_start;
|
|
|
|
|
|
|
|
/* check boundary */
|
|
|
|
if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
|
|
|
|
end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
|
|
|
|
new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
|
|
|
|
if (((end_pt - 1) & bmask) != (new_data_start & bmask))
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
|
2012-09-04 12:54:00 +00:00
|
|
|
|
|
|
|
/* if the new start point is before the exist start, it won't fit */
|
|
|
|
return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Use elem_start_pt to determine whether the size, alignment and
 * boundary request can be met from the current element.
 *
 * Returns non-zero when a start point exists, 0 otherwise.
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	const void *start = elem_start_pt(elem, size, align, bound);

	return start != NULL;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* split an existing element into two smaller elements at the given
|
|
|
|
* split_pt parameter.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
|
|
|
|
{
|
|
|
|
struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
|
2015-07-15 16:32:21 +00:00
|
|
|
const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
|
|
|
|
const size_t new_elem_size = elem->size - old_elem_size;
|
2012-09-04 12:54:00 +00:00
|
|
|
|
2015-07-15 16:32:21 +00:00
|
|
|
malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
|
2012-09-04 12:54:00 +00:00
|
|
|
split_pt->prev = elem;
|
|
|
|
next_elem->prev = split_pt;
|
|
|
|
elem->size = old_elem_size;
|
|
|
|
set_trailer(elem);
|
|
|
|
}
|
|
|
|
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
/*
|
|
|
|
* Given an element size, compute its freelist index.
|
|
|
|
* We free an element into the freelist containing similarly-sized elements.
|
|
|
|
* We try to allocate elements starting with the freelist containing
|
|
|
|
* similarly-sized elements, and if necessary, we search freelists
|
|
|
|
* containing larger elements.
|
|
|
|
*
|
|
|
|
* Example element size ranges for a heap with five free lists:
|
|
|
|
* heap->free_head[0] - (0 , 2^8]
|
|
|
|
* heap->free_head[1] - (2^8 , 2^10]
|
|
|
|
* heap->free_head[2] - (2^10 ,2^12]
|
|
|
|
* heap->free_head[3] - (2^12, 2^14]
|
|
|
|
* heap->free_head[4] - (2^14, MAX_SIZE]
|
|
|
|
*/
|
|
|
|
size_t
|
|
|
|
malloc_elem_free_list_index(size_t size)
|
|
|
|
{
|
|
|
|
#define MALLOC_MINSIZE_LOG2 8
|
|
|
|
#define MALLOC_LOG2_INCREMENT 2
|
|
|
|
|
|
|
|
size_t log2;
|
|
|
|
size_t index;
|
|
|
|
|
|
|
|
if (size <= (1UL << MALLOC_MINSIZE_LOG2))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Find next power of 2 >= size. */
|
|
|
|
log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
|
|
|
|
|
|
|
|
/* Compute freelist index, based on log2(size). */
|
|
|
|
index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
|
|
|
|
MALLOC_LOG2_INCREMENT;
|
|
|
|
|
2016-01-27 13:58:30 +00:00
|
|
|
return index <= RTE_HEAP_NUM_FREELISTS-1?
|
|
|
|
index: RTE_HEAP_NUM_FREELISTS-1;
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the specified element to its heap's free list.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
malloc_elem_free_list_insert(struct malloc_elem *elem)
|
|
|
|
{
|
2015-07-15 16:32:21 +00:00
|
|
|
size_t idx;
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
|
2015-07-15 16:32:21 +00:00
|
|
|
idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
elem->state = ELEM_FREE;
|
|
|
|
LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove the specified element from its heap's free list.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
elem_free_list_remove(struct malloc_elem *elem)
|
|
|
|
{
|
|
|
|
LIST_REMOVE(elem, free_list);
|
|
|
|
}
|
|
|
|
|
2012-09-04 12:54:00 +00:00
|
|
|
/*
|
|
|
|
* reserve a block of data in an existing malloc_elem. If the malloc_elem
|
|
|
|
* is much larger than the data block requested, we split the element in two.
|
|
|
|
* This function is only called from malloc_heap_alloc so parameter checking
|
|
|
|
* is not done here, as it's done there previously.
|
|
|
|
*/
|
|
|
|
struct malloc_elem *
|
2015-07-15 16:32:21 +00:00
|
|
|
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
|
|
|
|
size_t bound)
|
2012-09-04 12:54:00 +00:00
|
|
|
{
|
2015-07-15 16:32:21 +00:00
|
|
|
struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
|
|
|
|
const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
|
|
|
|
const size_t trailer_size = elem->size - old_elem_size - size -
|
|
|
|
MALLOC_ELEM_OVERHEAD;
|
|
|
|
|
|
|
|
elem_free_list_remove(elem);
|
2012-09-04 12:54:00 +00:00
|
|
|
|
2015-07-15 16:32:21 +00:00
|
|
|
if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
|
|
|
|
/* split it, too much free space after elem */
|
|
|
|
struct malloc_elem *new_free_elem =
|
|
|
|
RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
|
|
|
|
|
|
|
|
split_elem(elem, new_free_elem);
|
|
|
|
malloc_elem_free_list_insert(new_free_elem);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
|
2012-09-04 12:54:00 +00:00
|
|
|
/* don't split it, pad the element instead */
|
|
|
|
elem->state = ELEM_BUSY;
|
|
|
|
elem->pad = old_elem_size;
|
|
|
|
|
|
|
|
/* put a dummy header in padding, to point to real element header */
|
2017-07-07 00:36:35 +00:00
|
|
|
if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything
|
2012-09-04 12:54:00 +00:00
|
|
|
* is cache-line aligned */
|
|
|
|
new_elem->pad = elem->pad;
|
|
|
|
new_elem->state = ELEM_PAD;
|
|
|
|
new_elem->size = elem->size - elem->pad;
|
|
|
|
set_header(new_elem);
|
|
|
|
}
|
|
|
|
|
|
|
|
return new_elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we are going to split the element in two. The original element
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
* remains free, and the new element is the one allocated.
|
|
|
|
* Re-insert original element, in case its new size makes it
|
|
|
|
* belong on a different list.
|
2012-09-04 12:54:00 +00:00
|
|
|
*/
|
|
|
|
split_elem(elem, new_elem);
|
|
|
|
new_elem->state = ELEM_BUSY;
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
malloc_elem_free_list_insert(elem);
|
2012-09-04 12:54:00 +00:00
|
|
|
|
|
|
|
return new_elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2017-11-10 08:24:23 +00:00
|
|
|
* join two struct malloc_elem together. elem1 and elem2 must
|
2012-09-04 12:54:00 +00:00
|
|
|
* be contiguous in memory.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
|
|
|
|
{
|
|
|
|
struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
|
|
|
|
elem1->size += elem2->size;
|
|
|
|
next->prev = elem1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* free a malloc_elem block by adding it to the free list. If the
|
|
|
|
* blocks either immediately before or immediately after newly freed block
|
|
|
|
* are also free, the blocks are merged together.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
malloc_elem_free(struct malloc_elem *elem)
|
|
|
|
{
|
|
|
|
if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
rte_spinlock_lock(&(elem->heap->lock));
|
2017-09-09 07:33:19 +00:00
|
|
|
size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
|
2016-07-05 11:01:15 +00:00
|
|
|
uint8_t *ptr = (uint8_t *)&elem[1];
|
2012-09-04 12:54:00 +00:00
|
|
|
struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
|
|
|
|
if (next->state == ELEM_FREE){
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
/* remove from free list, join to this one */
|
|
|
|
elem_free_list_remove(next);
|
2012-09-04 12:54:00 +00:00
|
|
|
join_elem(elem, next);
|
2017-09-09 07:33:19 +00:00
|
|
|
sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
|
2012-09-04 12:54:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* check if previous element is free, if so join with it and return,
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
* need to re-insert in free list, as that element's size is changing
|
2012-09-04 12:54:00 +00:00
|
|
|
*/
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
|
|
|
|
elem_free_list_remove(elem->prev);
|
2012-09-04 12:54:00 +00:00
|
|
|
join_elem(elem->prev, elem);
|
2017-09-09 07:33:19 +00:00
|
|
|
sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
|
|
|
|
ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
|
2016-07-05 11:01:15 +00:00
|
|
|
elem = elem->prev;
|
2012-09-04 12:54:00 +00:00
|
|
|
}
|
2016-07-05 11:01:15 +00:00
|
|
|
malloc_elem_free_list_insert(elem);
|
|
|
|
|
2012-12-19 23:00:00 +00:00
|
|
|
/* decrease heap's count of allocated elements */
|
|
|
|
elem->heap->alloc_count--;
|
2016-07-05 11:01:15 +00:00
|
|
|
|
|
|
|
memset(ptr, 0, sz);
|
|
|
|
|
2012-09-04 12:54:00 +00:00
|
|
|
rte_spinlock_unlock(&(elem->heap->lock));
|
2012-12-19 23:00:00 +00:00
|
|
|
|
2012-09-04 12:54:00 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* attempt to resize a malloc_elem by expanding into any free space
|
|
|
|
* immediately after it in memory.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
malloc_elem_resize(struct malloc_elem *elem, size_t size)
|
|
|
|
{
|
2017-06-08 19:12:17 +00:00
|
|
|
const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
|
2012-09-04 12:54:00 +00:00
|
|
|
/* if we request a smaller size, then always return ok */
|
2017-06-08 19:12:17 +00:00
|
|
|
if (elem->size >= new_size)
|
2012-09-04 12:54:00 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
|
|
|
|
rte_spinlock_lock(&elem->heap->lock);
|
|
|
|
if (next ->state != ELEM_FREE)
|
|
|
|
goto err_return;
|
2017-06-08 19:12:17 +00:00
|
|
|
if (elem->size + next->size < new_size)
|
2012-09-04 12:54:00 +00:00
|
|
|
goto err_return;
|
|
|
|
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
/* we now know the element fits, so remove from free list,
|
|
|
|
* join the two
|
2012-09-04 12:54:00 +00:00
|
|
|
*/
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
elem_free_list_remove(next);
|
2012-09-04 12:54:00 +00:00
|
|
|
join_elem(elem, next);
|
|
|
|
|
2017-06-08 19:12:17 +00:00
|
|
|
if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
|
2012-09-04 12:54:00 +00:00
|
|
|
/* now we have a big block together. Lets cut it down a bit, by splitting */
|
|
|
|
struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
|
2014-11-19 12:26:06 +00:00
|
|
|
split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
|
2012-09-04 12:54:00 +00:00
|
|
|
split_elem(elem, split_pt);
|
malloc: fix linear complexity
Problems with lib rte_malloc:
1. Rte_malloc searches a heap's entire free list looking for the best
fit, resulting in linear complexity.
2. Heaps store free blocks in a singly-linked list, resulting in
linear complexity when rte_free needs to remove an adjacent block.
3. The library inserts and removes free blocks with ad hoc, in-line
code, rather than using linked-list functions or macros.
4. The library wastes potential small blocks of size 64 and 128 bytes
(plus overhead of 64 bytes) as padding when reusing free blocks or
resizing allocated blocks.
This patch addresses those problems as follows:
1. Replace single free list with a handful of free lists. Each free
list contains blocks of a specified size range, for example:
list[0]: (0 , 2^8]
list[1]: (2^8 , 2^10]
list[2]: (2^10, 2^12]
list[3]: (2^12, 2^14]
list[4]: (2^14, MAX_SIZE]
When allocating a block, start at the first list that can contain
a big enough block. Search subsequent lists, if necessary.
Terminate the search as soon as we find a block that is big enough.
2. Use doubly-linked lists, so that we can remove free blocks in
constant time.
3. Use BSD LIST macros, as defined in sys/queue.h and the QUEUE(3)
man page.
4. Change code to utilize small blocks of data size 64 and 128, when
splitting larger blocks.
Signed-off-by: Robert Sanford <rsanford2@gmail.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-06-23 21:17:09 +00:00
|
|
|
malloc_elem_free_list_insert(split_pt);
|
2012-09-04 12:54:00 +00:00
|
|
|
}
|
|
|
|
rte_spinlock_unlock(&elem->heap->lock);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_return:
|
|
|
|
rte_spinlock_unlock(&elem->heap->lock);
|
|
|
|
return -1;
|
|
|
|
}
|