Implement chunk allocation/deallocation hysteresis by caching one spare
chunk per arena, rather than immediately deallocating all unused chunks.
This fixes a potential performance issue when allocating/deallocating an
object of size (4kB..1MB] in a loop.

Reported by:	davidxu
commit ee0ab7cd86
parent 6c5b167508
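For context, here is the hysteresis idea in isolation: instead of unmapping every chunk the moment it becomes unused, keep the most recently freed one as a per-arena spare, so a loop that repeatedly crosses the "needs its own chunk" threshold reuses one chunk rather than mapping and unmapping on every iteration. The sketch below is illustrative only; the toy_arena/toy_chunk names, TOY_CHUNK_SIZE, and the use of malloc/free in place of chunk_alloc/chunk_dealloc are assumptions for the example, not part of malloc.c.

/* Minimal sketch of one-spare hysteresis (illustrative names, not malloc.c). */
#include <stdlib.h>

#define TOY_CHUNK_SIZE	(1 << 20)	/* stand-in for chunk_size */

typedef struct toy_chunk {
	char	bytes[TOY_CHUNK_SIZE];
} toy_chunk_t;

typedef struct toy_arena {
	toy_chunk_t	*spare;		/* most recently freed chunk, or NULL */
} toy_arena_t;

/* Prefer the cached spare over allocating a fresh chunk. */
static toy_chunk_t *
toy_chunk_alloc(toy_arena_t *arena)
{
	toy_chunk_t *chunk;

	if (arena->spare != NULL) {
		chunk = arena->spare;
		arena->spare = NULL;
		return (chunk);
	}
	return (malloc(sizeof(toy_chunk_t)));
}

/* Cache at most one freed chunk per arena; release any older spare. */
static void
toy_chunk_dealloc(toy_arena_t *arena, toy_chunk_t *chunk)
{
	if (arena->spare != NULL)
		free(arena->spare);
	arena->spare = chunk;
}

int
main(void)
{
	toy_arena_t arena = { NULL };
	int i;

	/*
	 * The problem case from the commit message: allocating and freeing
	 * an object just large enough to need its own chunk, in a loop.
	 * With the spare, every iteration after the first reuses one chunk
	 * instead of allocating and releasing a chunk each time.
	 */
	for (i = 0; i < 1000; i++)
		toy_chunk_dealloc(&arena, toy_chunk_alloc(&arena));

	if (arena.spare != NULL)
		free(arena.spare);
	return (0);
}

In the real change below, arena_chunk_alloc() and arena_chunk_dealloc() play these two roles, with the caching disabled when opt_hint is set.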
@@ -627,6 +627,17 @@ struct arena_s {
 	 */
 	arena_chunk_tree_t	chunks;
 
+	/*
+	 * In order to avoid rapid chunk allocation/deallocation when an arena
+	 * oscillates right on the cusp of needing a new chunk, cache the most
+	 * recently freed chunk.  This caching is disabled by opt_hint.
+	 *
+	 * There is one spare chunk per arena, rather than one spare total, in
+	 * order to avoid interactions between multiple threads that could make
+	 * a single spare inadequate.
+	 */
+	arena_chunk_t	*spare;
+
 	/*
 	 * bins is used to store rings of free regions of the following sizes,
 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
@@ -839,7 +850,7 @@ static arena_t *choose_arena_hard(void);
 static void	arena_run_split(arena_t *arena, arena_run_t *run, bool large,
     size_t size);
 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
-static void	arena_chunk_dealloc(arena_chunk_t *chunk);
+static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
 static void	arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
     arena_run_t *run);
 static void	arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
@@ -1776,78 +1787,101 @@ arena_chunk_alloc(arena_t *arena)
 	unsigned log2_run_pages, run_pages;
 	size_t header_size;
 
-	chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
-	if (chunk == NULL)
-		return (NULL);
-	chunk->arena = arena;
-
-	RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
-
-	/*
-	 * Claim that no pages are in use, since the header is merely overhead.
-	 */
-	chunk->pages_used = 0;
-
-	memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
-
-	header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
-	    - (uintptr_t)chunk);
-	if (header_size % pagesize != 0) {
-		/* Round up to the nearest page boundary. */
-		header_size += pagesize - (header_size % pagesize);
-	}
-
-	header_npages = header_size >> pagesize_2pow;
-	pow2_header_npages = pow2_ceil(header_npages);
-
-	/*
-	 * Iteratively mark runs as in use, until we've spoken for the entire
-	 * header.
-	 */
-	map_offset = 0;
-	for (i = 0; header_npages > 0; i++) {
-		if ((pow2_header_npages >> i) <= header_npages) {
-			for (j = 0; j < (pow2_header_npages >> i); j++) {
-				chunk->map[map_offset + j].free = false;
-				chunk->map[map_offset + j].large = false;
-				chunk->map[map_offset + j].npages =
-				    (pow2_header_npages >> i);
-				chunk->map[map_offset + j].pos = j;
-			}
-			header_npages -= (pow2_header_npages >> i);
-			map_offset += (pow2_header_npages >> i);
-		}
-	}
-
-	/*
-	 * Finish initializing map.  The chunk header takes up some space at
-	 * the beginning of the chunk, which we just took care of by
-	 * "allocating" the leading pages.
-	 */
-	while (map_offset < (chunk_size >> pagesize_2pow)) {
-		log2_run_pages = ffs(map_offset) - 1;
-		run_pages = (1 << log2_run_pages);
-
-		chunk->map[map_offset].free = true;
-		chunk->map[map_offset].large = false;
-		chunk->map[map_offset].npages = run_pages;
-
-		chunk->nfree_runs[log2_run_pages]++;
-
-		map_offset += run_pages;
+	if (arena->spare != NULL) {
+		chunk = arena->spare;
+		arena->spare = NULL;
+
+		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
+	} else {
+		chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
+		if (chunk == NULL)
+			return (NULL);
+		chunk->arena = arena;
+
+		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
+
+		/*
+		 * Claim that no pages are in use, since the header is merely
+		 * overhead.
+		 */
+		chunk->pages_used = 0;
+
+		memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
+
+		header_size =
+		    (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen] -
+		    (uintptr_t)chunk);
+		if (header_size % pagesize != 0) {
+			/* Round up to the nearest page boundary. */
+			header_size += pagesize - (header_size % pagesize);
+		}
+
+		header_npages = header_size >> pagesize_2pow;
+		pow2_header_npages = pow2_ceil(header_npages);
+
+		/*
+		 * Iteratively mark runs as in use, until we've spoken for the
+		 * entire header.
+		 */
+		map_offset = 0;
+		for (i = 0; header_npages > 0; i++) {
+			if ((pow2_header_npages >> i) <= header_npages) {
+				for (j = 0; j < (pow2_header_npages >> i);
+				    j++) {
+					chunk->map[map_offset + j].free =
+					    false;
+					chunk->map[map_offset + j].large =
+					    false;
+					chunk->map[map_offset + j].npages =
+					    (pow2_header_npages >> i);
+					chunk->map[map_offset + j].pos = j;
+				}
+				header_npages -= (pow2_header_npages >> i);
+				map_offset += (pow2_header_npages >> i);
+			}
+		}
+
+		/*
+		 * Finish initializing map.  The chunk header takes up some
+		 * space at the beginning of the chunk, which we just took care
+		 * of by "allocating" the leading pages.
+		 */
+		while (map_offset < (chunk_size >> pagesize_2pow)) {
+			log2_run_pages = ffs(map_offset) - 1;
+			run_pages = (1 << log2_run_pages);
+
+			chunk->map[map_offset].free = true;
+			chunk->map[map_offset].large = false;
+			chunk->map[map_offset].npages = run_pages;
+
+			chunk->nfree_runs[log2_run_pages]++;
+
+			map_offset += run_pages;
+		}
 	}
 
 	return (chunk);
 }
 
 static void
-arena_chunk_dealloc(arena_chunk_t *chunk)
+arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
 {
 
+	/*
+	 * Remove chunk from the chunk tree, regardless of whether this chunk
+	 * will be cached, so that the arena does not use it.
+	 */
 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
 
-	chunk_dealloc((void *)chunk, chunk_size);
+	if (opt_hint == false) {
+		if (arena->spare != NULL)
+			chunk_dealloc((void *)arena->spare, chunk_size);
+		arena->spare = chunk;
+	} else {
+		assert(arena->spare == NULL);
+		chunk_dealloc((void *)chunk, chunk_size);
+	}
 }
 
 static void
@@ -2123,7 +2157,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
 	/* Free pages, to the extent possible. */
 	if (chunk->pages_used == 0) {
 		/* This chunk is completely unused now, so deallocate it. */
-		arena_chunk_dealloc(chunk);
+		arena_chunk_dealloc(arena, chunk);
 	}
 }
 
@@ -2451,6 +2485,7 @@ arena_new(arena_t *arena)
 
 	/* Initialize chunks. */
 	RB_INIT(&arena->chunks);
+	arena->spare = NULL;
 
 	/* Initialize bins. */
 