/*-
 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs
 *     using a binary buddy scheme), rather than as individual pages.  This
 *     provides a constant-time mechanism for associating allocations with
 *     particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum, the size classes in each category are as follows:
 *
 * |====================================|
 * | Category | Subcategory    |   Size |
 * |====================================|
 * | Small    | Tiny           |      2 |
 * |          |                |      4 |
 * |          |                |      8 |
 * |          |----------------+--------|
 * |          | Quantum-spaced |     16 |
 * |          |                |     32 |
 * |          |                |     48 |
 * |          |                |    ... |
 * |          |                |    480 |
 * |          |                |    496 |
 * |          |                |    512 |
 * |          |----------------+--------|
 * |          | Sub-page       |   1 kB |
 * |          |                |   2 kB |
 * |====================================|
 * | Large                     |   4 kB |
 * |                           |   8 kB |
 * |                           |  16 kB |
 * |                           |    ... |
 * |                           | 256 kB |
 * |                           | 512 kB |
 * |====================================|
 * | Huge                      |   1 MB |
 * |                           |   2 MB |
 * |                           |   3 MB |
 * |                           |    ... |
 * |====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
 *
 *******************************************************************************
 */
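
/*
 * For example (illustrative numbers only, assuming the 4 kB page and 16 byte
 * quantum defaults described above): a 7 byte request is served from the
 * 8 byte tiny class, a 100 byte request is rounded up to the 112 byte
 * quantum-spaced class, a 600 byte request is rounded up to the 1 kB
 * sub-page class, a 5000 byte request falls in the large category and is
 * rounded up to 8 kB, and a 1.5 MB request falls in the huge category and is
 * rounded up to 2 MB (a whole number of chunks).
 */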

/*
 *******************************************************************************
 *
 * Ring macros.
 *
 *******************************************************************************
 */

/* Ring definitions. */
#define qr(a_type) struct { \
        a_type *qre_next; \
        a_type *qre_prev; \
}

#define qr_initializer {NULL, NULL}

/* Ring functions. */
#define qr_new(a_qr, a_field) do { \
        (a_qr)->a_field.qre_next = (a_qr); \
        (a_qr)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)

#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)

#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
        (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
        (a_qr)->a_field.qre_next = (a_qrelm); \
        (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
        (a_qrelm)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_after_insert(a_qrelm, a_qr, a_field) do { \
        (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
        (a_qr)->a_field.qre_prev = (a_qrelm); \
        (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
        (a_qrelm)->a_field.qre_next = (a_qr); \
} while (0)

#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \
        a_type *t; \
        (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
        (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
        t = (a_qr_a)->a_field.qre_prev; \
        (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
        (a_qr_b)->a_field.qre_prev = t; \
} while (0)

/*
 * qr_meld() and qr_split() are functionally equivalent, so there's no need to
 * have two copies of the code.
 */
#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
        qr_meld((a_qr_a), (a_qr_b), a_type, a_field)

#define qr_remove(a_qr, a_field) do { \
        (a_qr)->a_field.qre_prev->a_field.qre_next \
            = (a_qr)->a_field.qre_next; \
        (a_qr)->a_field.qre_next->a_field.qre_prev \
            = (a_qr)->a_field.qre_prev; \
        (a_qr)->a_field.qre_next = (a_qr); \
        (a_qr)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_foreach(var, a_qr, a_field) \
        for ((var) = (a_qr); \
            (var) != NULL; \
            (var) = (((var)->a_field.qre_next != (a_qr)) \
            ? (var)->a_field.qre_next : NULL))

#define qr_reverse_foreach(var, a_qr, a_field) \
        for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
            (var) != NULL; \
            (var) = (((var) != (a_qr)) \
            ? (var)->a_field.qre_prev : NULL))
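
/*
 * Example usage of the ring macros above (an illustrative sketch, not part of
 * the original source; widget_t and the function below are hypothetical names
 * invented for this example only).
 */
#if 0
typedef struct widget_s widget_t;
struct widget_s {
        int             id;
        qr(widget_t)    link;   /* Embedded ring linkage. */
};

static void
widget_ring_example(widget_t *a, widget_t *b, widget_t *c, widget_t *d)
{
        widget_t *w;

        /* Each element starts out as a ring of one. */
        qr_new(a, link);
        qr_new(b, link);
        qr_new(c, link);
        qr_new(d, link);

        qr_after_insert(a, b, link);    /* Ring is now a, b. */
        qr_before_insert(a, c, link);   /* Ring is now a, b, c. */

        qr_foreach(w, a, link) {
                /* Visits a, b, c in order, then terminates. */
                w->id++;
        }

        qr_meld(a, d, widget_t, link);  /* Splice d's ring in: a, b, c, d. */
        qr_split(a, d, widget_t, link); /* Undo the meld: a, b, c and d. */

        qr_remove(b, link);             /* b becomes a ring of one again. */
}
#endif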

/******************************************************************************/

/*
 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 * defaults the A and J runtime options to off.  These settings are appropriate
 * for production systems.
 */
/* #define MALLOC_PRODUCTION */

#ifndef MALLOC_PRODUCTION
# define MALLOC_DEBUG
#endif

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "libc_private.h"
#ifdef MALLOC_DEBUG
# define _LOCK_DEBUG
#endif
#include "spinlock.h"
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stddef.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/ktrace.h> /* Must come after several other sys/ includes. */

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/vmparam.h>

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "un-namespace.h"

/* MALLOC_STATS enables statistics calculation. */
#ifndef MALLOC_PRODUCTION
# define MALLOC_STATS
#endif

#ifndef MALLOC_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

#ifdef MALLOC_DEBUG
   /* Disable inlining to make debugging easier. */
# define inline
#endif

/* Size of stack-allocated buffer passed to strerror_r(). */
#define STRERROR_BUF 64

/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 2
# define USE_BRK
#endif
#ifdef __ia64__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 3
#endif
#ifdef __alpha__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 3
# define NO_TLS
#endif
#ifdef __sparc64__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 3
# define NO_TLS
#endif
#ifdef __amd64__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 3
#endif
#ifdef __arm__
# define QUANTUM_2POW_MIN 3
# define SIZEOF_PTR_2POW 2
# define USE_BRK
# define NO_TLS
#endif
#ifdef __powerpc__
# define QUANTUM_2POW_MIN 4
# define SIZEOF_PTR_2POW 2
# define USE_BRK
#endif

#define SIZEOF_PTR (1 << SIZEOF_PTR_2POW)

/* sizeof(int) == (1 << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
# define SIZEOF_INT_2POW 2
#endif

/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
# define NO_TLS
#endif

/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 */
#define CHUNK_2POW_DEFAULT 20
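
/*
 * Illustrative sketch (not part of the original source): with the default
 * CHUNK_2POW_DEFAULT of 20, chunks are 1 MB (1 << 20 bytes).  Assuming chunks
 * are chunk-aligned, the chunk that backs an allocation can be found by
 * masking off the low 20 bits of its address, which is what makes mapping an
 * allocation to its arena a constant-time operation.  The helper below is a
 * hypothetical example of that calculation, not a function used elsewhere in
 * this file, and it ignores the fact that the chunk size is tunable at run
 * time.
 */
#if 0
static inline void *
example_chunk_base(void *ptr)
{

        return ((void *)((uintptr_t)ptr &
            ~(((uintptr_t)1 << CHUNK_2POW_DEFAULT) - 1)));
}
#endif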

/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define CACHELINE_2POW 6
#define CACHELINE ((size_t)(1 << CACHELINE_2POW))

/* Smallest size class to support. */
#define TINY_MIN_2POW 1

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define SMALL_MAX_2POW_DEFAULT 9
#define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)

/*
 * Maximum desired run header overhead.  Runs are sized as small as possible
 * such that this setting is still honored, without violating other
 * constraints.  The goal is to make runs as small as possible without
 * exceeding a per run external fragmentation threshold.
 *
 * Note that it is possible to set this low enough that it cannot be honored
 * for some/all object sizes, since there is one bit of header overhead per
 * object (plus a constant).  In such cases, this constraint is relaxed.
 *
 * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
 * overhead for which RUN_MAX_OVRHD is relaxed.
 */
#define RUN_MAX_OVRHD 0.015
#define RUN_MAX_OVRHD_RELAX 1.5
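
/*
 * Illustrative arithmetic (one way to read the two constants above, not a
 * statement about the exact calculation performed later in this file): for a
 * 16 byte region there are 128 bits of usable data, so the 1.5% budget
 * amounts to 0.015 * 128 ~= 1.9 bits of header per region and the bound can
 * be pursued; for an 8 byte region the budget is only 0.015 * 64 ~= 1.0 bit
 * per region, which is at most RUN_MAX_OVRHD_RELAX (1.5) bits, so the bound
 * is relaxed for that size class.
 */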

/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
#define RUN_MAX_SMALL_2POW 16
#define RUN_MAX_SMALL (1 << RUN_MAX_SMALL_2POW)

/******************************************************************************/

/*
 * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */
typedef struct {
        spinlock_t lock;
} malloc_mutex_t;

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};

/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
        /*
         * Number of allocation requests that corresponded to the size of this
         * bin.
         */
        uint64_t nrequests;

        /* Total number of runs created for this bin's size class. */
        uint64_t nruns;

        /*
         * Total number of run promotions/demotions for this bin's size class.
         */
        uint64_t npromote;
        uint64_t ndemote;

        /* High-water mark for this bin. */
        unsigned long highruns;

        /* Current number of runs in this bin. */
        unsigned long curruns;
};

typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
        /* Number of bytes currently mapped. */
        size_t mapped;

        /* Per-size-category statistics. */
        size_t allocated_small;
        uint64_t nmalloc_small;
        uint64_t ndalloc_small;

        size_t allocated_large;
        uint64_t nmalloc_large;
        uint64_t ndalloc_large;
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
        /* Number of chunks that were allocated. */
        uint64_t nchunks;

        /* High-water mark for number of chunks allocated. */
        unsigned long highchunks;

        /*
         * Current number of chunks allocated.  This value isn't maintained for
         * any other purpose, so keep track of it in order to be able to set
         * highchunks.
         */
        unsigned long curchunks;
};

#endif /* #ifdef MALLOC_STATS */

/******************************************************************************/
/*
 * Chunk data structures.
 */

/* Tree of chunks. */
typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
        /* Linkage for the chunk tree. */
        RB_ENTRY(chunk_node_s) link;

        /*
         * Pointer to the chunk that this tree node is responsible for.  In
         * some (but certainly not all) cases, this data structure is placed at
         * the beginning of the corresponding chunk, so this field may point to
         * this node.
         */
        void *chunk;

        /* Total chunk size. */
        size_t size;
};
typedef struct chunk_tree_s chunk_tree_t;
RB_HEAD(chunk_tree_s, chunk_node_s);

/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
        /* Number of pages in run. */
        uint32_t npages;
        /*
         * Position within run.  For a free run, this is POS_FREE for the first
         * and last pages.  The POS_FREE special value makes it possible to
         * quickly coalesce free runs.
         *
         * This is the limiting factor for chunksize; there can be at most 2^31
         * pages in a run.
         */
#define POS_FREE ((uint32_t)0xffffffffU)
        uint32_t pos;
};
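
/*
 * For example (a sketch based on the comments in this file, not a dump of
 * real data): a free run spanning 4 pages starting at page index i would have
 * map[i].npages == map[i + 3].npages == 4 and map[i].pos == map[i + 3].pos ==
 * POS_FREE.  The interior entries need not be kept current, which is what
 * makes coalescing with a neighboring free run cheap.
 */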

/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
        /* Arena that owns the chunk. */
        arena_t *arena;

        /* Linkage for the arena's chunk tree. */
        RB_ENTRY(arena_chunk_s) link;

        /*
         * Number of pages in use.  This is maintained in order to make
         * detection of empty chunks fast.
         */
        uint32_t pages_used;

        /*
         * Every time a free run larger than this value is created/coalesced,
         * this value is increased.  The only way that the value decreases is
         * if arena_run_alloc() fails to find a free run as large as advertised
         * by this value.
         */
        uint32_t max_frun_npages;

        /*
         * Every time a free run that starts at an earlier page than this value
         * is created/coalesced, this value is decreased.  It is reset in a
         * similar fashion to max_frun_npages.
         */
        uint32_t min_frun_ind;

        /*
         * Map of pages within chunk that keeps track of free/large/small.  For
         * free runs, only the map entries for the first and last pages are
         * kept up to date, so that free runs can be quickly coalesced.
         */
        arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef struct arena_chunk_tree_s arena_chunk_tree_t;
RB_HEAD(arena_chunk_tree_s, arena_chunk_s);

typedef struct arena_run_s arena_run_t;
struct arena_run_s {
        /* Linkage for run rings. */
        qr(arena_run_t) link;

#ifdef MALLOC_DEBUG
        uint32_t magic;
# define ARENA_RUN_MAGIC 0x384adf93
#endif

        /* Bin this run is associated with. */
        arena_bin_t *bin;

        /* Index of first element that might have a free region. */
        unsigned regs_minelm;

        /* Number of free regions in run. */
        unsigned nfree;

        /*
         * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_Q25,
         * RUN_Q50, RUN_Q75, RUN_Q100}.
         */
#define RUN_QINIT 0
#define RUN_Q0 1
#define RUN_Q25 2
#define RUN_Q50 3
#define RUN_Q75 4
#define RUN_Q100 5
        unsigned quartile;

        /*
         * Limits on the number of free regions for the fullness quartile this
         * run is currently in.  If nfree goes outside these limits, the run
         * is moved to a different fullness quartile.
         */
        unsigned free_max;
        unsigned free_min;

        /* Bitmask of in-use regions (0: in use, 1: free). */
        unsigned regs_mask[1]; /* Dynamically sized. */
};

/* Used for run ring headers, where the run isn't actually used. */
typedef struct arena_run_link_s arena_run_link_t;
struct arena_run_link_s {
        /* Linkage for run rings. */
        qr(arena_run_t) link;
};

/* Avoid pointer aliasing issues. */
static inline arena_run_t *
arena_bin_link(void *ptr)
{

        return ((arena_run_t *)ptr);
}

struct arena_bin_s {
        /*
         * Current run being used to service allocations of this bin's size
         * class.
         */
        arena_run_t *runcur;

        /*
         * Links into rings of runs, of various fullnesses (names indicate
         * approximate lower bounds).  A new run conceptually starts off in
         * runsinit, and it isn't inserted into the runs0 ring until it
         * reaches 25% full (hysteresis mechanism).  For the run to be moved
         * again, it must become either empty or 50% full.  Thus, each ring
         * contains runs that are within 50% above the advertised fullness for
         * the ring.  This provides a low-overhead mechanism for segregating
         * runs into approximate fullness classes.
         *
         * Conceptually, there is a runs100 that contains completely full runs.
         * Since we don't need to search for these runs though, no runs100 ring
         * is actually maintained.
         *
         * These rings are useful when looking for an existing run to use when
         * runcur is no longer usable.  We look for usable runs in the
         * following order:
         *
         *   1) runs50
         *   2) runs25
         *   3) runs0
         *   4) runs75
         *
         * runs75 isn't a good place to look, because it contains runs that may
         * be nearly completely full.  Still, we look there as a last resort in
         * order to avoid allocating a new run if at all possible.
         */
        /* arena_run_link_t runsinit;   0% <= fullness <  25% */
        arena_run_link_t runs0;      /*  0% <  fullness <  50% */
        arena_run_link_t runs25;     /* 25% <  fullness <  75% */
        arena_run_link_t runs50;     /* 50% <  fullness < 100% */
        arena_run_link_t runs75;     /* 75% <  fullness < 100% */
        /* arena_run_link_t runs100;          fullness == 100% */
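
        /*
         * For example (illustrative numbers only): a freshly created run with
         * 64 regions conceptually sits in runsinit; only once 16 regions (25%)
         * are allocated is it linked into runs0, and it is not reclassified
         * again until it either becomes empty or reaches 32 allocated regions
         * (50%).  The hysteresis keeps a run that hovers near a boundary from
         * bouncing between rings on every allocation/deallocation.
         */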
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
/* Size of regions in a run for this bin's size class. */
|
|
|
|
size_t reg_size;
|
|
|
|
|
|
|
|
/* Total size of a run for this bin's size class. */
|
|
|
|
size_t run_size;
|
|
|
|
|
|
|
|
/* Total number of regions in a run for this bin's size class. */
|
|
|
|
uint32_t nregs;
|
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
/* Number of elements in a run's regs_mask for this bin's size class. */
|
|
|
|
uint32_t regs_mask_nelms;
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Offset of first region in a run for this bin's size class. */
|
|
|
|
uint32_t reg0_offset;
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
/* Bin statistics. */
|
|
|
|
malloc_bin_stats_t stats;
|
|
|
|
#endif
|
2006-01-13 18:38:56 +00:00
|
|
|
};
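/*
* Illustrative sketch (hypothetical, kept out of the build): the ring scan
* order described in the comment inside arena_bin_s above, expressed as a
* table. The field names are the real ring members; the table itself is not
* part of the allocator, and the actual search lives in
* arena_bin_nonfull_run_get().
*/
#if 0
static const size_t arena_bin_ring_scan_order[] = {
	offsetof(arena_bin_t, runs50),	/* Most likely to be immediately usable. */
	offsetof(arena_bin_t, runs25),
	offsetof(arena_bin_t, runs0),
	offsetof(arena_bin_t, runs75)	/* Last resort; runs may be nearly full. */
};
#endif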
|
|
|
|
|
|
|
|
struct arena_s {
|
|
|
|
#ifdef MALLOC_DEBUG
|
2006-03-17 09:00:27 +00:00
|
|
|
uint32_t magic;
|
2006-01-13 18:38:56 +00:00
|
|
|
# define ARENA_MAGIC 0x947d3d24
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* All operations on this arena require that mtx be locked. */
|
2006-03-17 09:00:27 +00:00
|
|
|
malloc_mutex_t mtx;
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
arena_stats_t stats;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tree of chunks this arena manages.
|
|
|
|
*/
|
|
|
|
arena_chunk_tree_t chunks;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
/*
|
|
|
|
* In order to avoid rapid chunk allocation/deallocation when an arena
|
|
|
|
* oscillates right on the cusp of needing a new chunk, cache the most
|
|
|
|
* recently freed chunk. This caching is disabled by opt_hint.
|
|
|
|
*
|
|
|
|
* There is one spare chunk per arena, rather than one spare total, in
|
|
|
|
* order to avoid interactions between multiple threads that could make
|
|
|
|
* a single spare inadequate.
|
|
|
|
*/
|
|
|
|
arena_chunk_t *spare;
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* bins is used to store rings of free regions of the following sizes,
|
2006-03-17 09:00:27 +00:00
|
|
|
* assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
|
2006-01-13 18:38:56 +00:00
|
|
|
*
|
|
|
|
* bins[i] | size |
|
|
|
|
* --------+------+
|
2006-03-17 09:00:27 +00:00
|
|
|
* 0 | 2 |
|
|
|
|
* 1 | 4 |
|
|
|
|
* 2 | 8 |
|
|
|
|
* --------+------+
|
|
|
|
* 3 | 16 |
|
|
|
|
* 4 | 32 |
|
|
|
|
* 5 | 48 |
|
|
|
|
* 6 | 64 |
|
2006-01-13 18:38:56 +00:00
|
|
|
* : :
|
|
|
|
* : :
|
2006-03-17 09:00:27 +00:00
|
|
|
* 33 | 496 |
|
|
|
|
* 34 | 512 |
|
|
|
|
* --------+------+
|
|
|
|
* 35 | 1024 |
|
|
|
|
* 36 | 2048 |
|
2006-01-13 18:38:56 +00:00
|
|
|
* --------+------+
|
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_bin_t bins[1]; /* Dynamically sized. */
|
1995-09-16 09:28:13 +00:00
|
|
|
};
|
1994-05-27 05:00:24 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* Data.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Number of CPUs. */
|
|
|
|
static unsigned ncpus;
|
|
|
|
|
|
|
|
/* VM page size. */
|
|
|
|
static unsigned pagesize;
|
2007-03-23 05:05:48 +00:00
|
|
|
static unsigned pagesize_mask;
|
2006-03-17 09:00:27 +00:00
|
|
|
static unsigned pagesize_2pow;
|
|
|
|
|
|
|
|
/* Various bin-related settings. */
|
|
|
|
static size_t bin_maxclass; /* Max size class for bins. */
|
|
|
|
static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
|
|
|
|
static unsigned nqbins; /* Number of quantum-spaced bins. */
|
2006-03-26 23:37:25 +00:00
|
|
|
static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
|
2006-03-17 09:00:27 +00:00
|
|
|
static size_t small_min;
|
|
|
|
static size_t small_max;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/* Various quantum-related settings. */
|
|
|
|
static size_t quantum;
|
|
|
|
static size_t quantum_mask; /* (quantum - 1). */
|
|
|
|
|
|
|
|
/* Various chunk-related settings. */
|
2007-03-23 22:58:15 +00:00
|
|
|
static size_t chunksize;
|
|
|
|
static size_t chunksize_mask; /* (chunksize - 1). */
|
2007-03-23 05:05:48 +00:00
|
|
|
static unsigned chunk_npages;
|
|
|
|
static unsigned arena_chunk_header_npages;
|
2006-03-17 09:00:27 +00:00
|
|
|
static size_t arena_maxclass; /* Max size class for arenas. */
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/********/
|
|
|
|
/*
|
|
|
|
* Chunks.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Protects chunk-related data structures. */
|
|
|
|
static malloc_mutex_t chunks_mtx;
|
|
|
|
|
|
|
|
/* Tree of chunks that are stand-alone huge allocations. */
|
|
|
|
static chunk_tree_t huge;
|
|
|
|
|
|
|
|
#ifdef USE_BRK
|
|
|
|
/*
|
|
|
|
* Try to use brk for chunk-size allocations, due to address space constraints.
|
|
|
|
*/
|
2006-09-08 17:52:15 +00:00
|
|
|
/*
|
|
|
|
* Protects sbrk() calls. This must be separate from chunks_mtx, since
|
|
|
|
* base_chunk_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
|
|
|
|
* could cause recursive lock acquisition).
|
|
|
|
*/
|
|
|
|
static malloc_mutex_t brk_mtx;
|
2006-01-19 07:23:13 +00:00
|
|
|
/* Result of first sbrk(0) call. */
|
|
|
|
static void *brk_base;
|
|
|
|
/* Current end of brk, or ((void *)-1) if brk is exhausted. */
|
|
|
|
static void *brk_prev;
|
2006-04-27 01:03:00 +00:00
|
|
|
/* Current upper limit on brk addresses. */
|
2006-01-19 07:23:13 +00:00
|
|
|
static void *brk_max;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
/*
|
2007-03-23 05:05:48 +00:00
|
|
|
* Byte counters for allocated/mapped space used by the chunks in the huge
|
2006-01-13 18:38:56 +00:00
|
|
|
* allocations tree.
|
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
static uint64_t huge_nmalloc;
|
|
|
|
static uint64_t huge_ndalloc;
|
2006-01-13 18:38:56 +00:00
|
|
|
static size_t huge_allocated;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tree of chunks that were previously allocated. This is used when allocating
|
|
|
|
* chunks, in an attempt to re-use address space.
|
|
|
|
*/
|
|
|
|
static chunk_tree_t old_chunks;
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
/****************************/
|
|
|
|
/*
|
|
|
|
* base (internal allocation).
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Current chunk that is being used for internal memory allocations. This
|
|
|
|
* chunk is carved up in cacheline-size quanta, so that there is no chance of
|
2006-03-26 23:37:25 +00:00
|
|
|
* false cache line sharing.
|
|
|
|
*/
|
2006-01-19 07:23:13 +00:00
|
|
|
static void *base_chunk;
|
|
|
|
static void *base_next_addr;
|
|
|
|
static void *base_past_addr; /* Addr immediately past base_chunk. */
|
|
|
|
static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
|
|
|
|
static malloc_mutex_t base_mtx;
|
2006-01-16 05:13:49 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 05:05:48 +00:00
|
|
|
static size_t base_mapped;
|
2006-01-16 05:13:49 +00:00
|
|
|
#endif
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/********/
|
|
|
|
/*
|
|
|
|
* Arenas.
|
|
|
|
*/
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* Arenas that are used to service external requests. Not all elements of the
|
|
|
|
* arenas array are necessarily used; arenas are created lazily as needed.
|
|
|
|
*/
|
|
|
|
static arena_t **arenas;
|
|
|
|
static unsigned narenas;
|
|
|
|
#ifndef NO_TLS
|
|
|
|
static unsigned next_arena;
|
|
|
|
#endif
|
|
|
|
static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
|
|
|
|
|
|
|
|
#ifndef NO_TLS
|
1994-05-27 05:00:24 +00:00
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
|
|
|
|
* for allocations.
|
1994-05-27 05:00:24 +00:00
|
|
|
*/
|
2006-03-26 23:37:25 +00:00
|
|
|
static __thread arena_t *arenas_map;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
/* Chunk statistics. */
|
2006-01-19 07:23:13 +00:00
|
|
|
static chunk_stats_t stats_chunks;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*******************************/
|
|
|
|
/*
|
|
|
|
* Runtime configuration options.
|
|
|
|
*/
|
|
|
|
const char *_malloc_options;
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifndef MALLOC_PRODUCTION
|
2006-01-13 18:38:56 +00:00
|
|
|
static bool opt_abort = true;
|
|
|
|
static bool opt_junk = true;
|
2006-03-17 09:00:27 +00:00
|
|
|
#else
|
|
|
|
static bool opt_abort = false;
|
|
|
|
static bool opt_junk = false;
|
|
|
|
#endif
|
|
|
|
static bool opt_hint = false;
|
2006-01-13 18:38:56 +00:00
|
|
|
static bool opt_print_stats = false;
|
|
|
|
static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
|
2006-03-17 09:00:27 +00:00
|
|
|
static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
|
2006-01-13 18:38:56 +00:00
|
|
|
static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
|
|
|
|
static bool opt_utrace = false;
|
|
|
|
static bool opt_sysv = false;
|
|
|
|
static bool opt_xmalloc = false;
|
|
|
|
static bool opt_zero = false;
|
|
|
|
static int32_t opt_narenas_lshift = 0;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
void *p;
|
|
|
|
size_t s;
|
|
|
|
void *r;
|
|
|
|
} malloc_utrace_t;
|
|
|
|
|
|
|
|
#define UTRACE(a, b, c) \
|
|
|
|
if (opt_utrace) { \
|
|
|
|
malloc_utrace_t ut = {a, b, c}; \
|
|
|
|
utrace(&ut, sizeof(ut)); \
|
|
|
|
}
|
1994-05-27 05:00:24 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/******************************************************************************/
|
1995-09-16 09:28:13 +00:00
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* Begin function prototypes for non-inline static functions.
|
1995-09-16 09:28:13 +00:00
|
|
|
*/
|
1994-05-27 05:00:24 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
static void malloc_mutex_init(malloc_mutex_t *a_mutex);
|
|
|
|
static void wrtmessage(const char *p1, const char *p2, const char *p3,
|
|
|
|
const char *p4);
|
2007-03-20 03:44:10 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-01-13 18:38:56 +00:00
|
|
|
static void malloc_printf(const char *format, ...);
|
2007-03-20 03:44:10 +00:00
|
|
|
#endif
|
|
|
|
static char *umax2s(uintmax_t x, char *s);
|
2006-09-08 17:52:15 +00:00
|
|
|
static bool base_chunk_alloc(size_t minsize);
|
2006-01-16 05:13:49 +00:00
|
|
|
static void *base_alloc(size_t size);
|
|
|
|
static chunk_node_t *base_chunk_node_alloc(void);
|
|
|
|
static void base_chunk_node_dealloc(chunk_node_t *node);
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-03-17 09:00:27 +00:00
|
|
|
static void stats_print(arena_t *arena);
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
static void *pages_map(void *addr, size_t size);
|
|
|
|
static void pages_unmap(void *addr, size_t size);
|
|
|
|
static void *chunk_alloc(size_t size);
|
|
|
|
static void chunk_dealloc(void *chunk, size_t size);
|
2006-03-30 20:25:52 +00:00
|
|
|
#ifndef NO_TLS
|
|
|
|
static arena_t *choose_arena_hard(void);
|
|
|
|
#endif
|
2007-03-23 05:05:48 +00:00
|
|
|
static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
|
2006-03-17 09:00:27 +00:00
|
|
|
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
|
2006-12-23 00:18:51 +00:00
|
|
|
static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
|
2006-03-30 20:25:52 +00:00
|
|
|
static void arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_run_t *run);
|
2006-03-30 20:25:52 +00:00
|
|
|
static void arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_run_t *run);
|
2007-03-23 05:05:48 +00:00
|
|
|
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
|
2006-03-17 09:00:27 +00:00
|
|
|
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
|
2006-07-27 04:00:12 +00:00
|
|
|
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
|
|
|
|
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
|
2007-03-20 03:44:10 +00:00
|
|
|
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
|
2006-01-13 18:38:56 +00:00
|
|
|
static void *arena_malloc(arena_t *arena, size_t size);
|
2007-03-23 22:58:15 +00:00
|
|
|
static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
|
|
|
|
size_t alloc_size);
|
2006-03-30 20:25:52 +00:00
|
|
|
static size_t arena_salloc(const void *ptr);
|
|
|
|
static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
|
|
|
|
static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
|
2006-01-16 05:13:49 +00:00
|
|
|
static bool arena_new(arena_t *arena);
|
2006-01-13 18:38:56 +00:00
|
|
|
static arena_t *arenas_extend(unsigned ind);
|
2006-03-19 18:28:06 +00:00
|
|
|
static void *huge_malloc(size_t size);
|
2007-03-23 22:58:15 +00:00
|
|
|
static void *huge_palloc(size_t alignment, size_t size);
|
2006-03-19 18:28:06 +00:00
|
|
|
static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
|
2006-01-13 18:38:56 +00:00
|
|
|
static void huge_dalloc(void *ptr);
|
2006-03-30 20:25:52 +00:00
|
|
|
static void *imalloc(size_t size);
|
|
|
|
static void *ipalloc(size_t alignment, size_t size);
|
|
|
|
static void *icalloc(size_t size);
|
2006-03-26 23:37:25 +00:00
|
|
|
static size_t isalloc(const void *ptr);
|
2006-03-30 20:25:52 +00:00
|
|
|
static void *iralloc(void *ptr, size_t size);
|
2006-01-13 18:38:56 +00:00
|
|
|
static void idalloc(void *ptr);
|
|
|
|
static void malloc_print_stats(void);
|
2006-01-19 02:11:05 +00:00
|
|
|
static bool malloc_init_hard(void);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* End function prototypes.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* Begin mutex.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
malloc_mutex_init(malloc_mutex_t *a_mutex)
|
|
|
|
{
|
|
|
|
static const spinlock_t lock = _SPINLOCK_INITIALIZER;
|
|
|
|
|
|
|
|
a_mutex->lock = lock;
|
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline void
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_lock(malloc_mutex_t *a_mutex)
|
|
|
|
{
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
if (__isthreaded)
|
|
|
|
_SPINLOCK(&a_mutex->lock);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline void
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_unlock(malloc_mutex_t *a_mutex)
|
|
|
|
{
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
if (__isthreaded)
|
|
|
|
_SPINUNLOCK(&a_mutex->lock);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* End mutex.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* Begin Utility functions/macros.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Return the chunk address for allocation address a. */
|
|
|
|
#define CHUNK_ADDR2BASE(a) \
|
2007-03-23 22:58:15 +00:00
|
|
|
((void *)((uintptr_t)(a) & ~chunksize_mask))
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/* Return the chunk offset of address a. */
|
|
|
|
#define CHUNK_ADDR2OFFSET(a) \
|
2007-03-23 22:58:15 +00:00
|
|
|
((size_t)((uintptr_t)(a) & chunksize_mask))
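/*
* Worked example (illustrative, assuming 1 MB chunks, i.e. chunksize_mask ==
* 0xfffff): CHUNK_ADDR2BASE(0x1234567) == 0x1200000 and
* CHUNK_ADDR2OFFSET(0x1234567) == 0x34567. The two macros simply split an
* address into its chunk base and its offset within that chunk.
*/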
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/* Return the smallest chunk multiple that is >= s. */
|
|
|
|
#define CHUNK_CEILING(s) \
|
2007-03-23 22:58:15 +00:00
|
|
|
(((s) + chunksize_mask) & ~chunksize_mask)
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
/* Return the smallest cacheline multiple that is >= s. */
|
|
|
|
#define CACHELINE_CEILING(s) \
|
|
|
|
(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Return the smallest quantum multiple that is >= a. */
|
|
|
|
#define QUANTUM_CEILING(a) \
|
|
|
|
(((a) + quantum_mask) & ~quantum_mask)
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Return the smallest pagesize multiple that is >= s. */
|
|
|
|
#define PAGE_CEILING(s) \
|
|
|
|
(((s) + pagesize_mask) & ~pagesize_mask)
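/*
* Worked example for the *_CEILING() macros (illustrative, assuming 4 kB
* pages, i.e. pagesize_mask == 0xfff): PAGE_CEILING(1) == 4096,
* PAGE_CEILING(4096) == 4096, and PAGE_CEILING(4097) == 8192. Adding the
* mask and then clearing the low bits rounds up to the next multiple; the
* same pattern is used by CACHELINE_CEILING(), QUANTUM_CEILING(), and
* CHUNK_CEILING().
*/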
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Compute the smallest power of 2 that is >= x. */
|
|
|
|
static inline size_t
|
|
|
|
pow2_ceil(size_t x)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
2006-04-27 01:03:00 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
x--;
|
|
|
|
x |= x >> 1;
|
|
|
|
x |= x >> 2;
|
|
|
|
x |= x >> 4;
|
|
|
|
x |= x >> 8;
|
|
|
|
x |= x >> 16;
|
|
|
|
#if (SIZEOF_PTR == 8)
|
|
|
|
x |= x >> 32;
|
|
|
|
#endif
|
|
|
|
x++;
|
|
|
|
return (x);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
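/*
* Illustrative trace of the function above (hypothetical input): for x == 19
* (binary 10011), the initial decrement gives 10010, the shift/or cascade
* smears the high bit into every lower position (11111), and the final
* increment yields 100000 == 32, the smallest power of 2 >= 19. The initial
* decrement is what makes exact powers of 2 map to themselves.
*/
#if 0
/* Hypothetical self-check, kept out of the build: */
static void
pow2_ceil_check(void)
{

	assert(pow2_ceil(1) == 1);
	assert(pow2_ceil(19) == 32);
	assert(pow2_ceil(32) == 32);
	assert(pow2_ceil(33) == 64);
}
#endif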
|
|
|
|
|
|
|
|
static void
|
|
|
|
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
|
|
|
|
{
|
|
|
|
|
|
|
|
_write(STDERR_FILENO, p1, strlen(p1));
|
|
|
|
_write(STDERR_FILENO, p2, strlen(p2));
|
|
|
|
_write(STDERR_FILENO, p3, strlen(p3));
|
|
|
|
_write(STDERR_FILENO, p4, strlen(p4));
|
|
|
|
}
|
|
|
|
|
|
|
|
void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
|
|
|
|
const char *p4) = wrtmessage;
|
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* Print to stderr in such a way as to (hopefully) avoid memory allocation.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
malloc_printf(const char *format, ...)
|
|
|
|
{
|
|
|
|
char buf[4096];
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, format);
|
|
|
|
vsnprintf(buf, sizeof(buf), format, ap);
|
|
|
|
va_end(ap);
|
|
|
|
_malloc_message(buf, "", "", "");
|
|
|
|
}
|
2007-03-20 03:44:10 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't want to depend on vsnprintf() for production builds, since that can
|
|
|
|
* cause unnecessary bloat for static binaries. umax2s() provides minimal
|
|
|
|
* integer printing functionality, so that malloc_printf() use can be limited to
|
|
|
|
* MALLOC_STATS code.
|
|
|
|
*/
|
|
|
|
#define UMAX2S_BUFSIZE 21
|
|
|
|
static char *
|
|
|
|
umax2s(uintmax_t x, char *s)
|
|
|
|
{
|
|
|
|
unsigned i;
|
|
|
|
|
|
|
|
/* Make sure UMAX2S_BUFSIZE is large enough. */
|
|
|
|
assert(sizeof(uintmax_t) <= 8);
|
|
|
|
|
|
|
|
i = UMAX2S_BUFSIZE - 1;
|
|
|
|
s[i] = '\0';
|
|
|
|
do {
|
|
|
|
i--;
|
|
|
|
s[i] = "0123456789"[x % 10];
|
|
|
|
x /= 10;
|
|
|
|
} while (x > 0);
|
|
|
|
|
|
|
|
return (&s[i]);
|
|
|
|
}
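/*
* Hypothetical usage sketch: umax2s() writes the digits backward into the
* caller-supplied buffer and returns a pointer into that buffer, so the
* buffer must remain live while the result is used, e.g.:
*
*	char buf[UMAX2S_BUFSIZE];
*	_malloc_message("narenas: ", umax2s(narenas, buf), "\n", "");
*/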
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
/******************************************************************************/
|
|
|
|
|
2006-09-08 17:52:15 +00:00
|
|
|
static bool
|
|
|
|
base_chunk_alloc(size_t minsize)
|
|
|
|
{
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
assert(minsize <= chunksize);
|
2006-09-08 17:52:15 +00:00
|
|
|
|
|
|
|
#ifdef USE_BRK
|
|
|
|
/*
|
|
|
|
* Do special brk allocation here, since the base chunk doesn't really
|
|
|
|
* need to be chunk-aligned.
|
|
|
|
*/
|
|
|
|
if (brk_prev != (void *)-1) {
|
|
|
|
void *brk_cur;
|
|
|
|
intptr_t incr;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&brk_mtx);
|
|
|
|
do {
|
|
|
|
/* Get the current end of brk. */
|
|
|
|
brk_cur = sbrk(0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate how much padding is necessary to
|
|
|
|
* chunk-align the end of brk. Don't worry about
|
|
|
|
* brk_cur not being chunk-aligned though.
|
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
incr = (intptr_t)chunksize
|
2006-09-08 17:52:15 +00:00
|
|
|
- (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
|
|
|
|
if (incr < minsize)
|
2007-03-23 22:58:15 +00:00
|
|
|
incr += chunksize;
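/*
* Example (hypothetical numbers, assuming 1 MB chunks): if the current
* break sits 100 bytes below a chunk boundary, incr starts out as 100;
* if minsize is 4 kB that is not enough, so a full extra chunk is added
* and the new break still ends up chunk-aligned after the sbrk() below.
*/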
|
2006-09-08 17:52:15 +00:00
|
|
|
|
|
|
|
brk_prev = sbrk(incr);
|
|
|
|
if (brk_prev == brk_cur) {
|
|
|
|
/* Success. */
|
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
|
|
|
base_chunk = brk_cur;
|
|
|
|
base_next_addr = base_chunk;
|
2007-02-22 19:10:30 +00:00
|
|
|
base_past_addr = (void *)((uintptr_t)base_chunk
|
|
|
|
+ incr);
|
2006-09-08 17:52:15 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 05:05:48 +00:00
|
|
|
base_mapped += incr;
|
2006-09-08 17:52:15 +00:00
|
|
|
#endif
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
} while (brk_prev != (void *)-1);
|
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
2007-02-22 19:10:30 +00:00
|
|
|
* Don't worry about chunk alignment here, since base_chunk doesn't
|
|
|
|
* really need to be aligned.
|
2006-09-08 17:52:15 +00:00
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
base_chunk = pages_map(NULL, chunksize);
|
2006-09-08 17:52:15 +00:00
|
|
|
if (base_chunk == NULL)
|
|
|
|
return (true);
|
|
|
|
base_next_addr = base_chunk;
|
2007-03-23 22:58:15 +00:00
|
|
|
base_past_addr = (void *)((uintptr_t)base_chunk + chunksize);
|
2006-09-08 17:52:15 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 22:58:15 +00:00
|
|
|
base_mapped += chunksize;
|
2006-09-08 17:52:15 +00:00
|
|
|
#endif
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
static void *
|
|
|
|
base_alloc(size_t size)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t csize;
|
|
|
|
|
|
|
|
/* Round size up to nearest multiple of the cacheline size. */
|
|
|
|
csize = CACHELINE_CEILING(size);
|
|
|
|
|
|
|
|
malloc_mutex_lock(&base_mtx);
|
|
|
|
|
2007-02-22 19:10:30 +00:00
|
|
|
/*
|
|
|
|
* Make sure there's enough space for the allocation.
|
|
|
|
* base_chunk_alloc() does not guarantee that a newly allocated chunk
|
|
|
|
* is >= size, so loop here, rather than only trying once.
|
|
|
|
*/
|
|
|
|
while ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
|
2006-09-08 17:52:15 +00:00
|
|
|
if (base_chunk_alloc(csize)) {
|
2006-01-16 05:13:49 +00:00
|
|
|
ret = NULL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate. */
|
|
|
|
ret = base_next_addr;
|
2006-01-20 03:11:11 +00:00
|
|
|
base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
|
2006-01-16 05:13:49 +00:00
|
|
|
|
|
|
|
RETURN:
|
|
|
|
malloc_mutex_unlock(&base_mtx);
|
|
|
|
return (ret);
|
|
|
|
}
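/*
* The two helpers below maintain a LIFO cache of chunk_node_t structures:
* when a node is returned, its first word is overwritten with a pointer to
* the previous list head, so the cache needs no separate link field and no
* extra allocation. Nodes are only ever recycled through this cache, since
* base_alloc() provides no way to free the underlying memory.
*/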
|
|
|
|
|
|
|
|
static chunk_node_t *
|
|
|
|
base_chunk_node_alloc(void)
|
|
|
|
{
|
|
|
|
chunk_node_t *ret;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&base_mtx);
|
|
|
|
if (base_chunk_nodes != NULL) {
|
|
|
|
ret = base_chunk_nodes;
|
|
|
|
base_chunk_nodes = *(chunk_node_t **)ret;
|
|
|
|
malloc_mutex_unlock(&base_mtx);
|
|
|
|
} else {
|
|
|
|
malloc_mutex_unlock(&base_mtx);
|
|
|
|
ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
|
|
|
|
}
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
base_chunk_node_dealloc(chunk_node_t *node)
|
|
|
|
{
|
|
|
|
|
|
|
|
malloc_mutex_lock(&base_mtx);
|
|
|
|
*(chunk_node_t **)node = base_chunk_nodes;
|
|
|
|
base_chunk_nodes = node;
|
|
|
|
malloc_mutex_unlock(&base_mtx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
static void
|
2006-03-17 09:00:27 +00:00
|
|
|
stats_print(arena_t *arena)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
unsigned i;
|
2006-03-17 09:00:27 +00:00
|
|
|
int gap_start;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
malloc_printf(
|
|
|
|
" allocated/mapped nmalloc ndalloc\n");
|
|
|
|
malloc_printf("small: %12llu %-12s %12llu %12llu\n",
|
|
|
|
arena->stats.allocated_small, "", arena->stats.nmalloc_small,
|
|
|
|
arena->stats.ndalloc_small);
|
|
|
|
malloc_printf("large: %12llu %-12s %12llu %12llu\n",
|
|
|
|
arena->stats.allocated_large, "", arena->stats.nmalloc_large,
|
|
|
|
arena->stats.ndalloc_large);
|
|
|
|
malloc_printf("total: %12llu/%-12llu %12llu %12llu\n",
|
|
|
|
arena->stats.allocated_small + arena->stats.allocated_large,
|
|
|
|
arena->stats.mapped,
|
|
|
|
arena->stats.nmalloc_small + arena->stats.nmalloc_large,
|
|
|
|
arena->stats.ndalloc_small + arena->stats.ndalloc_large);
|
|
|
|
|
|
|
|
malloc_printf("bins: bin size regs pgs requests newruns "
|
|
|
|
"maxruns curruns promote demote\n");
|
2006-03-26 23:37:25 +00:00
|
|
|
for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
|
2006-03-17 09:00:27 +00:00
|
|
|
if (arena->bins[i].stats.nrequests == 0) {
|
|
|
|
if (gap_start == -1)
|
|
|
|
gap_start = i;
|
|
|
|
} else {
|
|
|
|
if (gap_start != -1) {
|
|
|
|
if (i > gap_start + 1) {
|
|
|
|
/* Gap of more than one size class. */
|
|
|
|
malloc_printf("[%u..%u]\n",
|
|
|
|
gap_start, i - 1);
|
|
|
|
} else {
|
|
|
|
/* Gap of one size class. */
|
|
|
|
malloc_printf("[%u]\n", gap_start);
|
|
|
|
}
|
|
|
|
gap_start = -1;
|
|
|
|
}
|
|
|
|
malloc_printf(
|
2007-03-23 05:05:48 +00:00
|
|
|
"%13u %1s %4u %4u %3u %9llu %7llu"
|
|
|
|
" %7lu %7lu %7llu %7llu\n",
|
2006-03-17 09:00:27 +00:00
|
|
|
i,
|
2006-03-26 23:37:25 +00:00
|
|
|
i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
|
2006-03-17 09:00:27 +00:00
|
|
|
arena->bins[i].reg_size,
|
|
|
|
arena->bins[i].nregs,
|
2007-03-23 05:05:48 +00:00
|
|
|
arena->bins[i].run_size >> pagesize_2pow,
|
2006-03-17 09:00:27 +00:00
|
|
|
arena->bins[i].stats.nrequests,
|
|
|
|
arena->bins[i].stats.nruns,
|
|
|
|
arena->bins[i].stats.highruns,
|
|
|
|
arena->bins[i].stats.curruns,
|
|
|
|
arena->bins[i].stats.npromote,
|
|
|
|
arena->bins[i].stats.ndemote);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (gap_start != -1) {
|
|
|
|
if (i > gap_start + 1) {
|
|
|
|
/* Gap of more than one size class. */
|
|
|
|
malloc_printf("[%u..%u]\n", gap_start, i - 1);
|
|
|
|
} else {
|
|
|
|
/* Gap of one size class. */
|
|
|
|
malloc_printf("[%u]\n", gap_start);
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* End Utility functions/macros.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* Begin chunk management functions.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline int
|
2006-01-13 18:38:56 +00:00
|
|
|
chunk_comp(chunk_node_t *a, chunk_node_t *b)
|
|
|
|
{
|
|
|
|
|
|
|
|
assert(a != NULL);
|
|
|
|
assert(b != NULL);
|
|
|
|
|
2006-01-20 03:11:11 +00:00
|
|
|
if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
|
2006-03-17 09:00:27 +00:00
|
|
|
return (-1);
|
2006-01-13 18:38:56 +00:00
|
|
|
else if (a->chunk == b->chunk)
|
2006-03-17 09:00:27 +00:00
|
|
|
return (0);
|
2006-01-13 18:38:56 +00:00
|
|
|
else
|
2006-03-17 09:00:27 +00:00
|
|
|
return (1);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Generate red-black tree code for chunks. */
|
2006-01-19 07:23:13 +00:00
|
|
|
RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
static void *
|
|
|
|
pages_map(void *addr, size_t size)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't use MAP_FIXED here, because it can cause the *replacement*
|
|
|
|
* of existing mappings, and we only want to create new mappings.
|
|
|
|
*/
|
|
|
|
ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
|
|
|
|
-1, 0);
|
|
|
|
assert(ret != NULL);
|
|
|
|
|
|
|
|
if (ret == MAP_FAILED)
|
|
|
|
ret = NULL;
|
|
|
|
else if (addr != NULL && ret != addr) {
|
|
|
|
/*
|
|
|
|
* We succeeded in mapping memory, but not in the right place.
|
|
|
|
*/
|
|
|
|
if (munmap(ret, size) == -1) {
|
|
|
|
char buf[STRERROR_BUF];
|
|
|
|
|
|
|
|
strerror_r(errno, buf, sizeof(buf));
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in munmap(): ", buf, "\n");
|
2006-01-13 18:38:56 +00:00
|
|
|
if (opt_abort)
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
ret = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(ret == NULL || (addr == NULL && ret != addr)
|
|
|
|
|| (addr != NULL && ret == addr));
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
pages_unmap(void *addr, size_t size)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (munmap(addr, size) == -1) {
|
|
|
|
char buf[STRERROR_BUF];
|
|
|
|
|
|
|
|
strerror_r(errno, buf, sizeof(buf));
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in munmap(): ", buf, "\n");
|
2006-01-13 18:38:56 +00:00
|
|
|
if (opt_abort)
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *
|
|
|
|
chunk_alloc(size_t size)
|
|
|
|
{
|
|
|
|
void *ret, *chunk;
|
|
|
|
chunk_node_t *tchunk, *delchunk;
|
|
|
|
|
|
|
|
assert(size != 0);
|
2007-03-23 22:58:15 +00:00
|
|
|
assert((size & chunksize_mask) == 0);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
if (size == chunksize) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* Check for address ranges that were previously chunks and try
|
|
|
|
* to use them.
|
|
|
|
*/
|
|
|
|
|
|
|
|
tchunk = RB_MIN(chunk_tree_s, &old_chunks);
|
|
|
|
while (tchunk != NULL) {
|
|
|
|
/* Found an address range. Try to recycle it. */
|
|
|
|
|
|
|
|
chunk = tchunk->chunk;
|
|
|
|
delchunk = tchunk;
|
|
|
|
tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Remove delchunk from the tree. */
|
2006-01-13 18:38:56 +00:00
|
|
|
RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
|
2006-03-17 09:00:27 +00:00
|
|
|
base_chunk_node_dealloc(delchunk);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
#ifdef USE_BRK
|
2006-01-20 03:11:11 +00:00
|
|
|
if ((uintptr_t)chunk >= (uintptr_t)brk_base
|
|
|
|
&& (uintptr_t)chunk < (uintptr_t)brk_max) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Re-use a previously freed brk chunk. */
|
|
|
|
ret = chunk;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if ((ret = pages_map(chunk, size)) != NULL) {
|
|
|
|
/* Success. */
|
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
}
|
2006-04-27 01:03:00 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-02-22 19:10:30 +00:00
|
|
|
/*
|
|
|
|
* Try to over-allocate, but allow the OS to place the allocation
|
|
|
|
* anywhere. Beware of size_t wrap-around.
|
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
if (size + chunksize > size) {
|
|
|
|
if ((ret = pages_map(NULL, size + chunksize)) != NULL) {
|
2007-02-22 19:10:30 +00:00
|
|
|
size_t offset = CHUNK_ADDR2OFFSET(ret);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Success. Clean up unneeded leading/trailing space.
|
|
|
|
*/
|
|
|
|
if (offset != 0) {
|
|
|
|
/* Leading space. */
|
2007-03-23 22:58:15 +00:00
|
|
|
pages_unmap(ret, chunksize - offset);
|
2007-02-22 19:10:30 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
ret = (void *)((uintptr_t)ret + (chunksize -
|
2007-02-22 19:10:30 +00:00
|
|
|
offset));
|
|
|
|
|
|
|
|
/* Trailing space. */
|
|
|
|
pages_unmap((void *)((uintptr_t)ret + size),
|
|
|
|
offset);
|
|
|
|
} else {
|
|
|
|
/* Trailing space only. */
|
|
|
|
pages_unmap((void *)((uintptr_t)ret + size),
|
2007-03-23 22:58:15 +00:00
|
|
|
chunksize);
|
2007-02-22 19:10:30 +00:00
|
|
|
}
|
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
}
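/*
* Worked example of the trimming above (hypothetical numbers, assuming 1 MB
* chunks): a request for one chunk maps 2 MB; if the kernel places the
* mapping 256 kB past a chunk boundary (offset == 0x40000), the leading
* 768 kB (chunksize - offset) is unmapped, ret advances to the next chunk
* boundary, and the trailing 256 kB (offset) beyond ret + size is unmapped,
* leaving exactly one chunk-aligned chunk.
*/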
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef USE_BRK
|
2006-04-27 01:03:00 +00:00
|
|
|
/*
|
|
|
|
* Try to create allocations in brk, in order to make full use of
|
|
|
|
* limited address space.
|
|
|
|
*/
|
|
|
|
if (brk_prev != (void *)-1) {
|
|
|
|
void *brk_cur;
|
|
|
|
intptr_t incr;
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
2006-04-27 01:03:00 +00:00
|
|
|
* The loop is necessary to recover from races with other
|
|
|
|
* threads that are using brk for something other than malloc.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_lock(&brk_mtx);
|
2006-04-27 01:03:00 +00:00
|
|
|
do {
|
|
|
|
/* Get the current end of brk. */
|
|
|
|
brk_cur = sbrk(0);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/*
|
2006-04-27 01:03:00 +00:00
|
|
|
* Calculate how much padding is necessary to
|
|
|
|
* chunk-align the end of brk.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
2006-04-27 01:03:00 +00:00
|
|
|
incr = (intptr_t)size
|
|
|
|
- (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
|
|
|
|
if (incr == size) {
|
|
|
|
ret = brk_cur;
|
|
|
|
} else {
|
2007-01-31 22:54:19 +00:00
|
|
|
ret = (void *)((intptr_t)brk_cur + incr);
|
2006-04-27 01:03:00 +00:00
|
|
|
incr += size;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-04-27 01:03:00 +00:00
|
|
|
brk_prev = sbrk(incr);
|
|
|
|
if (brk_prev == brk_cur) {
|
|
|
|
/* Success. */
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
2007-01-31 22:54:19 +00:00
|
|
|
brk_max = (void *)((intptr_t)ret + size);
|
2006-04-27 01:03:00 +00:00
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
} while (brk_prev != (void *)-1);
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2006-04-27 01:03:00 +00:00
|
|
|
#endif
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/* All strategies for allocation failed. */
|
|
|
|
ret = NULL;
|
|
|
|
RETURN:
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
if (ret != NULL) {
|
2007-03-23 22:58:15 +00:00
|
|
|
stats_chunks.nchunks += (size / chunksize);
|
|
|
|
stats_chunks.curchunks += (size / chunksize);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
if (stats_chunks.curchunks > stats_chunks.highchunks)
|
|
|
|
stats_chunks.highchunks = stats_chunks.curchunks;
|
|
|
|
#endif
|
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
|
|
|
assert(CHUNK_ADDR2BASE(ret) == ret);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
chunk_dealloc(void *chunk, size_t size)
|
|
|
|
{
|
2006-03-24 00:28:08 +00:00
|
|
|
size_t offset;
|
2006-06-20 20:38:25 +00:00
|
|
|
chunk_node_t key;
|
2006-03-24 00:28:08 +00:00
|
|
|
chunk_node_t *node;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
assert(chunk != NULL);
|
|
|
|
assert(CHUNK_ADDR2BASE(chunk) == chunk);
|
|
|
|
assert(size != 0);
|
2007-03-23 22:58:15 +00:00
|
|
|
assert((size & chunksize_mask) == 0);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
2006-04-27 01:03:00 +00:00
|
|
|
|
|
|
|
#ifdef USE_BRK
|
|
|
|
if ((uintptr_t)chunk >= (uintptr_t)brk_base
|
|
|
|
&& (uintptr_t)chunk < (uintptr_t)brk_max) {
|
|
|
|
void *brk_cur;
|
|
|
|
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_lock(&brk_mtx);
|
2006-04-27 01:03:00 +00:00
|
|
|
/* Get the current end of brk. */
|
|
|
|
brk_cur = sbrk(0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to shrink the data segment if this chunk is at the end
|
|
|
|
* of the data segment. The sbrk() call here is subject to a
|
|
|
|
* race condition with threads that use brk(2) or sbrk(2)
|
|
|
|
* directly, but the alternative would be to leak memory for
|
|
|
|
* the sake of poorly designed multi-threaded programs.
|
|
|
|
*/
|
|
|
|
if (brk_cur == brk_max
|
2007-01-31 22:54:19 +00:00
|
|
|
&& (void *)((uintptr_t)chunk + size) == brk_max
|
2006-04-27 01:03:00 +00:00
|
|
|
&& sbrk(-(intptr_t)size) == brk_max) {
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
2006-04-27 01:03:00 +00:00
|
|
|
if (brk_prev == brk_max) {
|
|
|
|
/* Success. */
|
2007-01-31 22:54:19 +00:00
|
|
|
brk_prev = (void *)((intptr_t)brk_max
|
|
|
|
- (intptr_t)size);
|
2006-04-27 01:03:00 +00:00
|
|
|
brk_max = brk_prev;
|
|
|
|
}
|
|
|
|
goto RETURN;
|
|
|
|
} else
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_unlock(&brk_mtx);
|
2006-04-27 01:03:00 +00:00
|
|
|
madvise(chunk, size, MADV_FREE);
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
pages_unmap(chunk, size);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Iteratively create records of each chunk-sized memory region that
|
|
|
|
* 'chunk' is composed of, so that the address range can be recycled
|
|
|
|
* if memory usage increases later on.
|
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
for (offset = 0; offset < size; offset += chunksize) {
|
2006-06-20 20:38:25 +00:00
|
|
|
/*
|
|
|
|
* It is possible for chunk to overlap existing entries in
|
|
|
|
* old_chunks if it is a huge allocation, so take care to not
|
|
|
|
* leak tree nodes.
|
|
|
|
*/
|
|
|
|
key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
|
|
|
|
if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) {
|
|
|
|
node = base_chunk_node_alloc();
|
|
|
|
if (node == NULL)
|
|
|
|
break;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-06-20 20:38:25 +00:00
|
|
|
node->chunk = key.chunk;
|
2007-03-23 22:58:15 +00:00
|
|
|
node->size = chunksize;
|
2006-06-20 20:38:25 +00:00
|
|
|
RB_INSERT(chunk_tree_s, &old_chunks, node);
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef USE_BRK
|
2006-04-27 01:03:00 +00:00
|
|
|
RETURN:
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 22:58:15 +00:00
|
|
|
stats_chunks.curchunks -= (size / chunksize);
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
2006-03-26 23:37:25 +00:00
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* End chunk management functions.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* Begin arena.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
/*
|
|
|
|
* Choose an arena based on a per-thread value (fast-path code, calls slow-path
|
|
|
|
* code if necessary).
|
|
|
|
*/
|
|
|
|
static inline arena_t *
|
|
|
|
choose_arena(void)
|
|
|
|
{
|
|
|
|
arena_t *ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can only use TLS if this is a PIC library, since for the static
|
|
|
|
* library version, libc's malloc is used by TLS allocation, which
|
|
|
|
* introduces a bootstrapping issue.
|
|
|
|
*/
|
|
|
|
#ifndef NO_TLS
|
|
|
|
if (__isthreaded == false) {
|
|
|
|
/*
|
|
|
|
* Avoid the overhead of TLS for single-threaded operation. If the
|
|
|
|
* app switches to threaded mode, the initial thread may end up
|
|
|
|
* being assigned to some other arena, but this one-time switch
|
|
|
|
* shouldn't cause significant issues.
|
|
|
|
*/
|
|
|
|
return (arenas[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = arenas_map;
|
|
|
|
if (ret == NULL)
|
|
|
|
ret = choose_arena_hard();
|
|
|
|
#else
|
|
|
|
if (__isthreaded) {
|
|
|
|
unsigned long ind;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hash _pthread_self() to one of the arenas. There is a prime
|
|
|
|
* number of arenas, so this has a reasonable chance of
|
|
|
|
* working. Even so, the hashing can be easily thwarted by
|
|
|
|
* inconvenient _pthread_self() values. Without specific
|
|
|
|
* knowledge of how _pthread_self() calculates values, we can't
|
2006-04-27 01:03:00 +00:00
|
|
|
* easily do much better than this.
|
2006-03-30 20:25:52 +00:00
|
|
|
*/
|
|
|
|
ind = (unsigned long) _pthread_self() % narenas;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Optimistically assume that arenas[ind] has been initialized.
|
|
|
|
* At worst, we find out that some other thread has already
|
|
|
|
* done so, after acquiring the lock in preparation. Note that
|
|
|
|
* this lazy locking also has the effect of lazily forcing
|
|
|
|
* cache coherency; without the lock acquisition, there's no
|
|
|
|
* guarantee that modification of arenas[ind] by another thread
|
|
|
|
* would be seen on this CPU for an arbitrary amount of time.
|
|
|
|
*
|
|
|
|
* In general, this approach to modifying a synchronized value
|
|
|
|
* isn't a good idea, but in this case we only ever modify the
|
|
|
|
* value once, so things work out well.
|
|
|
|
*/
|
|
|
|
ret = arenas[ind];
|
|
|
|
if (ret == NULL) {
|
|
|
|
/*
|
|
|
|
* Avoid races with another thread that may have already
|
|
|
|
* initialized arenas[ind].
|
|
|
|
*/
|
|
|
|
malloc_mutex_lock(&arenas_mtx);
|
|
|
|
if (arenas[ind] == NULL)
|
|
|
|
ret = arenas_extend((unsigned)ind);
|
|
|
|
else
|
|
|
|
ret = arenas[ind];
|
|
|
|
malloc_mutex_unlock(&arenas_mtx);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
ret = arenas[0];
|
|
|
|
#endif
|
|
|
|
|
|
|
|
assert(ret != NULL);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef NO_TLS
|
|
|
|
/*
|
|
|
|
* Choose an arena based on a per-thread value (slow-path code only, called
|
|
|
|
* only by choose_arena()).
|
|
|
|
*/
|
|
|
|
static arena_t *
|
|
|
|
choose_arena_hard(void)
|
|
|
|
{
|
|
|
|
arena_t *ret;
|
|
|
|
|
|
|
|
assert(__isthreaded);
|
|
|
|
|
|
|
|
/* Assign one of the arenas to this thread, in a round-robin fashion. */
|
|
|
|
malloc_mutex_lock(&arenas_mtx);
|
|
|
|
ret = arenas[next_arena];
|
|
|
|
if (ret == NULL)
|
|
|
|
ret = arenas_extend(next_arena);
|
|
|
|
if (ret == NULL) {
|
|
|
|
/*
|
|
|
|
* Make sure that this function never returns NULL, so that
|
|
|
|
* choose_arena() doesn't have to check for a NULL return
|
|
|
|
* value.
|
|
|
|
*/
|
|
|
|
ret = arenas[0];
|
|
|
|
}
|
|
|
|
next_arena = (next_arena + 1) % narenas;
|
|
|
|
malloc_mutex_unlock(&arenas_mtx);
|
|
|
|
arenas_map = ret;
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline int
|
|
|
|
arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(a != NULL);
|
|
|
|
assert(b != NULL);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if ((uintptr_t)a < (uintptr_t)b)
|
|
|
|
return (-1);
|
|
|
|
else if (a == b)
|
|
|
|
return (0);
|
|
|
|
else
|
|
|
|
return (1);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Generate red-black tree code for arena chunks. */
|
|
|
|
RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
|
|
|
|
|
2006-04-04 03:51:47 +00:00
|
|
|
static inline void *
|
|
|
|
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
2006-04-04 03:51:47 +00:00
|
|
|
void *ret;
|
|
|
|
unsigned i, mask, bit, regind;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(run->magic == ARENA_RUN_MAGIC);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
for (i = run->regs_minelm; i < bin->regs_mask_nelms; i++) {
|
2006-03-17 09:00:27 +00:00
|
|
|
mask = run->regs_mask[i];
|
|
|
|
if (mask != 0) {
|
2006-04-04 03:51:47 +00:00
|
|
|
/* Usable allocation found. */
|
2007-01-31 22:54:19 +00:00
|
|
|
bit = ffs((int)mask) - 1;
|
2006-04-04 03:51:47 +00:00
|
|
|
|
|
|
|
regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
|
|
|
|
ret = (void *)&((char *)run)[bin->reg0_offset
|
|
|
|
+ (bin->reg_size * regind)];
|
|
|
|
|
|
|
|
/* Clear bit. */
|
|
|
|
mask ^= (1 << bit);
|
|
|
|
run->regs_mask[i] = mask;
|
|
|
|
|
|
|
|
return (ret);
|
2006-01-13 18:38:56 +00:00
|
|
|
} else {
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* Make a note that nothing before this element
|
|
|
|
* contains a free region.
|
|
|
|
*/
|
|
|
|
run->regs_minelm = i + 1;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
}
|
2006-04-04 03:51:47 +00:00
|
|
|
/* Not reached. */
|
|
|
|
assert(0);
|
2006-04-05 18:46:24 +00:00
|
|
|
return (NULL);
|
2006-04-04 03:51:47 +00:00
|
|
|
}
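
/*
 * Illustrative sketch only; MALLOC_EXAMPLES is a hypothetical guard that is
 * never defined in this file.  This is a minimal, self-contained version of
 * the ffs()-based first-fit bitmap scan that arena_run_reg_alloc() performs
 * on run->regs_mask[]: set bits mean "region free", and the return value is
 * the region index, or -1 if the run is full.
 */
#ifdef MALLOC_EXAMPLES
static int
example_bitmap_reg_alloc(unsigned *mask, unsigned nelms)
{
	unsigned i, bit;

	for (i = 0; i < nelms; i++) {
		if (mask[i] != 0) {
			/* First free region is the lowest set bit. */
			bit = ffs((int)mask[i]) - 1;
			/* Clear the bit to mark the region as in use. */
			mask[i] ^= (1U << bit);
			return ((int)((i << (SIZEOF_INT_2POW + 3)) + bit));
		}
	}
	return (-1);	/* No free regions in this run. */
}
#endif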
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
|
|
|
|
{
|
2006-07-01 16:51:10 +00:00
|
|
|
	/*
	 * To divide by a number D that is not a power of two we multiply
	 * by (2^21 / D) and then right shift by 21 positions.
	 *
	 *   X / D
	 *
	 * becomes
	 *
	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
	 *
	 * (A sanity check of this identity appears after this function.)
	 */
#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
|
|
|
|
	static const unsigned size_invs[] = {
	    SIZE_INV(3),
	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
	    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
	    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
	    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
	    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
	    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
#if (QUANTUM_2POW_MIN < 4)
	    ,
	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
#endif
	};
|
2006-04-04 03:51:47 +00:00
|
|
|
unsigned diff, regind, elm, bit;
|
|
|
|
|
|
|
|
assert(run->magic == ARENA_RUN_MAGIC);
|
2006-07-01 16:51:10 +00:00
|
|
|
assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
|
|
|
|
>= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
|
2006-04-04 03:51:47 +00:00
|
|
|
|
|
|
|
/*
|
2006-07-01 16:51:10 +00:00
|
|
|
* Avoid doing division with a variable divisor if possible. Using
|
|
|
|
* actual division here can reduce allocator throughput by over 20%!
|
2006-04-04 03:51:47 +00:00
|
|
|
*/
|
|
|
|
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
|
2006-07-01 16:51:10 +00:00
|
|
|
if ((size & (size - 1)) == 0) {
|
|
|
|
/*
|
|
|
|
* log2_table allows fast division of a power of two in the
|
|
|
|
* [1..128] range.
|
|
|
|
*
|
|
|
|
* (x / divisor) becomes (x >> log2_table[divisor - 1]).
|
|
|
|
*/
|
|
|
|
static const unsigned char log2_table[] = {
|
|
|
|
0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
|
|
|
|
};
|
|
|
|
|
|
|
|
if (size <= 128)
|
|
|
|
regind = (diff >> log2_table[size - 1]);
|
|
|
|
else if (size <= 32768)
|
|
|
|
regind = diff >> (8 + log2_table[(size >> 8) - 1]);
|
|
|
|
else {
|
|
|
|
/*
|
|
|
|
* The page size is too large for us to use the lookup
|
|
|
|
* table. Use real division.
|
|
|
|
*/
|
2006-04-04 03:51:47 +00:00
|
|
|
regind = diff / size;
|
2006-07-01 16:51:10 +00:00
|
|
|
}
|
|
|
|
} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
|
|
|
|
<< QUANTUM_2POW_MIN) + 2) {
|
|
|
|
regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
|
|
|
|
regind >>= SIZE_INV_SHIFT;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* size_invs isn't large enough to handle this size class, so
|
|
|
|
* calculate regind using actual division. This only happens
|
|
|
|
* if the user increases small_max via the 'S' runtime
|
|
|
|
* configuration option.
|
|
|
|
*/
|
|
|
|
regind = diff / size;
|
|
|
|
	}
|
2006-09-08 17:52:15 +00:00
|
|
|
assert(diff == regind * size);
|
2006-04-04 03:51:47 +00:00
|
|
|
assert(regind < bin->nregs);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-04-04 03:51:47 +00:00
|
|
|
elm = regind >> (SIZEOF_INT_2POW + 3);
|
|
|
|
if (elm < run->regs_minelm)
|
|
|
|
run->regs_minelm = elm;
|
|
|
|
bit = regind - (elm << (SIZEOF_INT_2POW + 3));
|
|
|
|
assert((run->regs_mask[elm] & (1 << bit)) == 0);
|
|
|
|
run->regs_mask[elm] |= (1 << bit);
|
2006-07-01 16:51:10 +00:00
|
|
|
#undef SIZE_INV
|
|
|
|
#undef SIZE_INV_SHIFT
|
2006-01-13 18:38:56 +00:00
|
|
|
}
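
/*
 * Illustrative sketch only; MALLOC_EXAMPLES is a hypothetical guard that is
 * never defined in this file.  This checks the SIZE_INV reciprocal-division
 * identity used by arena_run_reg_dalloc() above: because diff is always an
 * exact multiple of size and far smaller than 2^21, multiplying by
 * floor(2^21 / size) + 1 and shifting right by 21 recovers the exact
 * quotient.  The macros are re-declared locally because the ones above are
 * #undef'd at the end of the function.
 */
#ifdef MALLOC_EXAMPLES
#define EXAMPLE_SIZE_INV_SHIFT 21
#define EXAMPLE_SIZE_INV(s) \
	(((1 << EXAMPLE_SIZE_INV_SHIFT) / ((s) << QUANTUM_2POW_MIN)) + 1)
static void
example_size_inv_check(void)
{
	unsigned s, size, regind, diff;

	for (s = 3; s < 32; s++) {
		size = s << QUANTUM_2POW_MIN;
		for (regind = 0; (diff = regind * size) < (1U << 14);
		    regind++) {
			assert(((diff * EXAMPLE_SIZE_INV(s)) >>
			    EXAMPLE_SIZE_INV_SHIFT) == regind);
		}
	}
}
#undef EXAMPLE_SIZE_INV
#undef EXAMPLE_SIZE_INV_SHIFT
#endif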
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static void
|
2007-03-23 05:05:48 +00:00
|
|
|
arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_chunk_t *chunk;
|
2007-03-23 05:05:48 +00:00
|
|
|
unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
|
|
|
|
unsigned i;
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
|
|
|
|
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
|
|
|
|
>> pagesize_2pow);
|
|
|
|
total_pages = chunk->map[run_ind].npages;
|
|
|
|
need_pages = (size >> pagesize_2pow);
|
2007-03-23 05:05:48 +00:00
|
|
|
assert(need_pages <= total_pages);
|
|
|
|
rem_pages = total_pages - need_pages;
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
/* Split enough pages from the front of run to fit allocation size. */
|
|
|
|
map_offset = run_ind;
|
|
|
|
for (i = 0; i < need_pages; i++) {
|
|
|
|
chunk->map[map_offset + i].npages = need_pages;
|
|
|
|
chunk->map[map_offset + i].pos = i;
|
|
|
|
}
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Keep track of trailing unused pages for later use. */
|
|
|
|
if (rem_pages > 0) {
|
|
|
|
/* Update map for trailing pages. */
|
|
|
|
map_offset += need_pages;
|
|
|
|
chunk->map[map_offset].npages = rem_pages;
|
|
|
|
chunk->map[map_offset].pos = POS_FREE;
|
|
|
|
chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
|
|
|
|
chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
chunk->pages_used += need_pages;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static arena_chunk_t *
|
|
|
|
arena_chunk_alloc(arena_t *arena)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_chunk_t *chunk;
|
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
if (arena->spare != NULL) {
|
|
|
|
chunk = arena->spare;
|
|
|
|
arena->spare = NULL;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
|
|
|
|
} else {
|
2007-03-23 22:58:15 +00:00
|
|
|
chunk = (arena_chunk_t *)chunk_alloc(chunksize);
|
2006-12-23 00:18:51 +00:00
|
|
|
if (chunk == NULL)
|
|
|
|
return (NULL);
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 22:58:15 +00:00
|
|
|
arena->stats.mapped += chunksize;
|
2007-03-23 05:05:48 +00:00
|
|
|
#endif
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
chunk->arena = arena;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Claim that no pages are in use, since the header is merely
|
|
|
|
* overhead.
|
|
|
|
*/
|
|
|
|
chunk->pages_used = 0;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
chunk->max_frun_npages = chunk_npages -
|
|
|
|
arena_chunk_header_npages;
|
|
|
|
chunk->min_frun_ind = arena_chunk_header_npages;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
/*
|
2007-03-23 05:05:48 +00:00
|
|
|
* Initialize enough of the map to support one maximal free run.
|
2006-12-23 00:18:51 +00:00
|
|
|
*/
|
2007-03-23 05:05:48 +00:00
|
|
|
chunk->map[arena_chunk_header_npages].npages = chunk_npages -
|
|
|
|
arena_chunk_header_npages;
|
|
|
|
chunk->map[arena_chunk_header_npages].pos = POS_FREE;
|
|
|
|
chunk->map[chunk_npages - 1].npages = chunk_npages -
|
|
|
|
arena_chunk_header_npages;
|
|
|
|
chunk->map[chunk_npages - 1].pos = POS_FREE;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
return (chunk);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static void
|
2006-12-23 00:18:51 +00:00
|
|
|
arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
/*
|
|
|
|
* Remove chunk from the chunk tree, regardless of whether this chunk
|
|
|
|
* will be cached, so that the arena does not use it.
|
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-12-23 00:18:51 +00:00
|
|
|
if (opt_hint == false) {
|
2007-03-23 05:05:48 +00:00
|
|
|
if (arena->spare != NULL) {
|
2007-03-23 22:58:15 +00:00
|
|
|
chunk_dealloc((void *)arena->spare, chunksize);
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 22:58:15 +00:00
|
|
|
arena->stats.mapped -= chunksize;
|
2007-03-23 05:05:48 +00:00
|
|
|
#endif
|
|
|
|
}
|
2006-12-23 00:18:51 +00:00
|
|
|
arena->spare = chunk;
|
|
|
|
} else {
|
|
|
|
assert(arena->spare == NULL);
|
2007-03-23 22:58:15 +00:00
|
|
|
chunk_dealloc((void *)chunk, chunksize);
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 22:58:15 +00:00
|
|
|
arena->stats.mapped -= chunksize;
|
2007-03-23 05:05:48 +00:00
|
|
|
#endif
|
2006-12-23 00:18:51 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static void
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(bin == run->bin);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
/* Promote. */
|
|
|
|
assert(run->free_min > run->nfree);
|
|
|
|
assert(run->quartile < RUN_Q100);
|
|
|
|
run->quartile++;
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
bin->stats.npromote++;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Re-file run. */
|
|
|
|
switch (run->quartile) {
|
|
|
|
case RUN_QINIT:
|
|
|
|
assert(0);
|
|
|
|
break;
|
|
|
|
case RUN_Q0:
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs0), run,
|
|
|
|
link);
|
2006-03-30 20:25:52 +00:00
|
|
|
run->free_max = bin->nregs - 1;
|
|
|
|
run->free_min = (bin->nregs >> 1) + 1;
|
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
|
|
|
break;
|
|
|
|
case RUN_Q25:
|
|
|
|
qr_remove(run, link);
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs25), run,
|
2006-03-30 20:25:52 +00:00
|
|
|
link);
|
|
|
|
run->free_max = ((bin->nregs >> 2) * 3) - 1;
|
|
|
|
run->free_min = (bin->nregs >> 2) + 1;
|
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
|
|
|
break;
|
|
|
|
case RUN_Q50:
|
|
|
|
qr_remove(run, link);
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs50), run,
|
2006-03-30 20:25:52 +00:00
|
|
|
link);
|
|
|
|
run->free_max = (bin->nregs >> 1) - 1;
|
|
|
|
run->free_min = 1;
|
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
|
|
|
break;
|
|
|
|
case RUN_Q75:
|
2006-03-20 04:05:05 +00:00
|
|
|
		/*
		 * Skip RUN_Q75 during promotion from RUN_Q50.
		 * Separate handling of RUN_Q75 and RUN_Q100 allows us
		 * to keep completely full runs in RUN_Q100, thus
		 * guaranteeing that runs in RUN_Q75 are only mostly
		 * full.  This provides a method for avoiding a linear
		 * search for non-full runs, which avoids some
		 * pathological edge cases.
		 */
|
|
|
|
run->quartile++;
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
/*
|
|
|
|
* Count as a double promotion, in order to keep
|
|
|
|
* promotions and demotions symmetric.
|
|
|
|
*/
|
|
|
|
bin->stats.npromote++;
|
|
|
|
#endif
|
2006-03-30 20:25:52 +00:00
|
|
|
/* Fall through. */
|
|
|
|
case RUN_Q100:
|
|
|
|
qr_remove(run, link);
|
|
|
|
assert(bin->runcur == run);
|
|
|
|
bin->runcur = NULL;
|
|
|
|
run->free_max = 0;
|
|
|
|
run->free_min = 0;
|
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
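
/*
 * Illustrative sketch only; MALLOC_EXAMPLES is a hypothetical guard that is
 * never defined in this file.  It spells out the fullness-quartile
 * thresholds that arena_bin_run_promote() and arena_bin_run_demote()
 * maintain, worked for a hypothetical bin with 64 regions per run.  A run
 * is promoted when nfree drops below free_min and demoted when nfree
 * exceeds free_max.
 */
#ifdef MALLOC_EXAMPLES
static void
example_quartile_thresholds(void)
{
	unsigned nregs = 64;

	/* RUN_Q0 (mostly empty): free_max 63, free_min 33. */
	assert(nregs - 1 == 63);
	assert((nregs >> 1) + 1 == 33);

	/* RUN_Q25: free_max 47, free_min 17. */
	assert(((nregs >> 2) * 3) - 1 == 47);
	assert((nregs >> 2) + 1 == 17);

	/* RUN_Q50: free_max 31, free_min 1. */
	assert((nregs >> 1) - 1 == 31);

	/* RUN_Q75 (mostly full): free_max 15, free_min 1. */
	assert((nregs >> 2) - 1 == 15);

	/* RUN_Q100 (completely full): free_max 0, free_min 0. */
}
#endif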
|
|
|
|
|
|
|
|
static void
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
|
2006-03-30 20:25:52 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
assert(bin == run->bin);
|
|
|
|
|
|
|
|
/* Demote. */
|
|
|
|
assert(run->free_max < run->nfree);
|
|
|
|
assert(run->quartile > RUN_QINIT);
|
|
|
|
run->quartile--;
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-03-30 20:25:52 +00:00
|
|
|
bin->stats.ndemote++;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Re-file run. */
|
|
|
|
switch (run->quartile) {
|
2006-03-24 22:13:49 +00:00
|
|
|
case RUN_QINIT:
|
2006-03-30 20:25:52 +00:00
|
|
|
qr_remove(run, link);
|
2006-03-17 09:00:27 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
bin->stats.curruns--;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
if (bin->runcur == run)
|
|
|
|
bin->runcur = NULL;
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_DEBUG
|
2006-03-17 09:00:27 +00:00
|
|
|
run->magic = 0;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_run_dalloc(arena, run, bin->run_size);
|
|
|
|
break;
|
2006-03-20 04:05:05 +00:00
|
|
|
case RUN_Q0:
|
2006-03-30 20:25:52 +00:00
|
|
|
qr_remove(run, link);
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs0), run,
|
|
|
|
link);
|
2006-03-30 20:25:52 +00:00
|
|
|
run->free_max = bin->nregs - 1;
|
|
|
|
run->free_min = (bin->nregs >> 1) + 1;
|
2006-03-24 22:13:49 +00:00
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
2006-03-17 09:00:27 +00:00
|
|
|
break;
|
2006-03-20 04:05:05 +00:00
|
|
|
case RUN_Q25:
|
2006-03-30 20:25:52 +00:00
|
|
|
qr_remove(run, link);
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs25), run,
|
2006-03-24 00:28:08 +00:00
|
|
|
link);
|
2006-03-30 20:25:52 +00:00
|
|
|
run->free_max = ((bin->nregs >> 2) * 3) - 1;
|
|
|
|
run->free_min = (bin->nregs >> 2) + 1;
|
2006-03-24 22:13:49 +00:00
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
2006-03-17 09:00:27 +00:00
|
|
|
break;
|
2006-03-20 04:05:05 +00:00
|
|
|
case RUN_Q50:
|
2006-03-30 20:25:52 +00:00
|
|
|
qr_remove(run, link);
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs50), run,
|
2006-03-24 00:28:08 +00:00
|
|
|
link);
|
2006-03-30 20:25:52 +00:00
|
|
|
run->free_max = (bin->nregs >> 1) - 1;
|
2006-03-20 04:05:05 +00:00
|
|
|
run->free_min = 1;
|
2006-03-24 22:13:49 +00:00
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
2006-03-20 04:05:05 +00:00
|
|
|
break;
|
2006-03-17 09:00:27 +00:00
|
|
|
case RUN_Q75:
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_before_insert(arena_bin_link(&bin->runs75), run,
|
2006-03-24 00:28:08 +00:00
|
|
|
link);
|
2006-03-30 20:25:52 +00:00
|
|
|
run->free_max = (bin->nregs >> 2) - 1;
|
2006-03-17 09:00:27 +00:00
|
|
|
run->free_min = 1;
|
2006-03-24 22:13:49 +00:00
|
|
|
assert(run->nfree <= run->free_max);
|
|
|
|
assert(run->nfree >= run->free_min);
|
2006-03-17 09:00:27 +00:00
|
|
|
break;
|
|
|
|
case RUN_Q100:
|
|
|
|
default:
|
|
|
|
assert(0);
|
|
|
|
break;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static arena_run_t *
|
2007-03-23 05:05:48 +00:00
|
|
|
arena_run_alloc(arena_t *arena, size_t size)
|
2006-03-17 09:00:27 +00:00
|
|
|
{
|
|
|
|
arena_chunk_t *chunk;
|
2007-03-23 05:05:48 +00:00
|
|
|
arena_run_t *run;
|
|
|
|
unsigned need_npages, limit_pages, compl_need_npages;
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
assert(size <= (chunksize - (arena_chunk_header_npages <<
|
2007-03-23 05:05:48 +00:00
|
|
|
pagesize_2pow)));
|
2007-03-23 22:58:15 +00:00
|
|
|
assert((size & pagesize_mask) == 0);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
	/*
	 * Search through the arena's chunks in address order for a free run
	 * that is large enough.  Look for the first fit.
	 */
|
2007-03-23 05:05:48 +00:00
|
|
|
need_npages = (size >> pagesize_2pow);
|
|
|
|
limit_pages = chunk_npages - arena_chunk_header_npages;
|
|
|
|
compl_need_npages = limit_pages - need_npages;
|
2006-03-17 09:00:27 +00:00
|
|
|
RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
|
2007-03-23 05:05:48 +00:00
|
|
|
/*
|
|
|
|
* Avoid searching this chunk if there are not enough
|
|
|
|
* contiguous free pages for there to possibly be a large
|
|
|
|
* enough free run.
|
|
|
|
*/
|
|
|
|
if (chunk->pages_used <= compl_need_npages &&
|
|
|
|
need_npages <= chunk->max_frun_npages) {
|
|
|
|
arena_chunk_map_t *mapelm;
|
|
|
|
unsigned i;
|
2007-03-23 22:58:15 +00:00
|
|
|
unsigned max_frun_npages = 0;
|
|
|
|
unsigned min_frun_ind = chunk_npages;
|
2007-03-23 05:05:48 +00:00
|
|
|
|
|
|
|
assert(chunk->min_frun_ind >=
|
|
|
|
arena_chunk_header_npages);
|
|
|
|
for (i = chunk->min_frun_ind; i < chunk_npages;) {
|
|
|
|
mapelm = &chunk->map[i];
|
|
|
|
if (mapelm->pos == POS_FREE) {
|
|
|
|
if (mapelm->npages >= need_npages) {
|
|
|
|
run = (arena_run_t *)
|
|
|
|
((uintptr_t)chunk + (i <<
|
|
|
|
pagesize_2pow));
|
|
|
|
/* Update page map. */
|
|
|
|
arena_run_split(arena, run,
|
|
|
|
size);
|
|
|
|
return (run);
|
|
|
|
}
|
|
|
|
if (mapelm->npages >
|
|
|
|
max_frun_npages) {
|
|
|
|
max_frun_npages =
|
|
|
|
mapelm->npages;
|
|
|
|
}
|
|
|
|
if (i < min_frun_ind) {
|
|
|
|
min_frun_ind = i;
|
|
|
|
if (i < chunk->min_frun_ind)
|
|
|
|
chunk->min_frun_ind = i;
|
|
|
|
}
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2007-03-23 05:05:48 +00:00
|
|
|
i += mapelm->npages;
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2007-03-23 05:05:48 +00:00
|
|
|
/*
|
|
|
|
* Search failure. Reset cached chunk->max_frun_npages.
|
|
|
|
* chunk->min_frun_ind was already reset above (if
|
|
|
|
* necessary).
|
|
|
|
*/
|
|
|
|
chunk->max_frun_npages = max_frun_npages;
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
/* No usable runs. Allocate a new chunk, then try again. */
|
2007-03-23 05:05:48 +00:00
|
|
|
chunk = arena_chunk_alloc(arena);
|
|
|
|
if (chunk == NULL)
|
2006-03-17 09:00:27 +00:00
|
|
|
return (NULL);
|
2007-03-23 05:05:48 +00:00
|
|
|
run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
|
|
|
|
pagesize_2pow));
|
|
|
|
/* Update page map. */
|
|
|
|
arena_run_split(arena, run, size);
|
|
|
|
return (run);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static void
|
|
|
|
arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_chunk_t *chunk;
|
2007-03-23 05:05:48 +00:00
|
|
|
unsigned run_ind, run_pages;
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
|
2007-03-23 05:05:48 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
|
|
|
|
>> pagesize_2pow);
|
2007-03-23 22:58:15 +00:00
|
|
|
assert(run_ind >= arena_chunk_header_npages);
|
|
|
|
assert(run_ind < (chunksize >> pagesize_2pow));
|
2006-03-17 09:00:27 +00:00
|
|
|
run_pages = (size >> pagesize_2pow);
|
2007-03-23 05:05:48 +00:00
|
|
|
assert(run_pages == chunk->map[run_ind].npages);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Subtract pages from count of pages used in chunk. */
|
|
|
|
chunk->pages_used -= run_pages;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Mark run as deallocated. */
|
2007-03-23 05:05:48 +00:00
|
|
|
assert(chunk->map[run_ind].npages == run_pages);
|
|
|
|
chunk->map[run_ind].pos = POS_FREE;
|
|
|
|
assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
|
|
|
|
chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/*
|
2006-03-26 23:37:25 +00:00
|
|
|
* Tell the kernel that we don't need the data in this run, but only if
|
|
|
|
* requested via runtime configuration.
|
2006-03-17 09:00:27 +00:00
|
|
|
*/
|
2007-03-23 05:05:48 +00:00
|
|
|
if (opt_hint)
|
2006-03-17 09:00:27 +00:00
|
|
|
madvise(run, size, MADV_FREE);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Try to coalesce with neighboring runs. */
|
|
|
|
if (run_ind > arena_chunk_header_npages &&
|
|
|
|
chunk->map[run_ind - 1].pos == POS_FREE) {
|
|
|
|
unsigned prev_npages;
|
|
|
|
|
|
|
|
/* Coalesce with previous run. */
|
|
|
|
prev_npages = chunk->map[run_ind - 1].npages;
|
|
|
|
run_ind -= prev_npages;
|
|
|
|
assert(chunk->map[run_ind].npages == prev_npages);
|
|
|
|
assert(chunk->map[run_ind].pos == POS_FREE);
|
|
|
|
run_pages += prev_npages;
|
|
|
|
|
|
|
|
chunk->map[run_ind].npages = run_pages;
|
|
|
|
assert(chunk->map[run_ind].pos == POS_FREE);
|
|
|
|
chunk->map[run_ind + run_pages - 1].npages = run_pages;
|
|
|
|
assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
if (run_ind + run_pages < chunk_npages &&
|
|
|
|
chunk->map[run_ind + run_pages].pos == POS_FREE) {
|
|
|
|
unsigned next_npages;
|
|
|
|
|
|
|
|
/* Coalesce with next run. */
|
|
|
|
next_npages = chunk->map[run_ind + run_pages].npages;
|
|
|
|
run_pages += next_npages;
|
|
|
|
assert(chunk->map[run_ind + run_pages - 1].npages ==
|
|
|
|
next_npages);
|
|
|
|
assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
|
|
|
|
|
|
|
|
chunk->map[run_ind].npages = run_pages;
|
|
|
|
chunk->map[run_ind].pos = POS_FREE;
|
|
|
|
chunk->map[run_ind + run_pages - 1].npages = run_pages;
|
|
|
|
assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
if (chunk->map[run_ind].npages > chunk->max_frun_npages)
|
|
|
|
chunk->max_frun_npages = chunk->map[run_ind].npages;
|
|
|
|
if (run_ind < chunk->min_frun_ind)
|
|
|
|
chunk->min_frun_ind = run_ind;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Deallocate chunk if it is now completely unused. */
|
|
|
|
if (chunk->pages_used == 0)
|
2006-12-23 00:18:51 +00:00
|
|
|
arena_chunk_dealloc(arena, chunk);
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static arena_run_t *
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
|
2006-03-17 09:00:27 +00:00
|
|
|
{
|
|
|
|
arena_run_t *run;
|
|
|
|
unsigned i, remainder;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Look for a usable run. */
|
2007-01-31 22:54:19 +00:00
|
|
|
if ((run = qr_next(arena_bin_link(&bin->runs50), link))
|
|
|
|
!= arena_bin_link(&bin->runs50)
|
|
|
|
|| (run = qr_next(arena_bin_link(&bin->runs25), link))
|
|
|
|
!= arena_bin_link(&bin->runs25)
|
|
|
|
|| (run = qr_next(arena_bin_link(&bin->runs0), link))
|
|
|
|
!= arena_bin_link(&bin->runs0)
|
|
|
|
|| (run = qr_next(arena_bin_link(&bin->runs75), link))
|
|
|
|
!= arena_bin_link(&bin->runs75)) {
|
2006-03-20 04:05:05 +00:00
|
|
|
/* run is guaranteed to have available space. */
|
2006-03-17 09:00:27 +00:00
|
|
|
qr_remove(run, link);
|
|
|
|
return (run);
|
|
|
|
}
|
2006-03-26 23:37:25 +00:00
|
|
|
/* No existing runs have any space available. */
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Allocate a new run. */
|
2007-03-23 05:05:48 +00:00
|
|
|
run = arena_run_alloc(arena, bin->run_size);
|
2006-03-17 09:00:27 +00:00
|
|
|
if (run == NULL)
|
|
|
|
return (NULL);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Initialize run internals. */
|
|
|
|
qr_new(run, link);
|
|
|
|
run->bin = bin;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
for (i = 0; i < bin->regs_mask_nelms; i++)
|
2006-03-17 09:00:27 +00:00
|
|
|
run->regs_mask[i] = UINT_MAX;
|
2006-03-30 20:25:52 +00:00
|
|
|
remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
|
2006-03-17 09:00:27 +00:00
|
|
|
if (remainder != 0) {
|
|
|
|
/* The last element has spare bits that need to be unset. */
|
2006-03-30 20:25:52 +00:00
|
|
|
run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
|
2006-03-17 09:00:27 +00:00
|
|
|
- remainder));
|
|
|
|
i++;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
run->regs_minelm = 0;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
run->nfree = bin->nregs;
|
2006-03-24 22:13:49 +00:00
|
|
|
run->quartile = RUN_QINIT;
|
2006-03-17 09:00:27 +00:00
|
|
|
run->free_max = bin->nregs;
|
|
|
|
run->free_min = ((bin->nregs >> 2) * 3) + 1;
|
|
|
|
#ifdef MALLOC_DEBUG
|
|
|
|
run->magic = ARENA_RUN_MAGIC;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-03-17 09:00:27 +00:00
|
|
|
bin->stats.nruns++;
|
|
|
|
bin->stats.curruns++;
|
|
|
|
if (bin->stats.curruns > bin->stats.highruns)
|
|
|
|
bin->stats.highruns = bin->stats.curruns;
|
2003-10-25 12:56:51 +00:00
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
return (run);
|
|
|
|
}
|
1996-09-23 19:26:39 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* bin->runcur must have space available before this function is called. */
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline void *
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
|
2006-03-17 09:00:27 +00:00
|
|
|
{
|
|
|
|
void *ret;
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(run->magic == ARENA_RUN_MAGIC);
|
2006-03-20 04:05:05 +00:00
|
|
|
assert(run->nfree > 0);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-04-04 03:51:47 +00:00
|
|
|
ret = arena_run_reg_alloc(run, bin);
|
|
|
|
assert(ret != NULL);
|
2006-03-17 09:00:27 +00:00
|
|
|
run->nfree--;
|
|
|
|
if (run->nfree < run->free_min) {
|
|
|
|
/* Promote run to higher fullness quartile. */
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_run_promote(arena, bin, run);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1996-09-23 19:26:39 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
1996-09-23 19:26:39 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
|
2006-01-13 18:38:56 +00:00
|
|
|
static void *
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
|
2006-03-17 09:00:27 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
|
|
|
|
|
2006-07-27 04:00:12 +00:00
|
|
|
bin->runcur = arena_bin_nonfull_run_get(arena, bin);
|
2006-03-17 09:00:27 +00:00
|
|
|
if (bin->runcur == NULL)
|
|
|
|
return (NULL);
|
|
|
|
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
|
2006-03-24 22:13:49 +00:00
|
|
|
assert(bin->runcur->nfree > 0);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2006-07-27 04:00:12 +00:00
|
|
|
return (arena_bin_malloc_easy(arena, bin, bin->runcur));
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Calculate bin->run_size such that it meets the following constraints:
 *
 * *) bin->run_size >= min_run_size
 * *) bin->run_size <= arena_maxclass
 * *) bin->run_size <= RUN_MAX_SMALL
 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
 *
 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are also calculated
 * here, since these settings are all interdependent.  (A worked example of
 * the fitting loop appears after this function.)
 */
|
|
|
|
static size_t
|
|
|
|
arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
|
|
|
{
|
|
|
|
size_t try_run_size, good_run_size;
|
2007-03-23 22:58:15 +00:00
|
|
|
unsigned good_nregs, good_mask_nelms, good_reg0_offset;
|
|
|
|
unsigned try_nregs, try_mask_nelms, try_reg0_offset;
|
2007-03-23 05:05:48 +00:00
|
|
|
float max_ovrhd = RUN_MAX_OVRHD;
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
assert(min_run_size >= pagesize);
|
|
|
|
assert(min_run_size <= arena_maxclass);
|
|
|
|
assert(min_run_size <= RUN_MAX_SMALL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate known-valid settings before entering the run_size
|
|
|
|
* expansion loop, so that the first part of the loop always copies
|
|
|
|
* valid settings.
|
|
|
|
*
|
|
|
|
* The do..while loop iteratively reduces the number of regions until
|
|
|
|
* the run header and the regions no longer overlap. A closed formula
|
|
|
|
* would be quite messy, since there is an interdependency between the
|
|
|
|
* header's mask length and the number of regions.
|
|
|
|
*/
|
|
|
|
try_run_size = min_run_size;
|
|
|
|
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
|
|
|
|
+ 1; /* Counter-act the first line of the loop. */
|
|
|
|
do {
|
|
|
|
try_nregs--;
|
|
|
|
try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
|
|
|
|
((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
|
|
|
|
try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
|
|
|
|
} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
|
|
|
|
> try_reg0_offset);
|
|
|
|
|
|
|
|
/* run_size expansion loop. */
|
|
|
|
do {
|
|
|
|
/*
|
|
|
|
* Copy valid settings before trying more aggressive settings.
|
|
|
|
*/
|
|
|
|
good_run_size = try_run_size;
|
|
|
|
good_nregs = try_nregs;
|
|
|
|
good_mask_nelms = try_mask_nelms;
|
|
|
|
good_reg0_offset = try_reg0_offset;
|
|
|
|
|
|
|
|
/* Try more aggressive settings. */
|
2007-03-23 05:05:48 +00:00
|
|
|
try_run_size += pagesize;
|
|
|
|
try_nregs = ((try_run_size - sizeof(arena_run_t)) /
|
|
|
|
bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
|
|
|
|
do {
|
|
|
|
try_nregs--;
|
|
|
|
try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
|
|
|
|
((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
|
|
|
|
1 : 0);
|
|
|
|
try_reg0_offset = try_run_size - (try_nregs *
|
|
|
|
bin->reg_size);
|
|
|
|
} while (sizeof(arena_run_t) + (sizeof(unsigned) *
|
|
|
|
(try_mask_nelms - 1)) > try_reg0_offset);
|
|
|
|
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
|
2007-03-23 05:05:48 +00:00
|
|
|
&& max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
|
|
|
|
&& ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
|
2007-03-23 05:05:48 +00:00
|
|
|
max_ovrhd);
|
|
|
|
|
|
|
|
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
|
|
|
|
<= good_reg0_offset);
|
|
|
|
assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
|
|
|
|
|
|
|
|
/* Copy final settings. */
|
|
|
|
bin->run_size = good_run_size;
|
|
|
|
bin->nregs = good_nregs;
|
|
|
|
bin->regs_mask_nelms = good_mask_nelms;
|
|
|
|
bin->reg0_offset = good_reg0_offset;
|
|
|
|
|
|
|
|
return (good_run_size);
|
|
|
|
}
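
/*
 * Illustrative sketch only; MALLOC_EXAMPLES is a hypothetical guard that is
 * never defined in this file.  It re-runs the inner fitting loop of
 * arena_bin_run_size_calc() for a single page and a hypothetical 64-byte
 * size class, then checks the invariant the function guarantees: the run
 * header (including the trailing bitmask words) never overlaps region 0.
 */
#ifdef MALLOC_EXAMPLES
static void
example_run_size_fit(void)
{
	size_t reg_size = 64, run_size = pagesize, reg0_offset;
	unsigned nregs, mask_nelms;

	nregs = ((run_size - sizeof(arena_run_t)) / reg_size) + 1;
	do {
		nregs--;
		mask_nelms = (nregs >> (SIZEOF_INT_2POW + 3)) +
		    ((nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
		reg0_offset = run_size - (nregs * reg_size);
	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (mask_nelms - 1)) >
	    reg0_offset);

	/* The header and region 0 must not overlap. */
	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (mask_nelms - 1)) <=
	    reg0_offset);
	/* The bitmask must cover every region. */
	assert((mask_nelms << (SIZEOF_INT_2POW + 3)) >= nregs);
}
#endif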
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
static void *
|
|
|
|
arena_malloc(arena_t *arena, size_t size)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
void *ret;
|
1996-10-26 08:19:07 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(arena != NULL);
|
|
|
|
assert(arena->magic == ARENA_MAGIC);
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(size != 0);
|
|
|
|
assert(QUANTUM_CEILING(size) <= arena_maxclass);
|
1996-09-23 19:26:39 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if (size <= bin_maxclass) {
|
|
|
|
arena_bin_t *bin;
|
|
|
|
arena_run_t *run;
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Small allocation. */
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if (size < small_min) {
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Tiny. */
|
2006-03-17 09:00:27 +00:00
|
|
|
size = pow2_ceil(size);
|
|
|
|
bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
|
2007-01-31 22:54:19 +00:00
|
|
|
1)))];
|
2006-07-27 04:00:12 +00:00
|
|
|
#if (!defined(NDEBUG) || defined(MALLOC_STATS))
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2006-07-27 04:00:12 +00:00
|
|
|
* Bin calculation is always correct, but we may need
|
|
|
|
* to fix size for the purposes of assertions and/or
|
|
|
|
* stats accuracy.
|
2006-03-17 09:00:27 +00:00
|
|
|
*/
|
|
|
|
if (size < (1 << TINY_MIN_2POW))
|
|
|
|
size = (1 << TINY_MIN_2POW);
|
2006-03-17 09:00:27 +00:00
|
|
|
#endif
|
|
|
|
} else if (size <= small_max) {
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Quantum-spaced. */
|
2006-03-17 09:00:27 +00:00
|
|
|
size = QUANTUM_CEILING(size);
|
|
|
|
bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
|
|
|
|
- 1];
|
|
|
|
} else {
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Sub-page. */
|
2006-03-17 09:00:27 +00:00
|
|
|
size = pow2_ceil(size);
|
|
|
|
bin = &arena->bins[ntbins + nqbins
|
2007-01-31 22:54:19 +00:00
|
|
|
+ (ffs((int)(size >> opt_small_max_2pow)) - 2)];
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
|
|
|
assert(size == bin->reg_size);
|
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
malloc_mutex_lock(&arena->mtx);
|
2006-03-20 04:05:05 +00:00
|
|
|
if ((run = bin->runcur) != NULL)
|
2006-07-27 04:00:12 +00:00
|
|
|
ret = arena_bin_malloc_easy(arena, bin, run);
|
2006-03-17 09:00:27 +00:00
|
|
|
else
|
2006-07-27 04:00:12 +00:00
|
|
|
ret = arena_bin_malloc_hard(arena, bin);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
if (ret == NULL) {
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
bin->stats.nrequests++;
|
2007-03-23 05:05:48 +00:00
|
|
|
arena->stats.nmalloc_small++;
|
2007-03-23 22:58:15 +00:00
|
|
|
arena->stats.allocated_small += size;
|
2006-03-17 09:00:27 +00:00
|
|
|
#endif
|
|
|
|
} else {
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Large allocation. */
|
|
|
|
size = PAGE_CEILING(size);
|
2006-03-30 20:25:52 +00:00
|
|
|
malloc_mutex_lock(&arena->mtx);
|
2007-03-23 05:05:48 +00:00
|
|
|
ret = (void *)arena_run_alloc(arena, size);
|
2007-03-23 22:58:15 +00:00
|
|
|
if (ret == NULL) {
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
|
|
|
return (NULL);
|
|
|
|
}
|
2006-03-30 20:25:52 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 05:05:48 +00:00
|
|
|
arena->stats.nmalloc_large++;
|
2007-03-23 22:58:15 +00:00
|
|
|
arena->stats.allocated_large += size;
|
2006-03-30 20:25:52 +00:00
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0xa5, size);
|
|
|
|
else if (opt_zero)
|
|
|
|
memset(ret, 0, size);
|
|
|
|
return (ret);
|
|
|
|
}
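
/*
 * Illustrative sketch only; MALLOC_EXAMPLES is a hypothetical guard that is
 * never defined in this file.  It works the small-allocation bin index
 * formulas used by arena_malloc() for the runtime defaults described at the
 * top of this file (16 byte quantum, 512 byte small_max, 4 kB pages), which
 * give 3 tiny bins, 32 quantum-spaced bins, and 2 sub-page bins; the ex_*
 * values below simply restate those assumed defaults.
 */
#ifdef MALLOC_EXAMPLES
static void
example_bin_index(void)
{
	unsigned ex_ntbins = 3, ex_nqbins = 32;
	unsigned ex_tiny_min_2pow = 1, ex_quantum_2pow = 4;
	unsigned ex_small_max_2pow = 9;

	/* Tiny: the 2, 4 and 8 byte classes land in bins 0..2. */
	assert(ffs((int)(8 >> (ex_tiny_min_2pow + 1))) == 2);

	/* Quantum-spaced: a 512 byte request lands in the last quantum bin. */
	assert(ex_ntbins + (512 >> ex_quantum_2pow) - 1 == 34);

	/* Sub-page: 1 kB and 2 kB land in the bins after the quantum bins. */
	assert(ex_ntbins + ex_nqbins +
	    (ffs((int)(2048 >> ex_small_max_2pow)) - 2) == 36);
}
#endif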
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
|
|
|
|
unsigned npages)
|
|
|
|
{
|
|
|
|
unsigned i;
|
|
|
|
|
|
|
|
assert(npages > 0);
|
|
|
|
|
|
|
|
	/*
	 * Modify the map such that arena_run_dalloc() sees the run as
	 * separately allocated.
	 */
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
chunk->map[pageind + i].npages = npages;
|
|
|
|
chunk->map[pageind + i].pos = i;
|
|
|
|
}
|
|
|
|
arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
|
|
|
|
pagesize_2pow)), npages << pagesize_2pow);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Only handles large allocations that require more than page alignment. */
|
|
|
|
static void *
|
|
|
|
arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t offset;
|
|
|
|
arena_chunk_t *chunk;
|
|
|
|
unsigned pageind, i, npages;
|
|
|
|
|
|
|
|
assert((size & pagesize_mask) == 0);
|
|
|
|
assert((alignment & pagesize_mask) == 0);
|
|
|
|
|
|
|
|
npages = size >> pagesize_2pow;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&arena->mtx);
|
|
|
|
ret = (void *)arena_run_alloc(arena, alloc_size);
|
|
|
|
if (ret == NULL) {
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
|
|
|
|
|
|
|
|
offset = (uintptr_t)ret & (alignment - 1);
|
|
|
|
assert((offset & pagesize_mask) == 0);
|
|
|
|
assert(offset < alloc_size);
|
|
|
|
if (offset == 0) {
|
|
|
|
pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
|
|
|
|
pagesize_2pow);
|
|
|
|
|
|
|
|
/* Update the map for the run to be kept. */
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
chunk->map[pageind + i].npages = npages;
|
|
|
|
assert(chunk->map[pageind + i].pos == i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Trim trailing space. */
|
|
|
|
arena_palloc_trim(arena, chunk, pageind + npages,
|
|
|
|
(alloc_size - size) >> pagesize_2pow);
|
|
|
|
} else {
|
|
|
|
size_t leadsize, trailsize;
|
|
|
|
|
|
|
|
leadsize = alignment - offset;
|
|
|
|
ret = (void *)((uintptr_t)ret + leadsize);
|
|
|
|
pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
|
|
|
|
pagesize_2pow);
|
|
|
|
|
|
|
|
/* Update the map for the run to be kept. */
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
chunk->map[pageind + i].npages = npages;
|
|
|
|
chunk->map[pageind + i].pos = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Trim leading space. */
|
|
|
|
arena_palloc_trim(arena, chunk, pageind - (leadsize >>
|
|
|
|
pagesize_2pow), leadsize >> pagesize_2pow);
|
|
|
|
|
|
|
|
trailsize = alloc_size - leadsize - size;
|
|
|
|
if (trailsize != 0) {
|
|
|
|
/* Trim trailing space. */
|
|
|
|
assert(trailsize < alloc_size);
|
|
|
|
arena_palloc_trim(arena, chunk, pageind + npages,
|
|
|
|
trailsize >> pagesize_2pow);
|
|
|
|
}
|
|
|
|
}
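	/*
	 * Editor's example (a sketch assuming 4kB pages): for a 20kB request
	 * at 16kB alignment, ipalloc() (later in this file) passes
	 * alloc_size = 20kB + 16kB - 4kB = 32kB.  If the run obtained above
	 * starts 12kB past a 16kB boundary, leadsize = 4kB is trimmed from
	 * the front, the 20kB run is kept at the boundary, and
	 * trailsize = 32kB - 4kB - 20kB = 8kB is trimmed from the back.
	 */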
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
arena->stats.nmalloc_large++;
|
|
|
|
arena->stats.allocated_large += size;
|
|
|
|
#endif
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
|
|
|
|
|
|
|
if (opt_junk)
|
2006-03-19 18:28:06 +00:00
|
|
|
memset(ret, 0xa5, size);
|
2007-03-23 22:58:15 +00:00
|
|
|
else if (opt_zero)
|
2006-03-19 18:28:06 +00:00
|
|
|
memset(ret, 0, size);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Return the size of the allocation pointed to by ptr. */
|
|
|
|
static size_t
|
2006-03-30 20:25:52 +00:00
|
|
|
arena_salloc(const void *ptr)
|
2006-03-26 23:37:25 +00:00
|
|
|
{
|
|
|
|
size_t ret;
|
|
|
|
arena_chunk_t *chunk;
|
2007-03-23 22:58:15 +00:00
|
|
|
arena_chunk_map_t *mapelm;
|
|
|
|
unsigned pageind;
|
2006-03-26 23:37:25 +00:00
|
|
|
|
|
|
|
assert(ptr != NULL);
|
|
|
|
assert(CHUNK_ADDR2BASE(ptr) != ptr);
|
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
/*
|
|
|
|
* No arena data structures that we query here can change in a way that
|
|
|
|
* affects this function, so we don't need to lock.
|
|
|
|
*/
|
2006-03-26 23:37:25 +00:00
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
|
|
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
|
2007-03-23 22:58:15 +00:00
|
|
|
mapelm = &chunk->map[pageind];
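	/*
	 * Editor's note (not part of the original source): arena_palloc() and
	 * arena_palloc_trim() above record each run in the page map as
	 * (npages, pos) pairs, where npages is the run length in pages and
	 * pos is the page's offset within its run.  A large allocation
	 * therefore begins on a page with pos == 0 and ptr equal to that
	 * page's address; otherwise ptr points at a region inside a small
	 * run, and pageind - pos backs up to the page holding the run header.
	 */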
|
|
|
|
	if (mapelm->pos != 0 || ptr != (void *)((uintptr_t)chunk + (pageind <<
|
2007-03-23 05:05:48 +00:00
|
|
|
	    pagesize_2pow))) {
|
2006-03-26 23:37:25 +00:00
|
|
|
arena_run_t *run;
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
pageind -= mapelm->pos;
|
2006-03-26 23:37:25 +00:00
|
|
|
|
|
|
|
run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
|
|
|
|
assert(run->magic == ARENA_RUN_MAGIC);
|
|
|
|
ret = run->bin->reg_size;
|
|
|
|
} else
|
2007-03-23 22:58:15 +00:00
|
|
|
ret = mapelm->npages << pagesize_2pow;
|
2006-03-26 23:37:25 +00:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2006-03-19 18:28:06 +00:00
|
|
|
static void *
|
2006-03-30 20:25:52 +00:00
|
|
|
arena_ralloc(void *ptr, size_t size, size_t oldsize)
|
2006-03-19 18:28:06 +00:00
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Avoid moving the allocation if the size class would not change. */
|
2006-03-19 18:28:06 +00:00
|
|
|
if (size < small_min) {
|
|
|
|
if (oldsize < small_min &&
|
2007-03-20 03:44:10 +00:00
|
|
|
ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
|
|
|
|
== ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
|
2006-03-19 18:28:06 +00:00
|
|
|
goto IN_PLACE;
|
|
|
|
} else if (size <= small_max) {
|
2006-03-26 23:37:25 +00:00
|
|
|
if (oldsize >= small_min && oldsize <= small_max &&
|
2006-03-19 18:28:06 +00:00
|
|
|
(QUANTUM_CEILING(size) >> opt_quantum_2pow)
|
|
|
|
== (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
|
|
|
|
goto IN_PLACE;
|
|
|
|
} else {
|
2007-03-23 05:05:48 +00:00
|
|
|
/*
|
|
|
|
* We make no attempt to resize runs here, though it would be
|
|
|
|
* possible to do so.
|
|
|
|
*/
|
|
|
|
if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
|
2006-03-19 18:28:06 +00:00
|
|
|
goto IN_PLACE;
|
|
|
|
}
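	/*
	 * Editor's example (assuming the default 16 byte quantum): growing a
	 * 20 byte allocation to 30 bytes stays in place, since both sizes
	 * round up to the 32 byte quantum-spaced class; growing it to 40
	 * bytes falls through to the copy path below, since 40 rounds up to
	 * 48.
	 */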
|
|
|
|
|
|
|
|
/*
|
2006-03-26 23:37:25 +00:00
|
|
|
* If we get here, then size and oldsize are different enough that we
|
|
|
|
* need to use a different size class. In that case, fall back to
|
|
|
|
* allocating new space and copying.
|
2006-03-19 18:28:06 +00:00
|
|
|
*/
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = arena_malloc(choose_arena(), size);
|
2006-03-19 18:28:06 +00:00
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
if (size < oldsize)
|
|
|
|
memcpy(ret, ptr, size);
|
|
|
|
else
|
|
|
|
memcpy(ret, ptr, oldsize);
|
|
|
|
idalloc(ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
2006-03-19 18:28:06 +00:00
|
|
|
IN_PLACE:
|
|
|
|
if (opt_junk && size < oldsize)
|
|
|
|
memset(&((char *)ptr)[size], 0x5a, oldsize - size);
|
|
|
|
else if (opt_zero && size > oldsize)
|
|
|
|
memset(&((char *)ptr)[size], 0, size - oldsize);
|
|
|
|
return (ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1996-09-23 19:26:39 +00:00
|
|
|
|
1995-09-16 09:28:13 +00:00
|
|
|
static void
|
2006-03-30 20:25:52 +00:00
|
|
|
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
|
1995-09-16 09:28:13 +00:00
|
|
|
{
|
2006-03-17 09:00:27 +00:00
|
|
|
unsigned pageind;
|
2007-03-23 22:58:15 +00:00
|
|
|
arena_chunk_map_t *mapelm;
|
2006-03-17 09:00:27 +00:00
|
|
|
size_t size;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
assert(arena != NULL);
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(arena->magic == ARENA_MAGIC);
|
2006-03-30 20:25:52 +00:00
|
|
|
assert(chunk->arena == arena);
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(ptr != NULL);
|
|
|
|
assert(CHUNK_ADDR2BASE(ptr) != ptr);
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
|
2007-03-23 22:58:15 +00:00
|
|
|
mapelm = &chunk->map[pageind];
|
|
|
|
	if (mapelm->pos != 0 || ptr != (void *)((uintptr_t)chunk + (pageind <<
|
2007-03-23 05:05:48 +00:00
|
|
|
	    pagesize_2pow))) {
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_run_t *run;
|
2006-03-30 20:25:52 +00:00
|
|
|
arena_bin_t *bin;
|
2000-11-26 10:30:18 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Small allocation. */
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
pageind -= mapelm->pos;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
|
|
|
|
assert(run->magic == ARENA_RUN_MAGIC);
|
2006-03-30 20:25:52 +00:00
|
|
|
bin = run->bin;
|
|
|
|
size = bin->reg_size;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if (opt_junk)
|
|
|
|
memset(ptr, 0x5a, size);
|
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
malloc_mutex_lock(&arena->mtx);
|
2006-04-04 03:51:47 +00:00
|
|
|
arena_run_reg_dalloc(run, bin, ptr, size);
|
2006-03-17 09:00:27 +00:00
|
|
|
run->nfree++;
|
|
|
|
if (run->nfree > run->free_max) {
|
|
|
|
/* Demote run to lower fullness quartile. */
|
2006-07-27 04:00:12 +00:00
|
|
|
arena_bin_run_demote(arena, bin, run);
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
2007-03-23 05:05:48 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
arena->stats.allocated_small -= size;
|
|
|
|
arena->stats.ndalloc_small++;
|
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
} else {
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Large allocation. */
|
2006-03-26 23:37:25 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
size = mapelm->npages << pagesize_2pow;
|
2007-03-23 05:05:48 +00:00
|
|
|
assert((((uintptr_t)ptr) & pagesize_mask) == 0);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-03-19 18:28:06 +00:00
|
|
|
if (opt_junk)
|
2006-03-17 09:00:27 +00:00
|
|
|
memset(ptr, 0x5a, size);
|
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
malloc_mutex_lock(&arena->mtx);
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_run_dalloc(arena, (arena_run_t *)ptr, size);
|
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 05:05:48 +00:00
|
|
|
arena->stats.allocated_large -= size;
|
|
|
|
arena->stats.ndalloc_large++;
|
2006-03-17 09:00:27 +00:00
|
|
|
#endif
|
2007-03-23 05:05:48 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
1995-09-16 09:28:13 +00:00
|
|
|
}
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
static bool
|
|
|
|
arena_new(arena_t *arena)
|
1994-05-27 05:00:24 +00:00
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
unsigned i;
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_bin_t *bin;
|
2007-03-20 03:44:10 +00:00
|
|
|
size_t pow2_size, prev_run_size;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
malloc_mutex_init(&arena->mtx);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
memset(&arena->stats, 0, sizeof(arena_stats_t));
|
|
|
|
#endif
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Initialize chunks. */
|
2006-01-13 18:38:56 +00:00
|
|
|
RB_INIT(&arena->chunks);
|
2006-12-23 00:18:51 +00:00
|
|
|
arena->spare = NULL;
|
1997-05-30 20:39:32 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Initialize bins. */
|
2007-03-20 03:44:10 +00:00
|
|
|
prev_run_size = pagesize;
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
/* (2^n)-spaced tiny bins. */
|
|
|
|
for (i = 0; i < ntbins; i++) {
|
|
|
|
bin = &arena->bins[i];
|
|
|
|
bin->runcur = NULL;
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_new(arena_bin_link(&bin->runs0), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs25), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs50), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs75), link);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
bin->reg_size = (1 << (TINY_MIN_2POW + i));
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
|
2006-01-16 05:13:49 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2006-03-17 09:00:27 +00:00
|
|
|
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
|
|
|
|
#endif
|
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Quantum-spaced bins. */
|
|
|
|
for (; i < ntbins + nqbins; i++) {
|
|
|
|
bin = &arena->bins[i];
|
|
|
|
bin->runcur = NULL;
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_new(arena_bin_link(&bin->runs0), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs25), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs50), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs75), link);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
bin->reg_size = quantum * (i - ntbins + 1);
|
|
|
|
|
|
|
|
pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
|
2007-03-20 03:44:10 +00:00
|
|
|
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* (2^n)-spaced sub-page bins. */
|
|
|
|
for (; i < ntbins + nqbins + nsbins; i++) {
|
2006-03-17 09:00:27 +00:00
|
|
|
bin = &arena->bins[i];
|
|
|
|
bin->runcur = NULL;
|
2007-01-31 22:54:19 +00:00
|
|
|
qr_new(arena_bin_link(&bin->runs0), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs25), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs50), link);
|
|
|
|
qr_new(arena_bin_link(&bin->runs75), link);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
|
|
|
|
|
2007-03-20 03:44:10 +00:00
|
|
|
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
2006-03-17 09:00:27 +00:00
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_DEBUG
|
|
|
|
arena->magic = ARENA_MAGIC;
|
|
|
|
#endif
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-27 07:46:22 +00:00
|
|
|
return (false);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Create a new arena and insert it into the arenas array at index ind. */
|
|
|
|
static arena_t *
|
|
|
|
arenas_extend(unsigned ind)
|
|
|
|
{
|
|
|
|
arena_t *ret;
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Allocate enough space for trailing bins. */
|
|
|
|
ret = (arena_t *)base_alloc(sizeof(arena_t)
|
2006-03-26 23:37:25 +00:00
|
|
|
+ (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
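	/*
	 * Editor's note (an assumption about arena_t, which is defined
	 * earlier in the file): the "- 1" above accounts for the one bin
	 * already included in arena_t's trailing bins array; the remaining
	 * ntbins + nqbins + nsbins - 1 bins are appended past the end of the
	 * structure.
	 */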
|
2006-01-16 05:13:49 +00:00
|
|
|
if (ret != NULL && arena_new(ret) == false) {
|
2006-01-13 18:38:56 +00:00
|
|
|
arenas[ind] = ret;
|
2006-01-16 05:13:49 +00:00
|
|
|
return (ret);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2006-01-16 05:13:49 +00:00
|
|
|
/* Only reached if there is an OOM error. */
|
1995-12-18 12:03:54 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
/*
|
|
|
|
* OOM here is quite inconvenient to propagate, since dealing with it
|
|
|
|
* would require a check for failure in the fast path. Instead, punt
|
|
|
|
* by using arenas[0]. In practice, this is an extremely unlikely
|
|
|
|
* failure.
|
|
|
|
*/
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error initializing arena\n", "", "");
|
2006-01-16 05:13:49 +00:00
|
|
|
if (opt_abort)
|
|
|
|
abort();
|
|
|
|
|
|
|
|
return (arenas[0]);
|
1994-05-27 05:00:24 +00:00
|
|
|
}
|
|
|
|
|
1995-09-16 09:28:13 +00:00
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* End arena.
|
1995-09-16 09:28:13 +00:00
|
|
|
*/
|
2006-01-13 18:38:56 +00:00
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* Begin general internal functions.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void *
|
2006-03-19 18:28:06 +00:00
|
|
|
huge_malloc(size_t size)
|
1995-09-16 09:28:13 +00:00
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
2006-04-27 01:03:00 +00:00
|
|
|
size_t csize;
|
2006-01-13 18:38:56 +00:00
|
|
|
chunk_node_t *node;
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-04-27 01:03:00 +00:00
|
|
|
/* Allocate one or more contiguous chunks for this request. */
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-04-27 01:03:00 +00:00
|
|
|
csize = CHUNK_CEILING(size);
|
|
|
|
if (csize == 0) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/* size is large enough to cause size_t wrap-around. */
|
2006-01-27 07:46:22 +00:00
|
|
|
return (NULL);
|
1996-10-20 13:20:57 +00:00
|
|
|
}
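	/*
	 * Editor's note (an assumption about the macro, which is defined
	 * earlier in the file): CHUNK_CEILING(size) computes
	 * (size + chunksize_mask) & ~chunksize_mask, so for any size greater
	 * than SIZE_T_MAX - chunksize_mask the addition wraps and the masked
	 * result is 0, which the check above reports as failure instead of
	 * mapping a zero-byte chunk.
	 */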
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Allocate a chunk node with which to track the chunk. */
|
2006-01-16 05:13:49 +00:00
|
|
|
node = base_chunk_node_alloc();
|
2006-01-27 07:46:22 +00:00
|
|
|
if (node == NULL)
|
|
|
|
return (NULL);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-04-27 01:03:00 +00:00
|
|
|
ret = chunk_alloc(csize);
|
2006-01-13 18:38:56 +00:00
|
|
|
if (ret == NULL) {
|
2006-01-16 05:13:49 +00:00
|
|
|
base_chunk_node_dealloc(node);
|
2006-01-27 07:46:22 +00:00
|
|
|
return (NULL);
|
1995-10-08 18:44:20 +00:00
|
|
|
}
|
|
|
|
|
2006-06-20 20:38:25 +00:00
|
|
|
/* Insert node into huge. */
|
2006-01-13 18:38:56 +00:00
|
|
|
node->chunk = ret;
|
2006-04-27 01:03:00 +00:00
|
|
|
node->size = csize;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
RB_INSERT(chunk_tree_s, &huge, node);
|
|
|
|
#ifdef MALLOC_STATS
|
2006-03-17 09:00:27 +00:00
|
|
|
huge_nmalloc++;
|
2006-04-27 01:03:00 +00:00
|
|
|
huge_allocated += csize;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
if (opt_junk)
|
2006-04-27 01:03:00 +00:00
|
|
|
memset(ret, 0xa5, csize);
|
2007-03-23 22:58:15 +00:00
|
|
|
else if (opt_zero)
|
2006-04-27 01:03:00 +00:00
|
|
|
memset(ret, 0, csize);
|
2006-03-19 18:28:06 +00:00
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
/* Only handles large allocations that require more than chunk alignment. */
|
|
|
|
static void *
|
|
|
|
huge_palloc(size_t alignment, size_t size)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t alloc_size, chunk_size, offset;
|
|
|
|
chunk_node_t *node;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This allocation requires alignment that is even larger than chunk
|
|
|
|
* alignment. This means that huge_malloc() isn't good enough.
|
|
|
|
*
|
|
|
|
* Allocate almost twice as many chunks as are demanded by the size or
|
|
|
|
* alignment, in order to assure the alignment can be achieved, then
|
|
|
|
* unmap leading and trailing chunks.
|
|
|
|
*/
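	/*
	 * Worked example (editor's sketch, assuming the default 1MB chunks):
	 * for size = 1MB at alignment = 4MB, the "size < alignment" case
	 * below gives alloc_size = (4MB << 1) - 1MB = 7MB.  chunk_alloc()
	 * returns a chunk-aligned region, so the first 4MB boundary lies at
	 * most 3MB past its start, leaving at least 4MB >= chunk_size for the
	 * aligned allocation before the leading and trailing chunks are
	 * unmapped.
	 */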
|
2007-03-24 20:44:06 +00:00
|
|
|
assert(alignment >= chunksize);
|
2007-03-23 22:58:15 +00:00
|
|
|
|
|
|
|
chunk_size = CHUNK_CEILING(size);
|
|
|
|
|
|
|
|
if (size >= alignment)
|
|
|
|
alloc_size = chunk_size + alignment - chunksize;
|
|
|
|
else
|
|
|
|
alloc_size = (alignment << 1) - chunksize;
|
|
|
|
|
|
|
|
/* Allocate a chunk node with which to track the chunk. */
|
|
|
|
node = base_chunk_node_alloc();
|
|
|
|
if (node == NULL)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
ret = chunk_alloc(alloc_size);
|
|
|
|
if (ret == NULL) {
|
|
|
|
base_chunk_node_dealloc(node);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
offset = (uintptr_t)ret & (alignment - 1);
|
|
|
|
assert((offset & chunksize_mask) == 0);
|
|
|
|
assert(offset < alloc_size);
|
|
|
|
if (offset == 0) {
|
|
|
|
/* Trim trailing space. */
|
|
|
|
chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
|
|
|
|
- chunk_size);
|
|
|
|
} else {
|
|
|
|
size_t trailsize;
|
|
|
|
|
|
|
|
/* Trim leading space. */
|
|
|
|
chunk_dealloc(ret, alignment - offset);
|
|
|
|
|
|
|
|
ret = (void *)((uintptr_t)ret + (alignment - offset));
|
|
|
|
|
|
|
|
trailsize = alloc_size - (alignment - offset) - chunk_size;
|
|
|
|
if (trailsize != 0) {
|
|
|
|
/* Trim trailing space. */
|
|
|
|
assert(trailsize < alloc_size);
|
|
|
|
chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
|
|
|
|
trailsize);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Insert node into huge. */
|
|
|
|
node->chunk = ret;
|
|
|
|
node->size = chunk_size;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
RB_INSERT(chunk_tree_s, &huge, node);
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
huge_allocated += size;
|
|
|
|
#endif
|
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0xa5, chunk_size);
|
|
|
|
else if (opt_zero)
|
|
|
|
memset(ret, 0, chunk_size);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2006-03-19 18:28:06 +00:00
|
|
|
static void *
|
|
|
|
huge_ralloc(void *ptr, size_t size, size_t oldsize)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/* Avoid moving the allocation if the size class would not change. */
|
2006-03-19 18:28:06 +00:00
|
|
|
if (oldsize > arena_maxclass &&
|
2006-03-26 23:37:25 +00:00
|
|
|
CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
|
2006-03-19 18:28:06 +00:00
|
|
|
return (ptr);
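	/*
	 * Editor's example (assuming the default 1MB chunks): resizing a
	 * 1.2MB huge allocation to 1.8MB returns ptr unchanged, since both
	 * sizes round up to two chunks; resizing it to 2.5MB rounds up to
	 * three chunks and falls through to the copy path below.
	 */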
|
|
|
|
|
|
|
|
/*
|
2006-03-26 23:37:25 +00:00
|
|
|
* If we get here, then size and oldsize are different enough that we
|
|
|
|
* need to use a different size class. In that case, fall back to
|
|
|
|
* allocating new space and copying.
|
2006-03-19 18:28:06 +00:00
|
|
|
*/
|
|
|
|
ret = huge_malloc(size);
|
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
if (CHUNK_ADDR2BASE(ptr) == ptr) {
|
|
|
|
/* The old allocation is a chunk. */
|
|
|
|
if (size < oldsize)
|
|
|
|
memcpy(ret, ptr, size);
|
|
|
|
else
|
|
|
|
memcpy(ret, ptr, oldsize);
|
|
|
|
} else {
|
|
|
|
/* The old allocation is a region. */
|
|
|
|
assert(oldsize < size);
|
|
|
|
memcpy(ret, ptr, oldsize);
|
|
|
|
}
|
|
|
|
idalloc(ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
static void
|
|
|
|
huge_dalloc(void *ptr)
|
|
|
|
{
|
|
|
|
chunk_node_t key;
|
|
|
|
chunk_node_t *node;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
|
|
|
|
/* Extract from tree of huge allocations. */
|
|
|
|
key.chunk = ptr;
|
|
|
|
node = RB_FIND(chunk_tree_s, &huge, &key);
|
|
|
|
assert(node != NULL);
|
|
|
|
assert(node->chunk == ptr);
|
|
|
|
RB_REMOVE(chunk_tree_s, &huge, node);
|
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
/* Update counters. */
|
2006-03-17 09:00:27 +00:00
|
|
|
huge_ndalloc++;
|
|
|
|
huge_allocated -= node->size;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Unmap chunk. */
|
2006-03-19 18:28:06 +00:00
|
|
|
#ifdef USE_BRK
|
|
|
|
if (opt_junk)
|
|
|
|
memset(node->chunk, 0x5a, node->size);
|
|
|
|
#endif
|
2006-01-13 18:38:56 +00:00
|
|
|
chunk_dealloc(node->chunk, node->size);
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
base_chunk_node_dealloc(node);
|
1994-05-27 05:00:24 +00:00
|
|
|
}
|
|
|
|
|
1995-09-16 09:28:13 +00:00
|
|
|
static void *
|
2006-03-30 20:25:52 +00:00
|
|
|
imalloc(size_t size)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
void *ret;
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(size != 0);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if (size <= arena_maxclass)
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = arena_malloc(choose_arena(), size);
|
2006-01-13 18:38:56 +00:00
|
|
|
else
|
2006-03-19 18:28:06 +00:00
|
|
|
ret = huge_malloc(size);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
1995-09-16 09:28:13 +00:00
|
|
|
}
|
1994-05-27 05:00:24 +00:00
|
|
|
|
1996-09-23 19:26:39 +00:00
|
|
|
static void *
|
2006-03-30 20:25:52 +00:00
|
|
|
ipalloc(size_t alignment, size_t size)
|
1995-05-30 05:51:47 +00:00
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
2007-03-24 20:44:06 +00:00
|
|
|
size_t ceil_size;
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2007-03-23 22:58:15 +00:00
|
|
|
* Round size up to the nearest multiple of alignment.
|
|
|
|
*
|
|
|
|
* This done, we can take advantage of the fact that for each small
|
|
|
|
* size class, every object is aligned at the smallest power of two
|
|
|
|
* that is non-zero in the base two representation of the size. For
|
|
|
|
* example:
|
2006-07-01 16:51:10 +00:00
|
|
|
*
|
|
|
|
* Size | Base 2 | Minimum alignment
|
|
|
|
* -----+----------+------------------
|
|
|
|
* 96 | 1100000 | 32
|
|
|
|
* 144 | 10100000 | 32
|
|
|
|
* 192 | 11000000 | 64
|
|
|
|
*
|
|
|
|
* Depending on runtime settings, it is possible that arena_malloc()
|
|
|
|
* will further round up to a power of two, but that never causes
|
|
|
|
* correctness issues.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
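	/*
	 * Editor's example (a sketch, not from the original source): for a
	 * power-of-two alignment, (size + (alignment - 1)) & (-alignment)
	 * rounds size up to the next multiple of alignment, because
	 * (size_t)-alignment == ~(alignment - 1).  With alignment == 32,
	 * size 96 stays 96 and size 100 becomes (100 + 31) & ~31 == 128.
	 */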
|
2007-03-24 20:44:06 +00:00
|
|
|
ceil_size = (size + (alignment - 1)) & (-alignment);
|
|
|
|
/*
|
|
|
|
* (ceil_size < size) protects against the combination of maximal
|
|
|
|
* alignment and size greater than maximal alignment.
|
|
|
|
*/
|
|
|
|
if (ceil_size < size) {
|
2006-03-17 09:00:27 +00:00
|
|
|
/* size_t overflow. */
|
|
|
|
return (NULL);
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-24 20:44:06 +00:00
|
|
|
if (ceil_size <= pagesize || (alignment <= pagesize
|
|
|
|
&& ceil_size <= arena_maxclass))
|
|
|
|
ret = arena_malloc(choose_arena(), ceil_size);
|
2006-01-13 18:38:56 +00:00
|
|
|
else {
|
2007-03-23 22:58:15 +00:00
|
|
|
size_t run_size;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
/*
|
2007-03-24 20:44:06 +00:00
|
|
|
* We can't achieve sub-page alignment, so round up alignment
|
2007-03-23 22:58:15 +00:00
|
|
|
* permanently; it makes later calculations simpler.
|
|
|
|
*/
|
|
|
|
alignment = PAGE_CEILING(alignment);
|
2007-03-24 20:44:06 +00:00
|
|
|
ceil_size = PAGE_CEILING(size);
|
|
|
|
/*
|
|
|
|
* (ceil_size < size) protects against very large sizes within
|
|
|
|
* pagesize of SIZE_T_MAX.
|
|
|
|
*
|
|
|
|
* (ceil_size + alignment < ceil_size) protects against the
|
|
|
|
* combination of maximal alignment and ceil_size large enough
|
|
|
|
* to cause overflow. This is similar to the first overflow
|
|
|
|
* check above, but it needs to be repeated due to the new
|
|
|
|
* ceil_size value, which may now be *equal* to maximal
|
|
|
|
* alignment, whereas before we only detected overflow if the
|
|
|
|
* original size was *greater* than maximal alignment.
|
|
|
|
*/
|
|
|
|
if (ceil_size < size || ceil_size + alignment < ceil_size) {
|
2007-03-23 22:58:15 +00:00
|
|
|
/* size_t overflow. */
|
|
|
|
return (NULL);
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
/*
|
|
|
|
* Calculate the size of the over-size run that arena_palloc()
|
|
|
|
* would need to allocate in order to guarantee the alignment.
|
|
|
|
*/
|
2007-03-24 20:44:06 +00:00
|
|
|
if (ceil_size >= alignment)
|
|
|
|
run_size = ceil_size + alignment - pagesize;
|
|
|
|
else {
|
|
|
|
/*
|
|
|
|
* It is possible that (alignment << 1) will cause
|
|
|
|
* overflow, but it doesn't matter because we also
|
|
|
|
* subtract pagesize, which in the case of overflow
|
|
|
|
* leaves us with a very large run_size. That causes
|
|
|
|
* the first conditional below to fail, which means
|
|
|
|
* that the bogus run_size value never gets used for
|
|
|
|
* anything important.
|
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
run_size = (alignment << 1) - pagesize;
|
2007-03-24 20:44:06 +00:00
|
|
|
}
|
2006-03-19 18:28:06 +00:00
|
|
|
|
2007-03-24 20:44:06 +00:00
|
|
|
if (run_size <= arena_maxclass) {
|
|
|
|
ret = arena_palloc(choose_arena(), alignment, ceil_size,
|
2007-03-23 22:58:15 +00:00
|
|
|
run_size);
|
|
|
|
} else if (alignment <= chunksize)
|
2007-03-24 20:44:06 +00:00
|
|
|
ret = huge_malloc(ceil_size);
|
2007-03-23 22:58:15 +00:00
|
|
|
else
|
2007-03-24 20:44:06 +00:00
|
|
|
ret = huge_palloc(alignment, ceil_size);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-20 03:11:11 +00:00
|
|
|
assert(((uintptr_t)ret & (alignment - 1)) == 0);
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
1995-09-16 09:28:13 +00:00
|
|
|
}
|
|
|
|
|
1996-09-23 19:26:39 +00:00
|
|
|
static void *
|
2006-03-30 20:25:52 +00:00
|
|
|
icalloc(size_t size)
|
1995-09-16 09:28:13 +00:00
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
if (size <= arena_maxclass) {
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = arena_malloc(choose_arena(), size);
|
2006-03-17 09:00:27 +00:00
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
memset(ret, 0, size);
|
|
|
|
} else {
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* The virtual memory system provides zero-filled pages, so
|
2006-03-19 18:28:06 +00:00
|
|
|
* there is no need to do so manually, unless opt_junk is
|
|
|
|
* enabled, in which case huge_malloc() fills huge allocations
|
|
|
|
* with junk.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
2006-03-19 18:28:06 +00:00
|
|
|
ret = huge_malloc(size);
|
|
|
|
if (ret == NULL)
|
|
|
|
return (NULL);
|
|
|
|
|
|
|
|
if (opt_junk)
|
|
|
|
memset(ret, 0, size);
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef USE_BRK
|
2006-03-19 18:28:06 +00:00
|
|
|
else if ((uintptr_t)ret >= (uintptr_t)brk_base
|
|
|
|
&& (uintptr_t)ret < (uintptr_t)brk_max) {
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* This may be a re-used brk chunk. Therefore, zero
|
|
|
|
* the memory.
|
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
memset(ret, 0, size);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
static size_t
|
2006-03-26 23:37:25 +00:00
|
|
|
isalloc(const void *ptr)
|
2006-01-13 18:38:56 +00:00
|
|
|
{
|
|
|
|
size_t ret;
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_chunk_t *chunk;
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(ptr != NULL);
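	/*
	 * Editor's note (an assumption about the macro, which is defined
	 * earlier in the file): CHUNK_ADDR2BASE() masks ptr down to the base
	 * of its containing chunk.  Arena regions never coincide with the
	 * chunk base, so the comparison below identifies a huge,
	 * chunk-aligned allocation when chunk == ptr.
	 */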
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
|
|
if (chunk != ptr) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Region. */
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(chunk->arena->magic == ARENA_MAGIC);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = arena_salloc(ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
} else {
|
2006-03-17 09:00:27 +00:00
|
|
|
chunk_node_t *node, key;
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Chunk (huge allocation). */
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Extract from tree of huge allocations. */
|
2007-01-31 22:54:19 +00:00
|
|
|
key.chunk = __DECONST(void *, ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
node = RB_FIND(chunk_tree_s, &huge, &key);
|
|
|
|
assert(node != NULL);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
ret = node->size;
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
1995-10-08 18:44:20 +00:00
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
|
|
|
}
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-03-26 23:37:25 +00:00
|
|
|
static void *
|
2006-03-30 20:25:52 +00:00
|
|
|
iralloc(void *ptr, size_t size)
|
2006-03-26 23:37:25 +00:00
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
size_t oldsize;
|
|
|
|
|
|
|
|
assert(ptr != NULL);
|
|
|
|
assert(size != 0);
|
|
|
|
|
|
|
|
oldsize = isalloc(ptr);
|
|
|
|
|
|
|
|
if (size <= arena_maxclass)
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = arena_ralloc(ptr, size, oldsize);
|
2006-03-26 23:37:25 +00:00
|
|
|
else
|
|
|
|
ret = huge_ralloc(ptr, size, oldsize);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
static void
|
|
|
|
idalloc(void *ptr)
|
|
|
|
{
|
2006-03-17 09:00:27 +00:00
|
|
|
arena_chunk_t *chunk;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
assert(ptr != NULL);
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
|
|
if (chunk != ptr) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Region. */
|
2006-03-30 20:25:52 +00:00
|
|
|
arena_dalloc(chunk->arena, chunk, ptr);
|
2006-01-13 18:38:56 +00:00
|
|
|
} else
|
|
|
|
huge_dalloc(ptr);
|
|
|
|
}
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
static void
|
|
|
|
malloc_print_stats(void)
|
|
|
|
{
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
if (opt_print_stats) {
|
2007-03-20 03:44:10 +00:00
|
|
|
char s[UMAX2S_BUFSIZE];
|
|
|
|
_malloc_message("___ Begin malloc statistics ___\n", "", "",
|
|
|
|
"");
|
|
|
|
_malloc_message("Assertions ",
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef NDEBUG
|
2007-03-20 03:44:10 +00:00
|
|
|
"disabled",
|
2006-01-13 18:38:56 +00:00
|
|
|
#else
|
2007-03-20 03:44:10 +00:00
|
|
|
"enabled",
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
2007-03-20 03:44:10 +00:00
|
|
|
"\n", "");
|
2007-03-23 05:05:48 +00:00
|
|
|
_malloc_message("Boolean MALLOC_OPTIONS: ",
|
|
|
|
opt_abort ? "A" : "a",
|
|
|
|
opt_junk ? "J" : "j",
|
|
|
|
opt_hint ? "H" : "h");
|
|
|
|
_malloc_message(opt_utrace ? "PU" : "Pu",
|
|
|
|
opt_sysv ? "V" : "v",
|
|
|
|
opt_xmalloc ? "X" : "x",
|
|
|
|
opt_zero ? "Z\n" : "z\n");
|
|
|
|
|
|
|
|
_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
|
|
|
|
_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
|
|
|
|
_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
|
|
|
|
"\n", "");
|
|
|
|
_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
|
|
|
|
_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
|
|
|
|
"");
|
|
|
|
|
2007-03-23 22:58:15 +00:00
|
|
|
_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
|
2007-03-23 05:05:48 +00:00
|
|
|
_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
{
|
2007-03-23 05:05:48 +00:00
|
|
|
size_t allocated, mapped;
|
2006-03-17 09:00:27 +00:00
|
|
|
unsigned i;
|
2006-03-26 23:37:25 +00:00
|
|
|
arena_t *arena;
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* Calculate and print allocated/mapped stats. */
|
2006-03-26 23:37:25 +00:00
|
|
|
|
|
|
|
/* arenas. */
|
|
|
|
for (i = 0, allocated = 0; i < narenas; i++) {
|
|
|
|
if (arenas[i] != NULL) {
|
|
|
|
malloc_mutex_lock(&arenas[i]->mtx);
|
2007-03-23 05:05:48 +00:00
|
|
|
allocated +=
|
|
|
|
arenas[i]->stats.allocated_small;
|
|
|
|
allocated +=
|
|
|
|
arenas[i]->stats.allocated_large;
|
2006-03-26 23:37:25 +00:00
|
|
|
malloc_mutex_unlock(&arenas[i]->mtx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
/* huge/base. */
|
2006-03-26 23:37:25 +00:00
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
allocated += huge_allocated;
|
2007-03-23 22:58:15 +00:00
|
|
|
mapped = stats_chunks.curchunks * chunksize;
|
2006-03-26 23:37:25 +00:00
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
malloc_mutex_lock(&base_mtx);
|
|
|
|
mapped += base_mapped;
|
|
|
|
malloc_mutex_unlock(&base_mtx);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
malloc_printf("Allocated: %zu, mapped: %zu\n",
|
|
|
|
allocated, mapped);
|
2006-09-08 17:52:15 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Print chunk stats. */
|
|
|
|
{
|
|
|
|
chunk_stats_t chunks_stats;
|
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
|
|
|
chunks_stats = stats_chunks;
|
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
2007-03-23 05:05:48 +00:00
|
|
|
malloc_printf("chunks: nchunks "
|
|
|
|
"highchunks curchunks\n");
|
|
|
|
malloc_printf(" %13llu%13lu%13lu\n",
|
2006-03-26 23:37:25 +00:00
|
|
|
chunks_stats.nchunks,
|
2006-01-13 18:38:56 +00:00
|
|
|
chunks_stats.highchunks,
|
|
|
|
chunks_stats.curchunks);
|
|
|
|
}
|
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
			/* Print huge allocation stats. */
|
2007-03-23 05:05:48 +00:00
|
|
|
malloc_printf(
|
|
|
|
"huge: nmalloc ndalloc allocated\n");
|
|
|
|
malloc_printf(" %12llu %12llu %12zu\n",
|
|
|
|
			    huge_nmalloc, huge_ndalloc, huge_allocated);
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Print stats for each arena. */
|
2006-03-17 09:00:27 +00:00
|
|
|
for (i = 0; i < narenas; i++) {
|
2006-01-13 18:38:56 +00:00
|
|
|
arena = arenas[i];
|
|
|
|
if (arena != NULL) {
|
|
|
|
malloc_printf(
|
2007-03-23 05:05:48 +00:00
|
|
|
"\narenas[%u]:\n", i);
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_lock(&arena->mtx);
|
2006-03-17 09:00:27 +00:00
|
|
|
stats_print(arena);
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_unlock(&arena->mtx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* #ifdef MALLOC_STATS */
|
Avoid using vsnprintf(3) unless MALLOC_STATS is defined, in order to
avoid substantial potential bloat for static binaries that do not
otherwise use any printf(3)-family functions. [1]
Rearrange arena_run_t so that the region bitmask can be minimally sized
according to constraints related to each bin's size class. Previously,
the region bitmask was the same size for all run headers, which wasted
a measurable amount of memory.
Rather than making runs for small objects as large as possible, make
runs as small as possible such that header overhead stays below a
certain bound. There are two exceptions that override the header
overhead bound:
1) If the bound is impossible to honor, it is relaxed on a
per-size-class basis. Since there is one bit of header
overhead per object (plus a constant), it is impossible to
achieve a header overhead less than or equal to 1/(# of bits
per object). For the current setting of maximum 0.5% header
overhead, this relaxation comes into play for {2, 4, 8,
16}-byte objects, for which header overhead is (on 64-bit
systems) {7.1, 4.3, 2.2, 1.2}%, respectively.
2) There is still a cap on small run size, still set to 64kB.
This comes into play for {1024, 2048}-byte objects, for which
header overhead is {1.6, 3.1}%, respectively.
In practice, this reduces the run sizes, which makes worst case
low-water memory usage due to fragmentation less bad. It also reduces
worst case high-water run fragmentation due to non-full runs, but this
is only a constant improvement (most important to small short-lived
processes).
Reduce the default chunk size from 2MB to 1MB. Benchmarks indicate that
the external fragmentation reduction makes 1MB the new sweet spot (as
small as possible without adversely affecting performance).
Reported by: [1] kientzle
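To make the bound in item 1 above concrete: with one bitmask bit per region,
a run of n-byte regions cannot have header overhead below 1/(8n), even before
the fixed part of the run header is counted. The standalone sketch below
(illustrative only, not part of the allocator) just evaluates that lower
bound for the relaxed size classes; the quoted {7.1, 4.3, 2.2, 1.2}% figures
are higher because they also include the constant per-run header.

#include <stdio.h>

/* Lower bound on header overhead implied by one bitmask bit per region. */
int
main(void)
{
	unsigned sizes[] = {2, 4, 8, 16};
	unsigned i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		/* One header bit per (8 * size) bits of region data. */
		printf("%u-byte regions: >= %.2f%% header overhead\n",
		    sizes[i], 100.0 / (8 * sizes[i]));
	}
	return (0);
}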
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message("--- End malloc statistics ---\n", "", "", "");
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1994-05-27 05:00:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2006-01-13 18:38:56 +00:00
|
|
|
* FreeBSD's pthreads implementation calls malloc(3), so the malloc
|
|
|
|
* implementation has to take pains to avoid infinite recursion during
|
|
|
|
* initialization.
|
1994-05-27 05:00:24 +00:00
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
static inline bool
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_init(void)
|
1994-05-27 05:00:24 +00:00
|
|
|
{
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
if (malloc_initialized == false)
|
2006-01-19 02:11:05 +00:00
|
|
|
return (malloc_init_hard());
|
|
|
|
|
|
|
|
return (false);
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1996-10-20 13:20:57 +00:00
|
|
|
|
2006-01-19 02:11:05 +00:00
|
|
|
static bool
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_init_hard(void)
|
|
|
|
{
|
|
|
|
unsigned i, j;
|
|
|
|
int linklen;
|
|
|
|
char buf[PATH_MAX + 1];
|
|
|
|
const char *opts;
|
|
|
|
|
2006-04-04 19:46:28 +00:00
|
|
|
malloc_mutex_lock(&init_lock);
|
|
|
|
if (malloc_initialized) {
|
|
|
|
/*
|
|
|
|
* Another thread initialized the allocator before this one
|
|
|
|
* acquired init_lock.
|
|
|
|
*/
|
|
|
|
malloc_mutex_unlock(&init_lock);
|
|
|
|
return (false);
|
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Get number of CPUs. */
|
|
|
|
{
|
|
|
|
int mib[2];
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
mib[0] = CTL_HW;
|
|
|
|
mib[1] = HW_NCPU;
|
|
|
|
len = sizeof(ncpus);
|
|
|
|
if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
|
|
|
|
/* Error. */
|
|
|
|
ncpus = 1;
|
|
|
|
}
|
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Get page size. */
|
|
|
|
{
|
|
|
|
long result;
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
result = sysconf(_SC_PAGESIZE);
|
|
|
|
assert(result != -1);
|
|
|
|
pagesize = (unsigned) result;
|
2006-03-17 09:00:27 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We assume that pagesize is a power of 2 when calculating
|
2007-03-23 05:05:48 +00:00
|
|
|
* pagesize_mask and pagesize_2pow.
|
2006-03-17 09:00:27 +00:00
|
|
|
*/
|
|
|
|
assert(((result - 1) & result) == 0);
|
2007-03-23 05:05:48 +00:00
|
|
|
pagesize_mask = result - 1;
|
2007-01-31 22:54:19 +00:00
|
|
|
pagesize_2pow = ffs((int)result) - 1;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
for (i = 0; i < 3; i++) {
|
|
|
|
/* Get runtime configuration. */
|
|
|
|
switch (i) {
|
|
|
|
case 0:
|
|
|
|
if ((linklen = readlink("/etc/malloc.conf", buf,
|
|
|
|
sizeof(buf) - 1)) != -1) {
|
|
|
|
/*
|
|
|
|
* Use the contents of the "/etc/malloc.conf"
|
|
|
|
* symbolic link's name.
|
|
|
|
*/
|
|
|
|
buf[linklen] = '\0';
|
|
|
|
opts = buf;
|
|
|
|
} else {
|
|
|
|
/* No configuration specified. */
|
|
|
|
buf[0] = '\0';
|
|
|
|
opts = buf;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
if (issetugid() == 0 && (opts =
|
|
|
|
getenv("MALLOC_OPTIONS")) != NULL) {
|
|
|
|
/*
|
|
|
|
* Do nothing; opts is already initialized to
|
|
|
|
* the value of the MALLOC_OPTIONS environment
|
|
|
|
* variable.
|
|
|
|
*/
|
|
|
|
} else {
|
|
|
|
/* No configuration specified. */
|
|
|
|
buf[0] = '\0';
|
|
|
|
opts = buf;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
if (_malloc_options != NULL) {
|
|
|
|
/*
|
|
|
|
* Use options that were compiled into the program.
|
|
|
|
*/
|
|
|
|
opts = _malloc_options;
|
|
|
|
} else {
|
|
|
|
/* No configuration specified. */
|
|
|
|
buf[0] = '\0';
|
|
|
|
opts = buf;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* NOTREACHED */
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (j = 0; opts[j] != '\0'; j++) {
|
|
|
|
switch (opts[j]) {
|
|
|
|
case 'a':
|
|
|
|
opt_abort = false;
|
|
|
|
break;
|
|
|
|
case 'A':
|
|
|
|
opt_abort = true;
|
|
|
|
break;
|
2006-03-17 09:00:27 +00:00
|
|
|
case 'h':
|
|
|
|
opt_hint = false;
|
2006-01-13 18:38:56 +00:00
|
|
|
break;
|
2006-03-17 09:00:27 +00:00
|
|
|
case 'H':
|
|
|
|
opt_hint = true;
|
2006-01-13 18:38:56 +00:00
|
|
|
break;
|
|
|
|
case 'j':
|
|
|
|
opt_junk = false;
|
|
|
|
break;
|
|
|
|
case 'J':
|
|
|
|
opt_junk = true;
|
|
|
|
break;
|
|
|
|
case 'k':
|
2006-03-17 09:00:27 +00:00
|
|
|
/*
|
|
|
|
* Run fullness quartile limits don't have
|
|
|
|
* enough resolution if there are too few
|
|
|
|
* regions for the largest bin size classes.
|
|
|
|
*/
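/*
 * With 4 kB pages, for example, this keeps the chunk size from
 * dropping below 2^(pagesize_2pow + 4) = 64 kB.
 */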
|
2006-05-10 00:07:45 +00:00
|
|
|
if (opt_chunk_2pow > pagesize_2pow + 4)
|
2006-01-13 18:38:56 +00:00
|
|
|
opt_chunk_2pow--;
|
|
|
|
break;
|
|
|
|
case 'K':
|
2007-03-23 05:05:48 +00:00
|
|
|
/*
|
|
|
|
* There must be fewer pages in a chunk than
|
|
|
|
* can be recorded by the pos field of
|
|
|
|
* arena_chunk_map_t, in order to make POS_FREE
|
|
|
|
* special.
|
|
|
|
*/
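/*
 * In other words, chunk_npages is kept at or below 2^31, so at
 * least one 32-bit pos value is never a valid page index and can
 * be reserved for POS_FREE.
 */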
|
|
|
|
if (opt_chunk_2pow - pagesize_2pow
|
|
|
|
< (sizeof(uint32_t) << 3) - 1)
|
2006-01-13 18:38:56 +00:00
|
|
|
opt_chunk_2pow++;
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
opt_narenas_lshift--;
|
|
|
|
break;
|
|
|
|
case 'N':
|
|
|
|
opt_narenas_lshift++;
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
opt_print_stats = false;
|
|
|
|
break;
|
|
|
|
case 'P':
|
|
|
|
opt_print_stats = true;
|
|
|
|
break;
|
|
|
|
case 'q':
|
|
|
|
if (opt_quantum_2pow > QUANTUM_2POW_MIN)
|
|
|
|
opt_quantum_2pow--;
|
|
|
|
break;
|
|
|
|
case 'Q':
|
2006-03-17 09:00:27 +00:00
|
|
|
if (opt_quantum_2pow < pagesize_2pow - 1)
|
2006-01-13 18:38:56 +00:00
|
|
|
opt_quantum_2pow++;
|
|
|
|
break;
|
2006-03-17 09:00:27 +00:00
|
|
|
case 's':
|
|
|
|
if (opt_small_max_2pow > QUANTUM_2POW_MIN)
|
|
|
|
opt_small_max_2pow--;
|
|
|
|
break;
|
|
|
|
case 'S':
|
|
|
|
if (opt_small_max_2pow < pagesize_2pow - 1)
|
|
|
|
opt_small_max_2pow++;
|
|
|
|
break;
|
2006-01-13 18:38:56 +00:00
|
|
|
case 'u':
|
|
|
|
opt_utrace = false;
|
|
|
|
break;
|
|
|
|
case 'U':
|
|
|
|
opt_utrace = true;
|
|
|
|
break;
|
|
|
|
case 'v':
|
|
|
|
opt_sysv = false;
|
|
|
|
break;
|
|
|
|
case 'V':
|
|
|
|
opt_sysv = true;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
opt_xmalloc = false;
|
|
|
|
break;
|
|
|
|
case 'X':
|
|
|
|
opt_xmalloc = true;
|
|
|
|
break;
|
|
|
|
case 'z':
|
|
|
|
opt_zero = false;
|
|
|
|
break;
|
|
|
|
case 'Z':
|
|
|
|
opt_zero = true;
|
|
|
|
break;
|
2007-03-20 03:44:10 +00:00
|
|
|
default: {
|
|
|
|
char cbuf[2];
|
|
|
|
|
|
|
|
cbuf[0] = opts[j];
|
|
|
|
cbuf[1] = '\0';
|
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Unsupported character in "
|
|
|
|
"malloc options: '", cbuf, "'\n");
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Take care to call atexit() only once. */
|
|
|
|
if (opt_print_stats) {
|
|
|
|
/* Print statistics at exit. */
|
|
|
|
atexit(malloc_print_stats);
|
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-03-17 09:00:27 +00:00
|
|
|
/* Set variables according to the value of opt_small_max_2pow. */
|
|
|
|
if (opt_small_max_2pow < opt_quantum_2pow)
|
|
|
|
opt_small_max_2pow = opt_quantum_2pow;
|
|
|
|
small_max = (1 << opt_small_max_2pow);
|
|
|
|
|
|
|
|
/* Set bin-related variables. */
|
|
|
|
bin_maxclass = (pagesize >> 1);
|
2007-03-20 03:44:10 +00:00
|
|
|
assert(opt_quantum_2pow >= TINY_MIN_2POW);
|
|
|
|
ntbins = opt_quantum_2pow - TINY_MIN_2POW;
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(ntbins <= opt_quantum_2pow);
|
|
|
|
nqbins = (small_max >> opt_quantum_2pow);
|
2006-03-26 23:37:25 +00:00
|
|
|
nsbins = pagesize_2pow - opt_small_max_2pow - 1;
|
2006-03-17 09:00:27 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Set variables according to the value of opt_quantum_2pow. */
|
|
|
|
quantum = (1 << opt_quantum_2pow);
|
|
|
|
quantum_mask = quantum - 1;
|
2006-03-24 22:13:49 +00:00
|
|
|
if (ntbins > 0)
|
|
|
|
small_min = (quantum >> 1) + 1;
|
|
|
|
else
|
|
|
|
small_min = 1;
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(small_min <= quantum);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Set variables according to the value of opt_chunk_2pow. */
|
2007-03-23 22:58:15 +00:00
|
|
|
chunksize = (1LU << opt_chunk_2pow);
|
|
|
|
chunksize_mask = chunksize - 1;
|
|
|
|
chunk_npages = (chunksize >> pagesize_2pow);
|
2007-03-23 05:05:48 +00:00
|
|
|
{
|
|
|
|
unsigned header_size;
|
|
|
|
|
|
|
|
header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t)
|
|
|
|
* (chunk_npages - 1));
|
|
|
|
arena_chunk_header_npages = (header_size >> pagesize_2pow);
|
|
|
|
if ((header_size & pagesize_mask) != 0)
|
|
|
|
arena_chunk_header_npages++;
|
|
|
|
}
|
2007-03-23 22:58:15 +00:00
|
|
|
arena_maxclass = chunksize - (arena_chunk_header_npages <<
|
2007-03-23 05:05:48 +00:00
|
|
|
pagesize_2pow);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
UTRACE(0, 0, 0);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifdef MALLOC_STATS
|
|
|
|
memset(&stats_chunks, 0, sizeof(chunk_stats_t));
|
|
|
|
#endif
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Various sanity checks that regard configuration. */
|
2006-03-17 09:00:27 +00:00
|
|
|
assert(quantum >= sizeof(void *));
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(quantum <= pagesize);
|
2007-03-23 22:58:15 +00:00
|
|
|
assert(chunksize >= pagesize);
|
|
|
|
assert(quantum * 4 <= chunksize);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/* Initialize chunks data. */
|
|
|
|
malloc_mutex_init(&chunks_mtx);
|
|
|
|
RB_INIT(&huge);
|
|
|
|
#ifdef USE_BRK
|
2006-09-08 17:52:15 +00:00
|
|
|
malloc_mutex_init(&brk_mtx);
|
2006-01-13 18:38:56 +00:00
|
|
|
brk_base = sbrk(0);
|
|
|
|
brk_prev = brk_base;
|
2006-04-27 01:03:00 +00:00
|
|
|
brk_max = brk_base;
|
2006-01-13 18:38:56 +00:00
|
|
|
#endif
|
|
|
|
#ifdef MALLOC_STATS
|
2006-03-17 09:00:27 +00:00
|
|
|
huge_nmalloc = 0;
|
|
|
|
huge_ndalloc = 0;
|
2006-01-13 18:38:56 +00:00
|
|
|
huge_allocated = 0;
|
|
|
|
#endif
|
|
|
|
RB_INIT(&old_chunks);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
/* Initialize base allocation data structures. */
|
2006-03-24 00:28:08 +00:00
|
|
|
#ifdef MALLOC_STATS
|
2007-03-23 05:05:48 +00:00
|
|
|
base_mapped = 0;
|
2006-03-24 00:28:08 +00:00
|
|
|
#endif
|
|
|
|
#ifdef USE_BRK
|
|
|
|
/*
|
2006-09-08 17:52:15 +00:00
|
|
|
* Allocate a base chunk here, since it doesn't actually have to be
|
|
|
|
* chunk-aligned. Doing this before allocating any other chunks allows
|
|
|
|
* the use of space that would otherwise be wasted.
|
2006-03-24 00:28:08 +00:00
|
|
|
*/
|
2006-09-08 17:52:15 +00:00
|
|
|
base_chunk_alloc(0);
|
2006-03-24 00:28:08 +00:00
|
|
|
#endif
|
2006-01-16 05:13:49 +00:00
|
|
|
base_chunk_nodes = NULL;
|
|
|
|
malloc_mutex_init(&base_mtx);
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
if (ncpus > 1) {
|
2006-03-26 23:37:25 +00:00
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* For SMP systems, create four times as many arenas as there
|
|
|
|
* are CPUs by default.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
2006-03-17 09:00:27 +00:00
|
|
|
opt_narenas_lshift += 2;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Determine how many arenas to use. */
|
2006-02-04 01:11:30 +00:00
|
|
|
narenas = ncpus;
|
2006-03-26 23:41:35 +00:00
|
|
|
if (opt_narenas_lshift > 0) {
|
|
|
|
if ((narenas << opt_narenas_lshift) > narenas)
|
|
|
|
narenas <<= opt_narenas_lshift;
|
|
|
|
/*
|
|
|
|
* Make sure not to exceed the limits of what base_alloc()
|
|
|
|
* can handle.
|
|
|
|
*/
|
2007-03-23 22:58:15 +00:00
|
|
|
if (narenas * sizeof(arena_t *) > chunksize)
|
|
|
|
narenas = chunksize / sizeof(arena_t *);
|
2006-03-26 23:41:35 +00:00
|
|
|
} else if (opt_narenas_lshift < 0) {
|
|
|
|
if ((narenas << opt_narenas_lshift) < narenas)
|
|
|
|
narenas <<= opt_narenas_lshift;
|
|
|
|
/* Make sure there is at least one arena. */
|
|
|
|
if (narenas == 0)
|
|
|
|
narenas = 1;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
#ifdef NO_TLS
|
|
|
|
if (narenas > 1) {
|
|
|
|
static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
|
|
|
|
23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
|
|
|
|
89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
|
|
|
|
151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
|
|
|
|
223, 227, 229, 233, 239, 241, 251, 257, 263};
|
2007-01-31 22:54:19 +00:00
|
|
|
unsigned nprimes, parenas;
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Pick a prime number of hash arenas that is more than narenas
|
|
|
|
* so that direct hashing of pthread_self() pointers tends to
|
|
|
|
* spread allocations evenly among the arenas.
|
|
|
|
*/
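/*
 * Example: if narenas is 16 at this point, the next larger prime, 17,
 * is chosen, so thread-to-arena mappings are less likely to collide on
 * shared low-order pointer bits.
 */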
|
|
|
|
assert((narenas & 1) == 0); /* narenas must be even. */
|
2006-03-30 20:25:52 +00:00
|
|
|
nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
|
2006-01-13 18:38:56 +00:00
|
|
|
parenas = primes[nprimes - 1]; /* In case not enough primes. */
|
|
|
|
for (i = 1; i < nprimes; i++) {
|
|
|
|
if (primes[i] > narenas) {
|
|
|
|
parenas = primes[i];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
narenas = parenas;
|
|
|
|
}
|
|
|
|
#endif
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
#ifndef NO_TLS
|
|
|
|
next_arena = 0;
|
|
|
|
#endif
|
1995-09-16 09:28:13 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/* Allocate and initialize arenas. */
|
2006-01-16 05:13:49 +00:00
|
|
|
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
|
2006-04-04 19:46:28 +00:00
|
|
|
if (arenas == NULL) {
|
|
|
|
malloc_mutex_unlock(&init_lock);
|
2006-01-19 02:11:05 +00:00
|
|
|
return (true);
|
2006-04-04 19:46:28 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* Zero the array. In practice, this should always be pre-zeroed,
|
|
|
|
* since it was just mmap()ed, but let's be sure.
|
|
|
|
*/
|
|
|
|
memset(arenas, 0, sizeof(arena_t *) * narenas);
|
1995-10-08 18:44:20 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* Initialize one arena here. The rest are lazily created in
|
|
|
|
* arena_choose_hard().
|
|
|
|
*/
|
|
|
|
arenas_extend(0);
|
2006-04-04 19:46:28 +00:00
|
|
|
if (arenas[0] == NULL) {
|
|
|
|
malloc_mutex_unlock(&init_lock);
|
2006-01-19 02:11:05 +00:00
|
|
|
return (true);
|
2006-04-04 19:46:28 +00:00
|
|
|
}
|
1996-07-03 05:03:07 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_init(&arenas_mtx);
|
2004-03-07 20:41:27 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_initialized = true;
|
2006-04-04 19:46:28 +00:00
|
|
|
malloc_mutex_unlock(&init_lock);
|
2006-01-19 02:11:05 +00:00
|
|
|
return (false);
|
1996-07-03 05:03:07 +00:00
|
|
|
}
|
1997-06-22 17:54:27 +00:00
|
|
|
|
2004-02-21 09:14:38 +00:00
|
|
|
/*
|
2006-03-17 09:00:27 +00:00
|
|
|
* End general internal functions.
|
2006-01-13 18:38:56 +00:00
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* Begin malloc(3)-compatible functions.
|
2004-02-21 09:14:38 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
void *
|
|
|
|
malloc(size_t size)
|
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
|
|
|
|
2006-01-19 02:11:05 +00:00
|
|
|
if (malloc_init()) {
|
|
|
|
ret = NULL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
if (size == 0) {
|
|
|
|
if (opt_sysv == false)
|
2006-06-30 20:54:15 +00:00
|
|
|
size = 1;
|
|
|
|
else {
|
2006-01-13 18:38:56 +00:00
|
|
|
ret = NULL;
|
2006-06-30 20:54:15 +00:00
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2004-02-21 09:14:38 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = imalloc(size);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-01-19 02:11:05 +00:00
|
|
|
RETURN:
|
2006-01-13 18:38:56 +00:00
|
|
|
if (ret == NULL) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in malloc(): out of memory\n", "",
|
|
|
|
"");
|
2006-01-13 18:38:56 +00:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
errno = ENOMEM;
|
2006-03-19 18:28:06 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
UTRACE(0, size, ret);
|
|
|
|
return (ret);
|
2004-02-21 09:14:38 +00:00
|
|
|
}
|
|
|
|
|
2006-01-12 07:28:21 +00:00
|
|
|
int
|
|
|
|
posix_memalign(void **memptr, size_t alignment, size_t size)
|
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
int ret;
|
|
|
|
void *result;
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-01-19 02:11:05 +00:00
|
|
|
if (malloc_init())
|
2006-01-13 18:38:56 +00:00
|
|
|
result = NULL;
|
2006-01-19 02:11:05 +00:00
|
|
|
else {
|
|
|
|
/* Make sure that alignment is a large enough power of 2. */
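/* Note: (x & (x - 1)) == 0 exactly when x is zero or a power of two. */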
|
|
|
|
if (((alignment - 1) & alignment) != 0
|
|
|
|
|| alignment < sizeof(void *)) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in posix_memalign(): "
|
|
|
|
"invalid alignment\n", "", "");
|
2006-01-19 02:11:05 +00:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
result = NULL;
|
|
|
|
ret = EINVAL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
result = ipalloc(alignment, size);
|
2006-01-19 02:11:05 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
if (result == NULL) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in posix_memalign(): out of memory\n",
|
|
|
|
"", "");
|
2006-01-13 18:38:56 +00:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
ret = ENOMEM;
|
|
|
|
goto RETURN;
|
2006-03-19 18:28:06 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
*memptr = result;
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
RETURN:
|
|
|
|
UTRACE(0, size, result);
|
|
|
|
return (ret);
|
2006-01-12 07:28:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
calloc(size_t num, size_t size)
|
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
2006-03-30 20:25:52 +00:00
|
|
|
size_t num_size;
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-01-19 02:11:05 +00:00
|
|
|
if (malloc_init()) {
|
2007-01-31 22:54:19 +00:00
|
|
|
num_size = 0;
|
2006-01-19 02:11:05 +00:00
|
|
|
ret = NULL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
num_size = num * size;
|
|
|
|
if (num_size == 0) {
|
2006-08-13 21:54:47 +00:00
|
|
|
if ((opt_sysv == false) && ((num == 0) || (size == 0)))
|
2006-06-30 20:54:15 +00:00
|
|
|
num_size = 1;
|
|
|
|
else {
|
2006-01-13 18:38:56 +00:00
|
|
|
ret = NULL;
|
2006-06-30 20:54:15 +00:00
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-04-04 03:51:47 +00:00
|
|
|
/*
|
|
|
|
* Try to avoid division here. We know that it isn't possible to
|
|
|
|
* overflow during multiplication if neither operand uses any of the
|
|
|
|
* most significant half of the bits in a size_t.
|
|
|
|
*/
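/*
 * Example: with a 32-bit size_t, if num and size both fit in their low
 * 16 bits, num * size cannot exceed 32 bits, so the division check is
 * skipped entirely.
 */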
|
|
|
|
} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
|
|
|
|
&& (num_size / size != num)) {
|
2006-01-13 18:38:56 +00:00
|
|
|
/* size_t overflow. */
|
|
|
|
ret = NULL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
ret = icalloc(num_size);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
RETURN:
|
|
|
|
if (ret == NULL) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in calloc(): out of memory\n", "",
|
|
|
|
"");
|
2006-01-13 18:38:56 +00:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
errno = ENOMEM;
|
|
|
|
}
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-03-30 20:25:52 +00:00
|
|
|
UTRACE(0, num_size, ret);
|
2006-01-13 18:38:56 +00:00
|
|
|
return (ret);
|
2006-01-12 07:28:21 +00:00
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
void *
|
|
|
|
realloc(void *ptr, size_t size)
|
2004-02-21 09:14:38 +00:00
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
void *ret;
|
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
if (size == 0) {
|
|
|
|
if (opt_sysv == false)
|
|
|
|
size = 1;
|
|
|
|
else {
|
|
|
|
if (ptr != NULL)
|
|
|
|
idalloc(ptr);
|
|
|
|
ret = NULL;
|
|
|
|
goto RETURN;
|
|
|
|
}
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
if (ptr != NULL) {
|
|
|
|
assert(malloc_initialized);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
ret = iralloc(ptr, size);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
if (ret == NULL) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in realloc(): out of "
|
|
|
|
"memory\n", "", "");
|
2006-06-30 20:54:15 +00:00
|
|
|
abort();
|
2006-03-19 18:28:06 +00:00
|
|
|
}
|
2006-06-30 20:54:15 +00:00
|
|
|
errno = ENOMEM;
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
|
|
|
} else {
|
2006-06-30 20:54:15 +00:00
|
|
|
if (malloc_init())
|
|
|
|
ret = NULL;
|
|
|
|
else
|
|
|
|
ret = imalloc(size);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
if (ret == NULL) {
|
|
|
|
if (opt_xmalloc) {
|
2007-03-20 03:44:10 +00:00
|
|
|
_malloc_message(_getprogname(),
|
|
|
|
": (malloc) Error in realloc(): out of "
|
|
|
|
"memory\n", "", "");
|
2006-06-30 20:54:15 +00:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
errno = ENOMEM;
|
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
}
|
2004-02-21 09:14:38 +00:00
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
RETURN:
|
2006-01-13 18:38:56 +00:00
|
|
|
UTRACE(ptr, size, ret);
|
|
|
|
return (ret);
|
2004-02-21 09:14:38 +00:00
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
void
|
|
|
|
free(void *ptr)
|
2004-02-21 09:14:38 +00:00
|
|
|
{
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
UTRACE(ptr, 0, 0);
|
2006-06-30 20:54:15 +00:00
|
|
|
if (ptr != NULL) {
|
2006-01-13 18:38:56 +00:00
|
|
|
assert(malloc_initialized);
|
|
|
|
|
|
|
|
idalloc(ptr);
|
|
|
|
}
|
2004-02-21 09:14:38 +00:00
|
|
|
}
|
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
/*
|
|
|
|
* End malloc(3)-compatible functions.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
2006-03-28 22:16:04 +00:00
|
|
|
/*
|
|
|
|
* Begin non-standard functions.
|
|
|
|
*/
|
|
|
|
|
|
|
|
size_t
|
|
|
|
malloc_usable_size(const void *ptr)
|
|
|
|
{
|
|
|
|
|
|
|
|
assert(ptr != NULL);
|
|
|
|
|
2006-06-30 20:54:15 +00:00
|
|
|
return (isalloc(ptr));
|
2006-03-28 22:16:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* End non-standard functions.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|
2006-01-12 07:28:21 +00:00
|
|
|
/*
|
|
|
|
* Begin library-private functions, used by threading libraries for protection
|
|
|
|
* of malloc during fork(). These functions are only called if the program is
|
|
|
|
* running in threaded mode, so there is no need to check whether the program
|
|
|
|
* is threaded here.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
_malloc_prefork(void)
|
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
unsigned i;
|
|
|
|
|
|
|
|
/* Acquire all mutexes in a safe order. */
|
|
|
|
|
|
|
|
malloc_mutex_lock(&arenas_mtx);
|
|
|
|
for (i = 0; i < narenas; i++) {
|
|
|
|
if (arenas[i] != NULL)
|
|
|
|
malloc_mutex_lock(&arenas[i]->mtx);
|
|
|
|
}
|
|
|
|
malloc_mutex_unlock(&arenas_mtx);
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
malloc_mutex_lock(&base_mtx);
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
malloc_mutex_lock(&chunks_mtx);
|
2006-01-12 07:28:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
_malloc_postfork(void)
|
|
|
|
{
|
2006-01-13 18:38:56 +00:00
|
|
|
unsigned i;
|
|
|
|
|
|
|
|
/* Release all mutexes, now that fork() has completed. */
|
|
|
|
|
|
|
|
malloc_mutex_unlock(&chunks_mtx);
|
|
|
|
|
2006-01-16 05:13:49 +00:00
|
|
|
malloc_mutex_unlock(&base_mtx);
|
2006-01-12 07:28:21 +00:00
|
|
|
|
2006-01-13 18:38:56 +00:00
|
|
|
malloc_mutex_lock(&arenas_mtx);
|
|
|
|
for (i = 0; i < narenas; i++) {
|
|
|
|
if (arenas[i] != NULL)
|
|
|
|
malloc_mutex_unlock(&arenas[i]->mtx);
|
|
|
|
}
|
|
|
|
malloc_mutex_unlock(&arenas_mtx);
|
2006-01-12 07:28:21 +00:00
|
|
|
}
|
2006-01-13 18:38:56 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* End library-private functions.
|
|
|
|
*/
|
|
|
|
/******************************************************************************/
|