Use fixed point integer math instead of floating point math when
calculating run sizes. Use of the floating point unit was a potential pessimization to context switching for applications that do not otherwise use floating point math. [1] Reformat cpp macro-related comments to improve consistency. Submitted by: das
This commit is contained in:
parent
3b94f3069c
commit
b720912697
@ -101,7 +101,14 @@
|
||||
/* #define MALLOC_PRODUCTION */
|
||||
|
||||
#ifndef MALLOC_PRODUCTION
|
||||
/*
|
||||
* MALLOC_DEBUG enables assertions and other sanity checks, and disables
|
||||
* inline functions.
|
||||
*/
|
||||
# define MALLOC_DEBUG
|
||||
|
||||
/* MALLOC_STATS enables statistics calculation. */
|
||||
# define MALLOC_STATS
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "un-namespace.h"
|
||||
|
||||
/* MALLOC_STATS enables statistics calculation. */
|
||||
#ifndef MALLOC_PRODUCTION
|
||||
# define MALLOC_STATS
|
||||
#endif
|
||||
|
||||
#ifdef MALLOC_DEBUG
|
||||
# ifdef NDEBUG
|
||||
# undef NDEBUG
|
||||
@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$");
|
||||
#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT)
|
||||
|
||||
/*
|
||||
* Maximum desired run header overhead. Runs are sized as small as possible
|
||||
* such that this setting is still honored, without violating other constraints.
|
||||
* The goal is to make runs as small as possible without exceeding a per run
|
||||
* external fragmentation threshold.
|
||||
* RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
|
||||
* as small as possible such that this setting is still honored, without
|
||||
* violating other constraints. The goal is to make runs as small as possible
|
||||
* without exceeding a per run external fragmentation threshold.
|
||||
*
|
||||
* Note that it is possible to set this low enough that it cannot be honored
|
||||
* for some/all object sizes, since there is one bit of header overhead per
|
||||
* object (plus a constant). In such cases, this constraint is relaxed.
|
||||
* We use binary fixed point math for overhead computations, where the binary
|
||||
* point is implicitly RUN_BFP bits to the left.
|
||||
*
|
||||
* RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
|
||||
* overhead for which RUN_MAX_OVRHD is relaxed.
|
||||
* Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
|
||||
* honored for some/all object sizes, since there is one bit of header overhead
|
||||
* per object (plus a constant). This constraint is relaxed (ignored) for runs
|
||||
* that are so small that the per-region overhead is greater than:
|
||||
*
|
||||
* (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
|
||||
*/
|
||||
#define RUN_MAX_OVRHD 0.015
|
||||
#define RUN_MAX_OVRHD_RELAX 1.5
|
||||
#define RUN_BFP 12
|
||||
/* \/ Implicit binary fixed point. */
|
||||
#define RUN_MAX_OVRHD 0x0000003dU
|
||||
#define RUN_MAX_OVRHD_RELAX 0x00001800U
|
||||
|
||||
/* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
|
||||
#define RUN_MAX_SMALL_2POW 15
|
||||
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW)
|
||||
|
||||
#ifdef MALLOC_LAZY_FREE
|
||||
/* Default size of each arena's lazy free cache. */
|
||||
# define LAZY_FREE_2POW_DEFAULT 8
|
||||
/*
|
||||
* Number of pseudo-random probes to conduct before considering the cache to be
|
||||
* overly full. It takes on average n probes to detect fullness of (n-1)/n.
|
||||
* However, we are effectively doing multiple non-independent trials (each
|
||||
* deallocation is a trial), so the actual average threshold for clearing the
|
||||
* cache is somewhat lower.
|
||||
*/
|
||||
/* Default size of each arena's lazy free cache. */
|
||||
# define LAZY_FREE_2POW_DEFAULT 8
|
||||
/*
|
||||
* Number of pseudo-random probes to conduct before considering the cache to
|
||||
* be overly full. It takes on average n probes to detect fullness of
|
||||
* (n-1)/n. However, we are effectively doing multiple non-independent
|
||||
* trials (each deallocation is a trial), so the actual average threshold
|
||||
* for clearing the cache is somewhat lower.
|
||||
*/
|
||||
# define LAZY_FREE_NPROBES 5
|
||||
#endif
|
||||
|
||||
@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$");
|
||||
#define BLOCK_COST_2POW 4
|
||||
|
||||
#ifdef MALLOC_BALANCE
|
||||
/*
|
||||
* We use an exponential moving average to track recent lock contention, where
|
||||
* the size of the history window is N, and alpha=2/(N+1).
|
||||
*
|
||||
* Due to integer math rounding, very small values here can cause substantial
|
||||
* degradation in accuracy, thus making the moving average decay faster than it
|
||||
* would with precise calculation.
|
||||
*/
|
||||
/*
|
||||
* We use an exponential moving average to track recent lock contention,
|
||||
* where the size of the history window is N, and alpha=2/(N+1).
|
||||
*
|
||||
* Due to integer math rounding, very small values here can cause
|
||||
* substantial degradation in accuracy, thus making the moving average decay
|
||||
* faster than it would with precise calculation.
|
||||
*/
|
||||
# define BALANCE_ALPHA_INV_2POW 9
|
||||
|
||||
/*
|
||||
* Threshold value for the exponential moving contention average at which to
|
||||
* re-assign a thread.
|
||||
*/
|
||||
/*
|
||||
* Threshold value for the exponential moving contention average at which to
|
||||
* re-assign a thread.
|
||||
*/
|
||||
# define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
|
||||
#endif
|
||||
|
||||
@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
size_t try_run_size, good_run_size;
|
||||
unsigned good_nregs, good_mask_nelms, good_reg0_offset;
|
||||
unsigned try_nregs, try_mask_nelms, try_reg0_offset;
|
||||
float max_ovrhd = RUN_MAX_OVRHD;
|
||||
|
||||
assert(min_run_size >= pagesize);
|
||||
assert(min_run_size <= arena_maxclass);
|
||||
@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
*/
|
||||
try_run_size = min_run_size;
|
||||
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
|
||||
+ 1; /* Counter-act the first line of the loop. */
|
||||
+ 1; /* Counter-act try_nregs-- in loop. */
|
||||
do {
|
||||
try_nregs--;
|
||||
try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
|
||||
@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
} while (sizeof(arena_run_t) + (sizeof(unsigned) *
|
||||
(try_mask_nelms - 1)) > try_reg0_offset);
|
||||
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
|
||||
&& max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
|
||||
&& ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
|
||||
max_ovrhd);
|
||||
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
|
||||
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
|
||||
|
||||
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
|
||||
<= good_reg0_offset);
|
||||
|
Loading…
x
Reference in New Issue
Block a user