Use fixed point integer math instead of floating point math when
calculating run sizes.  Use of the floating point unit was a potential
pessimization to context switching for applications that do not otherwise
use floating point math. [1]

Reformat cpp macro-related comments to improve consistency.

Submitted by:	das
This commit is contained in:
jasone 2007-12-18 05:27:57 +00:00
parent 3b94f3069c
commit b720912697

View File

@ -101,7 +101,14 @@
/* #define MALLOC_PRODUCTION */
#ifndef MALLOC_PRODUCTION
/*
* MALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
# define MALLOC_DEBUG
/* MALLOC_STATS enables statistics calculation. */
# define MALLOC_STATS
#endif
/*
@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
/* MALLOC_STATS enables statistics calculation. */
#ifndef MALLOC_PRODUCTION
# define MALLOC_STATS
#endif
#ifdef MALLOC_DEBUG
# ifdef NDEBUG
# undef NDEBUG
@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$");
#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT)
/*
* Maximum desired run header overhead. Runs are sized as small as possible
* such that this setting is still honored, without violating other constraints.
* The goal is to make runs as small as possible without exceeding a per run
* external fragmentation threshold.
* RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
* as small as possible such that this setting is still honored, without
* violating other constraints. The goal is to make runs as small as possible
* without exceeding a per run external fragmentation threshold.
*
* Note that it is possible to set this low enough that it cannot be honored
* for some/all object sizes, since there is one bit of header overhead per
* object (plus a constant). In such cases, this constraint is relaxed.
* We use binary fixed point math for overhead computations, where the binary
* point is implicitly RUN_BFP bits to the left.
*
* RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
* overhead for which RUN_MAX_OVRHD is relaxed.
* Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
* honored for some/all object sizes, since there is one bit of header overhead
* per object (plus a constant). This constraint is relaxed (ignored) for runs
* that are so small that the per-region overhead is greater than:
*
* (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
*/
#define RUN_MAX_OVRHD 0.015
#define RUN_MAX_OVRHD_RELAX 1.5
#define RUN_BFP 12
/* \/ Implicit binary fixed point. */
#define RUN_MAX_OVRHD 0x0000003dU
#define RUN_MAX_OVRHD_RELAX 0x00001800U
/* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
#define RUN_MAX_SMALL_2POW 15
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW)
#ifdef MALLOC_LAZY_FREE
/* Default size of each arena's lazy free cache. */
# define LAZY_FREE_2POW_DEFAULT 8
/*
* Number of pseudo-random probes to conduct before considering the cache to be
* overly full. It takes on average n probes to detect fullness of (n-1)/n.
* However, we are effectively doing multiple non-independent trials (each
* deallocation is a trial), so the actual average threshold for clearing the
* cache is somewhat lower.
*/
/* Default size of each arena's lazy free cache. */
# define LAZY_FREE_2POW_DEFAULT 8
/*
* Number of pseudo-random probes to conduct before considering the cache to
* be overly full. It takes on average n probes to detect fullness of
* (n-1)/n. However, we are effectively doing multiple non-independent
* trials (each deallocation is a trial), so the actual average threshold
* for clearing the cache is somewhat lower.
*/
# define LAZY_FREE_NPROBES 5
#endif
@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$");
#define BLOCK_COST_2POW 4
#ifdef MALLOC_BALANCE
/*
* We use an exponential moving average to track recent lock contention, where
* the size of the history window is N, and alpha=2/(N+1).
*
* Due to integer math rounding, very small values here can cause substantial
* degradation in accuracy, thus making the moving average decay faster than it
* would with precise calculation.
*/
/*
* We use an exponential moving average to track recent lock contention,
* where the size of the history window is N, and alpha=2/(N+1).
*
* Due to integer math rounding, very small values here can cause
* substantial degradation in accuracy, thus making the moving average decay
* faster than it would with precise calculation.
*/
# define BALANCE_ALPHA_INV_2POW 9
/*
* Threshold value for the exponential moving contention average at which to
* re-assign a thread.
*/
/*
* Threshold value for the exponential moving contention average at which to
* re-assign a thread.
*/
# define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
#endif
@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
size_t try_run_size, good_run_size;
unsigned good_nregs, good_mask_nelms, good_reg0_offset;
unsigned try_nregs, try_mask_nelms, try_reg0_offset;
float max_ovrhd = RUN_MAX_OVRHD;
assert(min_run_size >= pagesize);
assert(min_run_size <= arena_maxclass);
@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
*/
try_run_size = min_run_size;
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
+ 1; /* Counter-act the first line of the loop. */
+ 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
} while (sizeof(arena_run_t) + (sizeof(unsigned) *
(try_mask_nelms - 1)) > try_reg0_offset);
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
&& max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
&& ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
max_ovrhd);
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
<= good_reg0_offset);