Use fixed point integer math instead of floating point math when calculating run sizes.

Use of the floating point unit was a potential pessimization to context switching for applications that do not otherwise use floating point math. [1] Reformat cpp macro-related comments to improve consistency. Submitted by: das
2007-12-18 05:27:57 +00:00 · 2007-12-18 05:27:57 +00:00 · b720912697
commit b720912697
parent 3b94f3069c
1 changed file with 47 additions and 42 deletions
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@ -101,7 +101,14 @@
 /* #define	MALLOC_PRODUCTION */

 #ifndef MALLOC_PRODUCTION
+   /*
+    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
+    * inline functions.
+    */
 #  define MALLOC_DEBUG
+
+   /* MALLOC_STATS enables statistics calculation. */
+#  define MALLOC_STATS
 #endif

 /*
@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$");

 #include "un-namespace.h"

-/* MALLOC_STATS enables statistics calculation. */
-#ifndef MALLOC_PRODUCTION
-#  define MALLOC_STATS
-#endif
-
 #ifdef MALLOC_DEBUG
 #  ifdef NDEBUG
 #    undef NDEBUG
@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$");
 #define	SMALL_MAX_DEFAULT	(1U << SMALL_MAX_2POW_DEFAULT)

 /*
- * Maximum desired run header overhead.  Runs are sized as small as possible
- * such that this setting is still honored, without violating other constraints.
- * The goal is to make runs as small as possible without exceeding a per run
- * external fragmentation threshold.
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints.  The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
 *
- * Note that it is possible to set this low enough that it cannot be honored
- * for some/all object sizes, since there is one bit of header overhead per
- * object (plus a constant).  In such cases, this constraint is relaxed.
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
 *
- * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
- * overhead for which RUN_MAX_OVRHD is relaxed.
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant).  This constraint is relaxed (ignored) for runs
+ * that are so small that the per-region overhead is greater than:
+ *
+ *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
 */
-#define	RUN_MAX_OVRHD		0.015
-#define	RUN_MAX_OVRHD_RELAX	1.5
+#define	RUN_BFP			12
+/*                                    \/   Implicit binary fixed point. */
+#define	RUN_MAX_OVRHD		0x0000003dU
+#define	RUN_MAX_OVRHD_RELAX	0x00001800U

 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 #define	RUN_MAX_SMALL_2POW	15
 #define	RUN_MAX_SMALL		(1U << RUN_MAX_SMALL_2POW)

 #ifdef MALLOC_LAZY_FREE
-/* Default size of each arena's lazy free cache. */
-#  define LAZY_FREE_2POW_DEFAULT	8
-/*
- * Number of pseudo-random probes to conduct before considering the cache to be
- * overly full.  It takes on average n probes to detect fullness of (n-1)/n.
- * However, we are effectively doing multiple non-independent trials (each
- * deallocation is a trial), so the actual average threshold for clearing the
- * cache is somewhat lower.
- */
+   /* Default size of each arena's lazy free cache. */
+#  define LAZY_FREE_2POW_DEFAULT 8
+   /*
+    * Number of pseudo-random probes to conduct before considering the cache to
+    * be overly full.  It takes on average n probes to detect fullness of
+    * (n-1)/n.  However, we are effectively doing multiple non-independent
+    * trials (each deallocation is a trial), so the actual average threshold
+    * for clearing the cache is somewhat lower.
+    */
 #  define LAZY_FREE_NPROBES	5
 #endif

@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$");
 #define	BLOCK_COST_2POW		4

 #ifdef MALLOC_BALANCE
-/*
- * We use an exponential moving average to track recent lock contention, where
- * the size of the history window is N, and alpha=2/(N+1).
- *
- * Due to integer math rounding, very small values here can cause substantial
- * degradation in accuracy, thus making the moving average decay faster than it
- * would with precise calculation.
- */
+   /*
+    * We use an exponential moving average to track recent lock contention,
+    * where the size of the history window is N, and alpha=2/(N+1).
+    *
+    * Due to integer math rounding, very small values here can cause
+    * substantial degradation in accuracy, thus making the moving average decay
+    * faster than it would with precise calculation.
+    */
 #  define BALANCE_ALPHA_INV_2POW	9

-/*
- * Threshold value for the exponential moving contention average at which to
- * re-assign a thread.
- */
+   /*
+    * Threshold value for the exponential moving contention average at which to
+    * re-assign a thread.
+    */
 #  define BALANCE_THRESHOLD_DEFAULT	(1U << (SPIN_LIMIT_2POW-4))
 #endif

@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	size_t try_run_size, good_run_size;
 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
-	float max_ovrhd = RUN_MAX_OVRHD;

 	assert(min_run_size >= pagesize);
 	assert(min_run_size <= arena_maxclass);
@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	 */
 	try_run_size = min_run_size;
 	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
-	    + 1; /* Counter-act the first line of the loop. */
+	    + 1; /* Counter-act try_nregs-- in loop. */
 	do {
 		try_nregs--;
 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
 		    (try_mask_nelms - 1)) > try_reg0_offset);
 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
-	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
-	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
-	    max_ovrhd);
+	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);

 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
 	    <= good_reg0_offset);