Update jemalloc to 4.3.1.

Jason Evans 2016-11-09 18:42:30 +00:00
parent c925d87e2a
commit bde951447f
44 changed files with 7567 additions and 6553 deletions

View File

@ -4,6 +4,59 @@ brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
* 4.3.1 (November 7, 2016)
Bug fixes:
- Fix a severe virtual memory leak. This regression was first released in
4.3.0. (@interwq, @jasone)
- Refactor atomic and prng APIs to restore support for 32-bit platforms that
use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone)
* 4.3.0 (November 4, 2016)
This is the first release that passes the test suite for multiple Windows
configurations, thanks in large part to @glandium setting up continuous
integration via AppVeyor (and Travis CI for Linux and OS X).
New features:
- Add "J" (JSON) support to malloc_stats_print(). (@jasone)
- Add Cray compiler support. (@ronawho)
Optimizations:
- Add/use adaptive spinning for bootstrapping and radix tree node
initialization. (@jasone)
Bug fixes:
- Fix large allocation to search starting in the optimal size class heap,
which can substantially reduce virtual memory churn and fragmentation. This
regression was first released in 4.0.0. (@mjp41, @jasone)
- Fix stats.arenas.<i>.nthreads accounting. (@interwq)
- Fix and simplify decay-based purging. (@jasone)
- Make DSS (sbrk(2)-related) operations lockless, which resolves potential
deadlocks during thread exit. (@jasone)
- Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun,
@jasone)
- Fix over-sized allocation of arena_t (plus associated stats) data
structures. (@jasone, @interwq)
- Fix EXTRA_CFLAGS to not affect configuration. (@jasone)
- Fix a Valgrind integration bug. (@ronawho)
- Disallow 0x5a junk filling when running in Valgrind. (@jasone)
- Fix a file descriptor leak on Linux. This regression was first released in
4.2.0. (@vsarunas, @jasone)
- Fix static linking of jemalloc with glibc. (@djwatson)
- Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This
works around other libraries' system call wrappers performing reentrant
allocation. (@kspinka, @Whissi, @jasone)
- Fix OS X default zone replacement to work with OS X 10.12. (@glandium,
@jasone)
- Fix cached memory management to avoid needless commit/decommit operations
during purging, which resolves permanent virtual memory map fragmentation
issues on Windows. (@mjp41, @jasone)
- Fix TSD fetches to avoid (recursive) allocation. This is relevant to
non-TLS and Windows configurations. (@jasone)
- Fix malloc_conf overriding to work on Windows. (@jasone)
- Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone)
* 4.2.1 (June 8, 2016)
Bug fixes:

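One of the 4.3.0 bug fixes above switches boot-time file access from {open,read,close}(2) to raw syscall(2) so that other libraries' interposed wrappers cannot trigger reentrant allocation. A minimal sketch of that approach, written as a hypothetical Linux-only helper rather than jemalloc's actual code:

#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Hypothetical bootstrap helper: read a small file without going through
 * libc's open/read/close, which another library may have wrapped with code
 * that allocates.
 */
static ssize_t
boot_read_file(const char *path, void *buf, size_t len)
{
	long fd;
	ssize_t nread;

#ifdef SYS_open
	fd = syscall(SYS_open, path, O_RDONLY);
#else
	fd = syscall(SYS_openat, AT_FDCWD, path, O_RDONLY);
#endif
	if (fd < 0)
		return (-1);
	nread = syscall(SYS_read, (int)fd, buf, len);
	syscall(SYS_close, (int)fd);
	return (nread);
}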
View File

@ -1,6 +1,8 @@
$FreeBSD$
.appveyor.yml
.autom4te.cfg
.git*
.travis.yml
FREEBSD-*
INSTALL
Makefile*

View File

@ -1,11 +1,11 @@
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index c4a44e3..4626e9b 100644
index 3d2e721..b361db2 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -53,11 +53,23 @@
<para>This manual describes jemalloc @jemalloc_version@. More information
can be found at the <ulink
url="http://www.canonware.com/jemalloc/">jemalloc website</ulink>.</para>
url="http://jemalloc.net/">jemalloc website</ulink>.</para>
+
+ <para>The following configuration options are enabled in libc's built-in
+ jemalloc: <option>--enable-fill</option>,
@ -27,30 +27,30 @@ index c4a44e3..4626e9b 100644
<refsect2>
<title>Standard API</title>
<funcprototype>
@@ -2961,4 +2973,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
<para>The <function>posix_memalign<parameter/></function> function conforms
to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
@@ -2963,4 +2975,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
<para>The <function>posix_memalign()</function> function conforms
to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
</refsect1>
+ <refsect1 id="history">
+ <title>HISTORY</title>
+ <para>The <function>malloc_usable_size<parameter/></function> and
+ <function>posix_memalign<parameter/></function> functions first appeared in
+ FreeBSD 7.0.</para>
+ <para>The <function>malloc_usable_size()</function> and
+ <function>posix_memalign()</function> functions first appeared in FreeBSD
+ 7.0.</para>
+
+ <para>The <function>aligned_alloc<parameter/></function>,
+ <function>malloc_stats_print<parameter/></function>, and
+ <function>mallctl*<parameter/></function> functions first appeared in
+ FreeBSD 10.0.</para>
+ <para>The <function>aligned_alloc()</function>,
+ <function>malloc_stats_print()</function>, and
+ <function>mallctl*()</function> functions first appeared in FreeBSD
+ 10.0.</para>
+
+ <para>The <function>*allocx<parameter/></function> functions first appeared
+ in FreeBSD 11.0.</para>
+ <para>The <function>*allocx()</function> functions first appeared in FreeBSD
+ 11.0.</para>
+ </refsect1>
</refentry>
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index b1de2b6..da6b6d2 100644
index f39ce54..a3ba55d 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -718,8 +718,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
@@ -719,8 +719,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind)
JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t *
arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind)
{
@ -64,7 +64,7 @@ index b1de2b6..da6b6d2 100644
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -778,8 +783,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
@@ -779,8 +784,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind)
JEMALLOC_ALWAYS_INLINE const size_t *
arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind)
{
@ -79,7 +79,7 @@ index b1de2b6..da6b6d2 100644
JEMALLOC_ALWAYS_INLINE size_t
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 8f82edd..78e2df2 100644
index fdc8fef..56a35a4 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -8,6 +8,9 @@
@ -108,7 +108,7 @@ index 8f82edd..78e2df2 100644
static const bool config_prof =
#ifdef JEMALLOC_PROF
diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
index 2b8ca5d..42d97f2 100644
index c907d91..4626632 100644
--- a/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -1,6 +1,9 @@
@ -122,10 +122,10 @@ index 2b8ca5d..42d97f2 100644
#ifdef _WIN32
# include <windows.h>
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index 5221799..60ab041 100644
index b442d2d..76518db 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -52,9 +52,6 @@ struct malloc_mutex_s {
@@ -57,9 +57,6 @@ struct malloc_mutex_s {
#ifdef JEMALLOC_LAZY_LOCK
extern bool isthreaded;
@ -135,7 +135,7 @@ index 5221799..60ab041 100644
#endif
bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
@@ -62,6 +59,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
@@ -67,6 +64,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
@ -144,10 +144,10 @@ index 5221799..60ab041 100644
#endif /* JEMALLOC_H_EXTERNS */
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index f2b6a55..69369c9 100644
index 87c8c9b..df576f6 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -311,7 +311,6 @@ iralloct_realign
@@ -307,7 +307,6 @@ iralloct_realign
isalloc
isdalloct
isqalloc
@ -335,7 +335,7 @@ index f943891..47d032c 100755
+#include "jemalloc_FreeBSD.h"
EOF
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 5d1f493..46dd1d1 100644
index 38650ff..f659b55 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -4,6 +4,10 @@
@ -347,9 +347,9 @@ index 5d1f493..46dd1d1 100644
+__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0);
+
/* Runtime configuration options. */
const char *je_malloc_conf JEMALLOC_ATTR(weak);
bool opt_abort =
@@ -2673,6 +2677,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
const char *je_malloc_conf
#ifndef _WIN32
@@ -2756,6 +2760,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
*/
/******************************************************************************/
/*
@ -457,7 +457,7 @@ index 5d1f493..46dd1d1 100644
* The following functions are used by threading libraries for protection of
* malloc during fork().
*/
@@ -2814,4 +2919,11 @@ jemalloc_postfork_child(void)
@@ -2894,4 +2999,11 @@ jemalloc_postfork_child(void)
ctl_postfork_child(tsd_tsdn(tsd));
}
@ -470,7 +470,7 @@ index 5d1f493..46dd1d1 100644
+
/******************************************************************************/
diff --git a/src/mutex.c b/src/mutex.c
index a1fac34..a24e420 100644
index 6333e73..13f8d79 100644
--- a/src/mutex.c
+++ b/src/mutex.c
@@ -66,6 +66,17 @@ pthread_create(pthread_t *__restrict thread,
@ -491,7 +491,7 @@ index a1fac34..a24e420 100644
#endif
bool
@@ -140,7 +151,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex)
@@ -142,7 +153,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex)
}
bool
@ -500,7 +500,7 @@ index a1fac34..a24e420 100644
{
#ifdef JEMALLOC_MUTEX_INIT_CB
@@ -154,3 +165,14 @@ malloc_mutex_boot(void)
@@ -156,3 +167,14 @@ malloc_mutex_boot(void)
#endif
return (false);
}
@ -516,7 +516,7 @@ index a1fac34..a24e420 100644
+#endif
+}
diff --git a/src/util.c b/src/util.c
index a1c4a2a..04f9153 100644
index 7905267..bee1c77 100644
--- a/src/util.c
+++ b/src/util.c
@@ -67,6 +67,22 @@ wrtmessage(void *cbopaque, const char *s)

View File

@ -1 +1 @@
4.2.1-0-g3de035335255d553bdb344c32ffdb603816195d8
4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2

File diff suppressed because it is too large.

View File

@ -42,6 +42,7 @@ typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_decay_s arena_decay_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
typedef struct arena_tdata_s arena_tdata_t;
@ -257,6 +258,49 @@ struct arena_bin_info_s {
uint32_t reg0_offset;
};
struct arena_decay_s {
/*
* Approximate time in seconds from the creation of a set of unused
* dirty pages until an equivalent set of unused dirty pages is purged
* and/or reused.
*/
ssize_t time;
/* time / SMOOTHSTEP_NSTEPS. */
nstime_t interval;
/*
* Time at which the current decay interval logically started. We do
* not actually advance to a new epoch until sometime after it starts
* because of scheduling and computation delays, and it is even possible
* to completely skip epochs. In all cases, during epoch advancement we
* merge all relevant activity into the most recently recorded epoch.
*/
nstime_t epoch;
/* Deadline randomness generator. */
uint64_t jitter_state;
/*
* Deadline for current epoch. This is the sum of interval and per
* epoch jitter which is a uniform random variable in [0..interval).
* Epochs always advance by precise multiples of interval, but we
* randomize the deadline to reduce the likelihood of arenas purging in
* lockstep.
*/
nstime_t deadline;
/*
* Number of dirty pages at beginning of current epoch. During epoch
* advancement we use the delta between arena->decay.ndirty and
* arena->ndirty to determine how many dirty pages, if any, were
* generated.
*/
size_t ndirty;
/*
* Trailing log of how many unused dirty pages were generated during
* each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
* element is the most recent epoch. Corresponding epoch times are
* relative to epoch.
*/
size_t backlog[SMOOTHSTEP_NSTEPS];
};
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
@ -326,7 +370,7 @@ struct arena_s {
* PRNG state for cache index randomization of large allocation base
* pointers.
*/
uint64_t offset_state;
size_t offset_state;
dss_prec_t dss_prec;
@ -394,52 +438,8 @@ struct arena_s {
arena_runs_dirty_link_t runs_dirty;
extent_node_t chunks_cache;
/*
* Approximate time in seconds from the creation of a set of unused
* dirty pages until an equivalent set of unused dirty pages is purged
* and/or reused.
*/
ssize_t decay_time;
/* decay_time / SMOOTHSTEP_NSTEPS. */
nstime_t decay_interval;
/*
* Time at which the current decay interval logically started. We do
* not actually advance to a new epoch until sometime after it starts
* because of scheduling and computation delays, and it is even possible
* to completely skip epochs. In all cases, during epoch advancement we
* merge all relevant activity into the most recently recorded epoch.
*/
nstime_t decay_epoch;
/* decay_deadline randomness generator. */
uint64_t decay_jitter_state;
/*
* Deadline for current epoch. This is the sum of decay_interval and
* per epoch jitter which is a uniform random variable in
* [0..decay_interval). Epochs always advance by precise multiples of
* decay_interval, but we randomize the deadline to reduce the
* likelihood of arenas purging in lockstep.
*/
nstime_t decay_deadline;
/*
* Number of dirty pages at beginning of current epoch. During epoch
* advancement we use the delta between decay_ndirty and ndirty to
* determine how many dirty pages, if any, were generated, and record
* the result in decay_backlog.
*/
size_t decay_ndirty;
/*
* Memoized result of arena_decay_backlog_npages_limit() corresponding
* to the current contents of decay_backlog, i.e. the limit on how many
* pages are allowed to exist for the decay epochs.
*/
size_t decay_backlog_npages_limit;
/*
* Trailing log of how many unused dirty pages were generated during
* each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
* element is the most recent epoch. Corresponding epoch times are
* relative to decay_epoch.
*/
size_t decay_backlog[SMOOTHSTEP_NSTEPS];
/* Decay-based purging state. */
arena_decay_t decay;
/* Extant huge allocations. */
ql_head(extent_node_t) huge;
@ -470,10 +470,12 @@ struct arena_s {
arena_bin_t bins[NBINS];
/*
* Quantized address-ordered heaps of this arena's available runs. The
* heaps are used for first-best-fit run allocation.
* Size-segregated address-ordered heaps of this arena's available runs,
* used for first-best-fit run allocation. Runs are quantized, i.e.
* they reside in the last heap which corresponds to a size class less
* than or equal to the run size.
*/
arena_run_heap_t runs_avail[1]; /* Dynamically sized. */
arena_run_heap_t runs_avail[NPSIZES];
};
/* Used in conjunction with tsd for fast arena-related context lookup. */
@ -505,7 +507,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */
extern size_t map_misc_offset;
extern size_t arena_maxrun; /* Max run size for arenas. */
extern size_t large_maxclass; /* Max large size class. */
extern size_t run_quantize_max; /* Max run_quantize_*() input. */
extern unsigned nlclasses; /* Number of large size classes. */
extern unsigned nhclasses; /* Number of huge size classes. */
@ -601,7 +602,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal);
void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
arena_t *arena_new(tsdn_t *tsdn, unsigned ind);
bool arena_boot(void);
void arena_boot(void);
void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
void arena_prefork2(tsdn_t *tsdn, arena_t *arena);

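The arena_decay_s comments above describe the decay bookkeeping: a jittered per-epoch deadline, plus a backlog recording how many dirty pages each of the last SMOOTHSTEP_NSTEPS epochs generated. A condensed, standalone sketch of the backlog update performed when epochs advance (illustrative only; jemalloc's real code also recomputes the purge target from the backlog):

#include <stddef.h>
#include <string.h>

#define SMOOTHSTEP_NSTEPS 200	/* assumption: matches jemalloc's smoothstep table */

typedef struct {
	size_t ndirty;			/* dirty pages at start of current epoch */
	size_t backlog[SMOOTHSTEP_NSTEPS];
} decay_sketch_t;

/*
 * Advance the decay state by nadvance epochs: slide the backlog so old epochs
 * fall off, and credit any newly generated dirty pages to the newest slot.
 */
static void
decay_backlog_advance(decay_sketch_t *d, size_t nadvance, size_t cur_ndirty)
{
	size_t ngen = (cur_ndirty > d->ndirty) ? cur_ndirty - d->ndirty : 0;

	if (nadvance >= SMOOTHSTEP_NSTEPS) {
		memset(d->backlog, 0, sizeof(d->backlog));
	} else {
		memmove(d->backlog, &d->backlog[nadvance],
		    (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t));
		memset(&d->backlog[SMOOTHSTEP_NSTEPS - nadvance], 0,
		    nadvance * sizeof(size_t));
	}
	d->backlog[SMOOTHSTEP_NSTEPS - 1] = ngen;
	d->ndirty = cur_ndirty;
}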
View File

@ -58,7 +58,7 @@ void chunk_deregister(const void *chunk, const extent_node_t *node);
void *chunk_alloc_base(size_t size);
void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena,
chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
bool *zero, bool dalloc_node);
bool *zero, bool *commit, bool dalloc_node);
void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment,
bool *zero, bool *commit);
@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena,
chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset,
size_t length);
bool chunk_boot(void);
void chunk_prefork(tsdn_t *tsdn);
void chunk_postfork_parent(tsdn_t *tsdn);
void chunk_postfork_child(tsdn_t *tsdn);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/

View File

@ -21,15 +21,13 @@ extern const char *dss_prec_names[];
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn);
bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec);
dss_prec_t chunk_dss_prec_get(void);
bool chunk_dss_prec_set(dss_prec_t dss_prec);
void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
size_t size, size_t alignment, bool *zero, bool *commit);
bool chunk_in_dss(tsdn_t *tsdn, void *chunk);
bool chunk_dss_boot(void);
void chunk_dss_prefork(tsdn_t *tsdn);
void chunk_dss_postfork_parent(tsdn_t *tsdn);
void chunk_dss_postfork_child(tsdn_t *tsdn);
bool chunk_in_dss(void *chunk);
bool chunk_dss_mergeable(void *chunk_a, void *chunk_b);
void chunk_dss_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/

View File

@ -64,13 +64,13 @@ struct ckh_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
void ckh_delete(tsdn_t *tsdn, ckh_t *ckh);
void ckh_delete(tsd_t *tsd, ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data);
bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key,
bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
void ckh_string_hash(const void *key, size_t r_hash[2]);

View File

@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize,
void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
size_t usize, size_t alignment, bool zero, tcache_t *tcache);
#ifdef JEMALLOC_JET
typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t);
typedef void (huge_dalloc_junk_t)(void *, size_t);
extern huge_dalloc_junk_t *huge_dalloc_junk;
#endif
void huge_dalloc(tsdn_t *tsdn, void *ptr);

View File

@ -159,7 +159,9 @@ static const bool config_cache_oblivious =
#endif
#include "jemalloc/internal/ph.h"
#ifndef __PGI
#define RB_COMPACT
#endif
#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/qr.h"
#include "jemalloc/internal/ql.h"
@ -182,6 +184,9 @@ static const bool config_cache_oblivious =
#include "jemalloc/internal/jemalloc_internal_macros.h"
/* Page size index type. */
typedef unsigned pszind_t;
/* Size class index type. */
typedef unsigned szind_t;
@ -231,7 +236,7 @@ typedef unsigned szind_t;
# ifdef __alpha__
# define LG_QUANTUM 4
# endif
# if (defined(__sparc64__) || defined(__sparcv9))
# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
# define LG_QUANTUM 4
# endif
# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
@ -361,6 +366,7 @@ typedef unsigned szind_t;
#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/spin.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/ckh.h"
@ -393,6 +399,7 @@ typedef unsigned szind_t;
#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/spin.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/ckh.h"
@ -452,11 +459,16 @@ extern unsigned narenas_auto;
*/
extern arena_t **arenas;
/*
* pind2sz_tab encodes the same information as could be computed by
* pind2sz_compute().
*/
extern size_t const pind2sz_tab[NPSIZES];
/*
* index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by index2size_compute().
*/
extern size_t const index2size_tab[NSIZES+1];
extern size_t const index2size_tab[NSIZES];
/*
* size2index_tab is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
@ -464,6 +476,7 @@ extern size_t const index2size_tab[NSIZES+1];
*/
extern uint8_t const size2index_tab[];
arena_t *a0get(void);
void *a0malloc(size_t size);
void a0dalloc(void *ptr);
void *bootstrap_malloc(size_t size);
@ -489,6 +502,7 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/spin.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/ckh.h"
@ -521,6 +535,7 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/spin.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/ckh.h"
@ -540,6 +555,11 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
pszind_t psz2ind(size_t psz);
size_t pind2sz_compute(pszind_t pind);
size_t pind2sz_lookup(pszind_t pind);
size_t pind2sz(pszind_t pind);
size_t psz2u(size_t psz);
szind_t size2index_compute(size_t size);
szind_t size2index_lookup(size_t size);
szind_t size2index(size_t size);
@ -552,7 +572,7 @@ size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment);
arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena);
arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena);
arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind,
bool refresh_if_missing);
arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
@ -560,10 +580,90 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
JEMALLOC_INLINE pszind_t
psz2ind(size_t psz)
{
if (unlikely(psz > HUGE_MAXCLASS))
return (NPSIZES);
{
pszind_t x = lg_floor((psz<<1)-1);
pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
(LG_SIZE_CLASS_GROUP + LG_PAGE);
pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
size_t delta_inverse_mask = ZI(-1) << lg_delta;
pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
pszind_t ind = grp + mod;
return (ind);
}
}
JEMALLOC_INLINE size_t
pind2sz_compute(pszind_t pind)
{
{
size_t grp = pind >> LG_SIZE_CLASS_GROUP;
size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
size_t grp_size_mask = ~((!!grp)-1);
size_t grp_size = ((ZU(1) << (LG_PAGE +
(LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
size_t shift = (grp == 0) ? 1 : grp;
size_t lg_delta = shift + (LG_PAGE-1);
size_t mod_size = (mod+1) << lg_delta;
size_t sz = grp_size + mod_size;
return (sz);
}
}
JEMALLOC_INLINE size_t
pind2sz_lookup(pszind_t pind)
{
size_t ret = (size_t)pind2sz_tab[pind];
assert(ret == pind2sz_compute(pind));
return (ret);
}
JEMALLOC_INLINE size_t
pind2sz(pszind_t pind)
{
assert(pind < NPSIZES);
return (pind2sz_lookup(pind));
}
JEMALLOC_INLINE size_t
psz2u(size_t psz)
{
if (unlikely(psz > HUGE_MAXCLASS))
return (0);
{
size_t x = lg_floor((psz<<1)-1);
size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
size_t delta = ZU(1) << lg_delta;
size_t delta_mask = delta - 1;
size_t usize = (psz + delta_mask) & ~delta_mask;
return (usize);
}
}
JEMALLOC_INLINE szind_t
size2index_compute(size_t size)
{
if (unlikely(size > HUGE_MAXCLASS))
return (NSIZES);
#if (NTBINS != 0)
if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
@ -572,9 +672,7 @@ size2index_compute(size_t size)
}
#endif
{
szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
(ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
: lg_floor((size<<1)-1);
szind_t x = lg_floor((size<<1)-1);
szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
szind_t grp = shift << LG_SIZE_CLASS_GROUP;
@ -660,6 +758,8 @@ JEMALLOC_ALWAYS_INLINE size_t
s2u_compute(size_t size)
{
if (unlikely(size > HUGE_MAXCLASS))
return (0);
#if (NTBINS > 0)
if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
@ -669,9 +769,7 @@ s2u_compute(size_t size)
}
#endif
{
size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
(ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
: lg_floor((size<<1)-1);
size_t x = lg_floor((size<<1)-1);
size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
size_t delta = ZU(1) << lg_delta;
@ -812,14 +910,10 @@ arena_choose(tsd_t *tsd, arena_t *arena)
}
JEMALLOC_INLINE arena_t *
arena_ichoose(tsdn_t *tsdn, arena_t *arena)
arena_ichoose(tsd_t *tsd, arena_t *arena)
{
assert(!tsdn_null(tsdn) || arena != NULL);
if (!tsdn_null(tsdn))
return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true));
return (arena);
return (arena_choose_impl(tsd, arena, true));
}
JEMALLOC_INLINE arena_tdata_t *

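The new psz2ind()/pind2sz()/psz2u() inlines above map page-aligned run sizes to page size classes with the same lg_floor()-based grouping used for the regular size classes. A tiny standalone sketch of just the rounding step, mirroring psz2u() under the assumptions LG_PAGE == 12 and LG_SIZE_CLASS_GROUP == 2 (and ignoring the HUGE_MAXCLASS guard):

#include <stddef.h>
#include <stdio.h>

#define LG_PAGE 12			/* assumption: 4 KiB pages */
#define LG_SIZE_CLASS_GROUP 2		/* 4 size classes per doubling */

static unsigned
lg_floor(size_t x)
{
	unsigned r = 0;

	while (x >>= 1)
		r++;
	return (r);
}

/* Round a page-aligned size up to its page size class (cf. psz2u() above). */
static size_t
psz2u_sketch(size_t psz)
{
	unsigned x = lg_floor((psz << 1) - 1);
	unsigned lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
	    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
	size_t delta = (size_t)1 << lg_delta;

	return ((psz + delta - 1) & ~(delta - 1));
}

int
main(void)
{
	printf("%zu -> %zu\n", (size_t)20480, psz2u_sketch(20480)); /* 20480 */
	printf("%zu -> %zu\n", (size_t)21504, psz2u_sketch(21504)); /* 24576 */
	return (0);
}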
View File

@ -20,8 +20,18 @@
# include <sys/uio.h>
# endif
# include <pthread.h>
# ifdef JEMALLOC_OS_UNFAIR_LOCK
# include <os/lock.h>
# endif
# ifdef JEMALLOC_GLIBC_MALLOC_HOOK
# include <sched.h>
# endif
# include <errno.h>
# include <sys/time.h>
# include <time.h>
# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
# include <mach/mach_time.h>
# endif
#endif
#include <sys/types.h>

View File

@ -61,12 +61,20 @@
*/
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
/* #undef JEMALLOC_OSSPIN */
/* Defined if syscall(2) is available. */
#define JEMALLOC_HAVE_SYSCALL
/*
* Defined if secure_getenv(3) is available.
*/
@ -77,6 +85,21 @@
*/
#define JEMALLOC_HAVE_ISSETUGID
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
/*
* Defined if mach_absolute_time() is available.
*/
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
@ -189,6 +212,12 @@
/* TLS is used to map arenas and magazine caches to threads. */
#define JEMALLOC_TLS
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE abort
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.

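The new JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE, JEMALLOC_HAVE_CLOCK_MONOTONIC, and JEMALLOC_HAVE_MACH_ABSOLUTE_TIME defines above tell the nstime code which clock it may use. A simplified sketch of one plausible selection order (jemalloc's real nstime code also reports via nstime_monotonic() whether the chosen clock is monotonic and guards against time going backwards):

#include <stdint.h>
#include <sys/time.h>
#include <time.h>

/* Illustrative: return nanoseconds from the best clock configure found. */
static uint64_t
monotonic_ns_sketch(void)
{
#if defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000 + (uint64_t)ts.tv_nsec);
#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000 + (uint64_t)ts.tv_nsec);
#else
	struct timeval tv;	/* non-monotonic fallback */

	gettimeofday(&tv, NULL);
	return ((uint64_t)tv.tv_sec * 1000000000 + (uint64_t)tv.tv_usec * 1000);
#endif
}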
View File

@ -105,8 +105,8 @@ mb_write(void)
malloc_mutex_t mtx;
malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT);
malloc_mutex_lock(NULL, &mtx);
malloc_mutex_unlock(NULL, &mtx);
malloc_mutex_lock(TSDN_NULL, &mtx);
malloc_mutex_unlock(TSDN_NULL, &mtx);
}
#endif
#endif

View File

@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t;
#ifdef _WIN32
# define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
# define MALLOC_MUTEX_INITIALIZER \
{OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_OSSPIN))
# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@ -35,6 +38,8 @@ struct malloc_mutex_s {
# else
CRITICAL_SECTION lock;
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
os_unfair_lock lock;
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@ -86,6 +91,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex)
# else
EnterCriticalSection(&mutex->lock);
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
os_unfair_lock_lock(&mutex->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockLock(&mutex->lock);
#else
@ -107,6 +114,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex)
# else
LeaveCriticalSection(&mutex->lock);
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
os_unfair_lock_unlock(&mutex->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockUnlock(&mutex->lock);
#else

View File

@ -1,9 +1,6 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
&& _POSIX_MONOTONIC_CLOCK >= 0
typedef struct nstime_s nstime_t;
/* Maximum supported number of seconds (~584 years). */
@ -34,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier);
void nstime_idivide(nstime_t *time, uint64_t divisor);
uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor);
#ifdef JEMALLOC_JET
typedef bool (nstime_monotonic_t)(void);
extern nstime_monotonic_t *nstime_monotonic;
typedef bool (nstime_update_t)(nstime_t *);
extern nstime_update_t *nstime_update;
#else
bool nstime_monotonic(void);
bool nstime_update(nstime_t *time);
#endif

View File

@ -1,4 +1,5 @@
#define a0dalloc JEMALLOC_N(a0dalloc)
#define a0get JEMALLOC_N(a0get)
#define a0malloc JEMALLOC_N(a0malloc)
#define arena_aalloc JEMALLOC_N(arena_aalloc)
#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small)
@ -167,20 +168,15 @@
#define chunk_dalloc_wrapper JEMALLOC_N(chunk_dalloc_wrapper)
#define chunk_deregister JEMALLOC_N(chunk_deregister)
#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child)
#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent)
#define chunk_dss_mergeable JEMALLOC_N(chunk_dss_mergeable)
#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get)
#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set)
#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork)
#define chunk_hooks_default JEMALLOC_N(chunk_hooks_default)
#define chunk_hooks_get JEMALLOC_N(chunk_hooks_get)
#define chunk_hooks_set JEMALLOC_N(chunk_hooks_set)
#define chunk_in_dss JEMALLOC_N(chunk_in_dss)
#define chunk_lookup JEMALLOC_N(chunk_lookup)
#define chunk_npages JEMALLOC_N(chunk_npages)
#define chunk_postfork_child JEMALLOC_N(chunk_postfork_child)
#define chunk_postfork_parent JEMALLOC_N(chunk_postfork_parent)
#define chunk_prefork JEMALLOC_N(chunk_prefork)
#define chunk_purge_wrapper JEMALLOC_N(chunk_purge_wrapper)
#define chunk_register JEMALLOC_N(chunk_register)
#define chunks_rtree JEMALLOC_N(chunks_rtree)
@ -359,6 +355,7 @@
#define nstime_imultiply JEMALLOC_N(nstime_imultiply)
#define nstime_init JEMALLOC_N(nstime_init)
#define nstime_init2 JEMALLOC_N(nstime_init2)
#define nstime_monotonic JEMALLOC_N(nstime_monotonic)
#define nstime_ns JEMALLOC_N(nstime_ns)
#define nstime_nsec JEMALLOC_N(nstime_nsec)
#define nstime_sec JEMALLOC_N(nstime_sec)
@ -400,11 +397,22 @@
#define pages_purge JEMALLOC_N(pages_purge)
#define pages_trim JEMALLOC_N(pages_trim)
#define pages_unmap JEMALLOC_N(pages_unmap)
#define pind2sz JEMALLOC_N(pind2sz)
#define pind2sz_compute JEMALLOC_N(pind2sz_compute)
#define pind2sz_lookup JEMALLOC_N(pind2sz_lookup)
#define pind2sz_tab JEMALLOC_N(pind2sz_tab)
#define pow2_ceil_u32 JEMALLOC_N(pow2_ceil_u32)
#define pow2_ceil_u64 JEMALLOC_N(pow2_ceil_u64)
#define pow2_ceil_zu JEMALLOC_N(pow2_ceil_zu)
#define prng_lg_range JEMALLOC_N(prng_lg_range)
#define prng_range JEMALLOC_N(prng_range)
#define prng_lg_range_u32 JEMALLOC_N(prng_lg_range_u32)
#define prng_lg_range_u64 JEMALLOC_N(prng_lg_range_u64)
#define prng_lg_range_zu JEMALLOC_N(prng_lg_range_zu)
#define prng_range_u32 JEMALLOC_N(prng_range_u32)
#define prng_range_u64 JEMALLOC_N(prng_range_u64)
#define prng_range_zu JEMALLOC_N(prng_range_zu)
#define prng_state_next_u32 JEMALLOC_N(prng_state_next_u32)
#define prng_state_next_u64 JEMALLOC_N(prng_state_next_u64)
#define prng_state_next_zu JEMALLOC_N(prng_state_next_zu)
#define prof_active JEMALLOC_N(prof_active)
#define prof_active_get JEMALLOC_N(prof_active_get)
#define prof_active_get_unlocked JEMALLOC_N(prof_active_get_unlocked)
@ -453,12 +461,13 @@
#define prof_thread_active_set JEMALLOC_N(prof_thread_active_set)
#define prof_thread_name_get JEMALLOC_N(prof_thread_name_get)
#define prof_thread_name_set JEMALLOC_N(prof_thread_name_set)
#define psz2ind JEMALLOC_N(psz2ind)
#define psz2u JEMALLOC_N(psz2u)
#define purge_mode_names JEMALLOC_N(purge_mode_names)
#define quarantine JEMALLOC_N(quarantine)
#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook)
#define quarantine_alloc_hook_work JEMALLOC_N(quarantine_alloc_hook_work)
#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup)
#define register_zone JEMALLOC_N(register_zone)
#define rtree_child_read JEMALLOC_N(rtree_child_read)
#define rtree_child_read_hard JEMALLOC_N(rtree_child_read_hard)
#define rtree_child_tryread JEMALLOC_N(rtree_child_tryread)
@ -476,7 +485,6 @@
#define rtree_val_write JEMALLOC_N(rtree_val_write)
#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
#define run_quantize_floor JEMALLOC_N(run_quantize_floor)
#define run_quantize_max JEMALLOC_N(run_quantize_max)
#define s2u JEMALLOC_N(s2u)
#define s2u_compute JEMALLOC_N(s2u_compute)
#define s2u_lookup JEMALLOC_N(s2u_lookup)
@ -486,6 +494,8 @@
#define size2index_compute JEMALLOC_N(size2index_compute)
#define size2index_lookup JEMALLOC_N(size2index_lookup)
#define size2index_tab JEMALLOC_N(size2index_tab)
#define spin_adaptive JEMALLOC_N(spin_adaptive)
#define spin_init JEMALLOC_N(spin_init)
#define stats_cactive JEMALLOC_N(stats_cactive)
#define stats_cactive_add JEMALLOC_N(stats_cactive_add)
#define stats_cactive_get JEMALLOC_N(stats_cactive_get)
@ -544,7 +554,9 @@
#define tsd_cleanup JEMALLOC_N(tsd_cleanup)
#define tsd_cleanup_wrapper JEMALLOC_N(tsd_cleanup_wrapper)
#define tsd_fetch JEMALLOC_N(tsd_fetch)
#define tsd_fetch_impl JEMALLOC_N(tsd_fetch_impl)
#define tsd_get JEMALLOC_N(tsd_get)
#define tsd_get_allocates JEMALLOC_N(tsd_get_allocates)
#define tsd_iarena_get JEMALLOC_N(tsd_iarena_get)
#define tsd_iarena_set JEMALLOC_N(tsd_iarena_set)
#define tsd_iarenap_get JEMALLOC_N(tsd_iarenap_get)
@ -603,9 +615,11 @@
#define witness_lock_error JEMALLOC_N(witness_lock_error)
#define witness_lockless_error JEMALLOC_N(witness_lockless_error)
#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error)
#define witness_owner JEMALLOC_N(witness_owner)
#define witness_owner_error JEMALLOC_N(witness_owner_error)
#define witness_postfork_child JEMALLOC_N(witness_postfork_child)
#define witness_postfork_parent JEMALLOC_N(witness_postfork_parent)
#define witness_prefork JEMALLOC_N(witness_prefork)
#define witness_unlock JEMALLOC_N(witness_unlock)
#define witnesses_cleanup JEMALLOC_N(witnesses_cleanup)
#define zone_register JEMALLOC_N(zone_register)

View File

@ -19,8 +19,12 @@
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*/
#define PRNG_A UINT64_C(6364136223846793005)
#define PRNG_C UINT64_C(1442695040888963407)
#define PRNG_A_32 UINT32_C(1103515241)
#define PRNG_C_32 UINT32_C(12347)
#define PRNG_A_64 UINT64_C(6364136223846793005)
#define PRNG_C_64 UINT64_C(1442695040888963407)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
@ -35,28 +39,133 @@
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t prng_lg_range(uint64_t *state, unsigned lg_range);
uint64_t prng_range(uint64_t *state, uint64_t range);
uint32_t prng_state_next_u32(uint32_t state);
uint64_t prng_state_next_u64(uint64_t state);
size_t prng_state_next_zu(size_t state);
uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range,
bool atomic);
uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range);
size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
uint64_t prng_range_u64(uint64_t *state, uint64_t range);
size_t prng_range_zu(size_t *state, size_t range, bool atomic);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
JEMALLOC_ALWAYS_INLINE uint64_t
prng_lg_range(uint64_t *state, unsigned lg_range)
JEMALLOC_ALWAYS_INLINE uint32_t
prng_state_next_u32(uint32_t state)
{
uint64_t ret;
return ((state * PRNG_A_32) + PRNG_C_32);
}
JEMALLOC_ALWAYS_INLINE uint64_t
prng_state_next_u64(uint64_t state)
{
return ((state * PRNG_A_64) + PRNG_C_64);
}
JEMALLOC_ALWAYS_INLINE size_t
prng_state_next_zu(size_t state)
{
#if LG_SIZEOF_PTR == 2
return ((state * PRNG_A_32) + PRNG_C_32);
#elif LG_SIZEOF_PTR == 3
return ((state * PRNG_A_64) + PRNG_C_64);
#else
#error Unsupported pointer size
#endif
}
JEMALLOC_ALWAYS_INLINE uint32_t
prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
{
uint32_t ret, state1;
assert(lg_range > 0);
assert(lg_range <= 32);
if (atomic) {
uint32_t state0;
do {
state0 = atomic_read_uint32(state);
state1 = prng_state_next_u32(state0);
} while (atomic_cas_uint32(state, state0, state1));
} else {
state1 = prng_state_next_u32(*state);
*state = state1;
}
ret = state1 >> (32 - lg_range);
return (ret);
}
/* 64-bit atomic operations cannot be supported on all relevant platforms. */
JEMALLOC_ALWAYS_INLINE uint64_t
prng_lg_range_u64(uint64_t *state, unsigned lg_range)
{
uint64_t ret, state1;
assert(lg_range > 0);
assert(lg_range <= 64);
ret = (*state * PRNG_A) + PRNG_C;
*state = ret;
ret >>= (64 - lg_range);
state1 = prng_state_next_u64(*state);
*state = state1;
ret = state1 >> (64 - lg_range);
return (ret);
}
JEMALLOC_ALWAYS_INLINE size_t
prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
{
size_t ret, state1;
assert(lg_range > 0);
assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
if (atomic) {
size_t state0;
do {
state0 = atomic_read_z(state);
state1 = prng_state_next_zu(state0);
} while (atomic_cas_z(state, state0, state1));
} else {
state1 = prng_state_next_zu(*state);
*state = state1;
}
ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
return (ret);
}
JEMALLOC_ALWAYS_INLINE uint32_t
prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
{
uint32_t ret;
unsigned lg_range;
assert(range > 1);
/* Compute the ceiling of lg(range). */
lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
/* Generate a result in [0..range) via repeated trial. */
do {
ret = prng_lg_range_u32(state, lg_range, atomic);
} while (ret >= range);
return (ret);
}
JEMALLOC_ALWAYS_INLINE uint64_t
prng_range(uint64_t *state, uint64_t range)
prng_range_u64(uint64_t *state, uint64_t range)
{
uint64_t ret;
unsigned lg_range;
@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range)
/* Generate a result in [0..range) via repeated trial. */
do {
ret = prng_lg_range(state, lg_range);
ret = prng_lg_range_u64(state, lg_range);
} while (ret >= range);
return (ret);
}
JEMALLOC_ALWAYS_INLINE size_t
prng_range_zu(size_t *state, size_t range, bool atomic)
{
size_t ret;
unsigned lg_range;
assert(range > 1);
/* Compute the ceiling of lg(range). */
lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
/* Generate a result in [0..range) via repeated trial. */
do {
ret = prng_lg_range_zu(state, lg_range, atomic);
} while (ret >= range);
return (ret);

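The refactored PRNG above is still the same linear congruential generator, now provided in 32-bit, 64-bit, and size_t flavors; as the header comment notes, its low bits have very short cycles (bit 0 alternates every step, the next bit cycles with period 4, and so on), so prng_lg_range_*() extract the requested bits from the top. A quick standalone check using the PRNG_A_64/PRNG_C_64 constants shown above:

#include <stdint.h>
#include <stdio.h>

#define PRNG_A_64 UINT64_C(6364136223846793005)
#define PRNG_C_64 UINT64_C(1442695040888963407)

int
main(void)
{
	uint64_t state = 42;
	int i;

	for (i = 0; i < 8; i++) {
		state = state * PRNG_A_64 + PRNG_C_64;
		/* The low bit flips on every step; the top bits do not. */
		printf("step %d: low bit %u, top 16 bits 0x%04x\n", i,
		    (unsigned)(state & 1), (unsigned)(state >> 48));
	}
	return (0);
}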
View File

@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header;
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
prof_tdata_t *prof_tdata_init(tsdn_t *tsdn);
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void prof_reset(tsdn_t *tsdn, size_t lg_sample);
void prof_reset(tsd_t *tsd, size_t lg_sample);
void prof_tdata_cleanup(tsd_t *tsd);
bool prof_active_get(tsdn_t *tsdn);
bool prof_active_set(tsdn_t *tsdn, bool active);
@ -315,7 +315,7 @@ bool prof_gdump_get(tsdn_t *tsdn);
bool prof_gdump_set(tsdn_t *tsdn, bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(tsdn_t *tsdn);
bool prof_boot2(tsd_t *tsd);
void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn);
@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create)
if (create) {
if (unlikely(tdata == NULL)) {
if (tsd_nominal(tsd)) {
tdata = prof_tdata_init(tsd_tsdn(tsd));
tdata = prof_tdata_init(tsd);
tsd_prof_tdata_set(tsd, tdata);
}
} else if (unlikely(tdata->expired)) {

File diff suppressed because it is too large.

View File

@ -0,0 +1,51 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
typedef struct spin_s spin_t;
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct spin_s {
unsigned iteration;
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
void spin_init(spin_t *spin);
void spin_adaptive(spin_t *spin);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_))
JEMALLOC_INLINE void
spin_init(spin_t *spin)
{
spin->iteration = 0;
}
JEMALLOC_INLINE void
spin_adaptive(spin_t *spin)
{
volatile uint64_t i;
for (i = 0; i < (KQU(1) << spin->iteration); i++)
CPU_SPINWAIT;
if (spin->iteration < 63)
spin->iteration++;
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/

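The new spin_t above implements the adaptive spinning mentioned in the 4.3.0 ChangeLog: each spin_adaptive() call busy-waits for twice as many iterations as the previous one. A hedged usage sketch with stand-ins for the internal macros (the waiting flag and its writer are hypothetical; real call sites include bootstrapping and rtree node initialization):

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for jemalloc internals; CPU_SPINWAIT is e.g. a pause instruction. */
#define CPU_SPINWAIT ((void)0)

typedef struct { unsigned iteration; } spin_t;

static void spin_init(spin_t *spin) { spin->iteration = 0; }

static void
spin_adaptive(spin_t *spin)
{
	volatile uint64_t i;

	/* Wait 2^iteration pause slots, doubling the wait on every call. */
	for (i = 0; i < (UINT64_C(1) << spin->iteration); i++)
		CPU_SPINWAIT;
	if (spin->iteration < 63)
		spin->iteration++;
}

/* Hypothetical caller: wait for another thread to finish initialization. */
static void
wait_until_initialized(volatile bool *initialized)
{
	spin_t spinner;

	spin_init(&spinner);
	while (!*initialized)
		spin_adaptive(&spinner);
}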
View File

@ -145,7 +145,7 @@ tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena);
void tcache_cleanup(tsd_t *tsd);
void tcache_enabled_cleanup(tsd_t *tsd);
void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind);
bool tcaches_create(tsd_t *tsd, unsigned *r_ind);
void tcaches_flush(tsd_t *tsd, unsigned ind);
void tcaches_destroy(tsd_t *tsd, unsigned ind);
bool tcache_boot(tsdn_t *tsdn);

View File

@ -48,7 +48,7 @@ typedef enum {
*
* bool example_tsd_boot(void) {...}
* bool example_tsd_booted_get(void) {...}
* example_t *example_tsd_get() {...}
* example_t *example_tsd_get(bool init) {...}
* void example_tsd_set(example_t *val) {...}
*
* Note that all of the functions deal in terms of (a_type *) rather than
@ -105,7 +105,7 @@ a_name##tsd_boot(void); \
a_attr bool \
a_name##tsd_booted_get(void); \
a_attr a_type * \
a_name##tsd_get(void); \
a_name##tsd_get(bool init); \
a_attr void \
a_name##tsd_set(a_type *val);
@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \
\
return (a_name##tsd_booted); \
} \
a_attr bool \
a_name##tsd_get_allocates(void) \
{ \
\
return (false); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
a_name##tsd_get(bool init) \
{ \
\
assert(a_name##tsd_booted); \
@ -264,9 +270,15 @@ a_name##tsd_booted_get(void) \
\
return (a_name##tsd_booted); \
} \
a_attr bool \
a_name##tsd_get_allocates(void) \
{ \
\
return (false); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
a_name##tsd_get(bool init) \
{ \
\
assert(a_name##tsd_booted); \
@ -325,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
} \
} \
a_attr a_name##tsd_wrapper_t * \
a_name##tsd_wrapper_get(void) \
a_name##tsd_wrapper_get(bool init) \
{ \
DWORD error = GetLastError(); \
a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
TlsGetValue(a_name##tsd_tsd); \
SetLastError(error); \
\
if (unlikely(wrapper == NULL)) { \
if (init && unlikely(wrapper == NULL)) { \
wrapper = (a_name##tsd_wrapper_t *) \
malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
if (wrapper == NULL) { \
@ -392,14 +404,22 @@ a_name##tsd_booted_get(void) \
\
return (a_name##tsd_booted); \
} \
a_attr bool \
a_name##tsd_get_allocates(void) \
{ \
\
return (true); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
a_name##tsd_get(bool init) \
{ \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
wrapper = a_name##tsd_wrapper_get(); \
wrapper = a_name##tsd_wrapper_get(init); \
if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \
return (NULL); \
return (&wrapper->val); \
} \
a_attr void \
@ -408,7 +428,7 @@ a_name##tsd_set(a_type *val) \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
wrapper = a_name##tsd_wrapper_get(); \
wrapper = a_name##tsd_wrapper_get(true); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@ -452,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
} \
} \
a_attr a_name##tsd_wrapper_t * \
a_name##tsd_wrapper_get(void) \
a_name##tsd_wrapper_get(bool init) \
{ \
a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
pthread_getspecific(a_name##tsd_tsd); \
\
if (unlikely(wrapper == NULL)) { \
if (init && unlikely(wrapper == NULL)) { \
tsd_init_block_t block; \
wrapper = tsd_init_check_recursion( \
&a_name##tsd_init_head, &block); \
@ -520,14 +540,22 @@ a_name##tsd_booted_get(void) \
\
return (a_name##tsd_booted); \
} \
a_attr bool \
a_name##tsd_get_allocates(void) \
{ \
\
return (true); \
} \
/* Get/set. */ \
a_attr a_type * \
a_name##tsd_get(void) \
a_name##tsd_get(bool init) \
{ \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
wrapper = a_name##tsd_wrapper_get(); \
wrapper = a_name##tsd_wrapper_get(init); \
if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \
return (NULL); \
return (&wrapper->val); \
} \
a_attr void \
@ -536,7 +564,7 @@ a_name##tsd_set(a_type *val) \
a_name##tsd_wrapper_t *wrapper; \
\
assert(a_name##tsd_booted); \
wrapper = a_name##tsd_wrapper_get(); \
wrapper = a_name##tsd_wrapper_get(true); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@ -639,6 +667,7 @@ void tsd_cleanup(void *arg);
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
tsd_t *tsd_fetch_impl(bool init);
tsd_t *tsd_fetch(void);
tsdn_t *tsd_tsdn(tsd_t *tsd);
bool tsd_nominal(tsd_t *tsd);
@ -658,9 +687,13 @@ malloc_tsd_externs(, tsd_t)
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch(void)
tsd_fetch_impl(bool init)
{
tsd_t *tsd = tsd_get();
tsd_t *tsd = tsd_get(init);
if (!init && tsd_get_allocates() && tsd == NULL)
return (NULL);
assert(tsd != NULL);
if (unlikely(tsd->state != tsd_state_nominal)) {
if (tsd->state == tsd_state_uninitialized) {
@ -677,6 +710,13 @@ tsd_fetch(void)
return (tsd);
}
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch(void)
{
return (tsd_fetch_impl(true));
}
JEMALLOC_ALWAYS_INLINE tsdn_t *
tsd_tsdn(tsd_t *tsd)
{
@ -723,7 +763,7 @@ tsdn_fetch(void)
if (!tsd_booted_get())
return (NULL);
return (tsd_tsdn(tsd_fetch()));
return (tsd_tsdn(tsd_fetch_impl(false)));
}
JEMALLOC_ALWAYS_INLINE bool

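The tsd changes above thread an init flag down to tsd_get() and add tsd_get_allocates(), so that tsdn_fetch() can read thread-specific data without ever allocating it; this matters for the non-TLS and Windows configurations called out in the 4.3.0 ChangeLog, where a TSD fetch on an allocation path must not recurse into malloc. A simplified standalone sketch of the pattern built directly on pthread_getspecific() (names are illustrative, not jemalloc's):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

typedef struct { int dummy; } my_tsd_t;

static pthread_key_t my_tsd_key;	/* assume pthread_key_create() ran at boot */

/*
 * When init is false, never allocate: return NULL if this thread's data does
 * not exist yet.  Reentrancy-sensitive callers pass init=false and handle NULL.
 */
static my_tsd_t *
my_tsd_get(bool init)
{
	my_tsd_t *tsd = pthread_getspecific(my_tsd_key);

	if (tsd == NULL && init) {
		tsd = calloc(1, sizeof(*tsd));	/* may itself enter malloc */
		if (tsd != NULL)
			pthread_setspecific(my_tsd_key, tsd);
	}
	return (tsd);
}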
View File

@ -61,30 +61,20 @@
# define JEMALLOC_CC_SILENCE_INIT(v)
#endif
#define JEMALLOC_GNUC_PREREQ(major, minor) \
(!defined(__clang__) && \
(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
#ifndef __has_builtin
# define __has_builtin(builtin) (0)
#endif
#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \
(defined(__clang__) && __has_builtin(builtin))
#ifdef __GNUC__
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
# if JEMALLOC_GNUC_PREREQ(4, 6) || \
JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
# define unreachable() __builtin_unreachable()
# else
# define unreachable() abort()
# endif
#else
# define likely(x) !!(x)
# define unlikely(x) !!(x)
# define unreachable() abort()
#endif
#if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
#endif
#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
#include "jemalloc/internal/assert.h"
/* Use to assert a particular configuration, e.g., cassert(config_debug). */

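With this change unreachable() always expands to JEMALLOC_INTERNAL_UNREACHABLE(), which configure sets to either __builtin_unreachable or abort (see the jemalloc_internal_defs.h hunk earlier in this diff). Its usual job is to quiet "control reaches end of non-void function" warnings after an exhaustive switch; a small hedged example with an illustrative enum:

#include <stdlib.h>

#define unreachable() abort()	/* stand-in for JEMALLOC_INTERNAL_UNREACHABLE() */

enum purge_mode { purge_mode_ratio, purge_mode_decay };

static const char *
purge_mode_name(enum purge_mode mode)
{
	switch (mode) {
	case purge_mode_ratio:
		return ("ratio");
	case purge_mode_decay:
		return ("decay");
	}
	/* All enumerators handled above; silence end-of-non-void warnings. */
	unreachable();
}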
View File

@ -108,6 +108,7 @@ void witness_postfork_child(tsd_t *tsd);
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
bool witness_owner(tsd_t *tsd, const witness_t *witness);
void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
void witness_assert_lockless(tsdn_t *tsdn);
@ -116,12 +117,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
JEMALLOC_INLINE bool
witness_owner(tsd_t *tsd, const witness_t *witness)
{
witness_list_t *witnesses;
witness_t *w;
witnesses = tsd_witnessesp_get(tsd);
ql_foreach(w, witnesses, link) {
if (w == witness)
return (true);
}
return (false);
}
JEMALLOC_INLINE void
witness_assert_owner(tsdn_t *tsdn, const witness_t *witness)
{
tsd_t *tsd;
witness_list_t *witnesses;
witness_t *w;
if (!config_debug)
return;
@ -132,11 +146,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness)
if (witness->rank == WITNESS_RANK_OMIT)
return;
witnesses = tsd_witnessesp_get(tsd);
ql_foreach(w, witnesses, link) {
if (w == witness)
return;
}
if (witness_owner(tsd, witness))
return;
witness_owner_error(witness);
}
@ -238,10 +249,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness)
if (witness->rank == WITNESS_RANK_OMIT)
return;
witness_assert_owner(tsdn, witness);
witnesses = tsd_witnessesp_get(tsd);
ql_remove(witnesses, witness, link);
/*
* Check whether owner before removal, rather than relying on
* witness_assert_owner() to abort, so that unit tests can test this
* function's failure mode without causing undefined behavior.
*/
if (witness_owner(tsd, witness)) {
witnesses = tsd_witnessesp_get(tsd);
ql_remove(witnesses, witness, link);
} else
witness_assert_owner(tsdn, witness);
}
#endif

View File

@ -87,12 +87,12 @@ extern "C" {
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "4.2.1-0-g3de035335255d553bdb344c32ffdb603816195d8"
#define JEMALLOC_VERSION "4.3.1-0-g0110fa8451af905affd77c3bea0d545fee2251b2"
#define JEMALLOC_VERSION_MAJOR 4
#define JEMALLOC_VERSION_MINOR 2
#define JEMALLOC_VERSION_MINOR 3
#define JEMALLOC_VERSION_BUGFIX 1
#define JEMALLOC_VERSION_NREV 0
#define JEMALLOC_VERSION_GID "3de035335255d553bdb344c32ffdb603816195d8"
#define JEMALLOC_VERSION_GID "0110fa8451af905affd77c3bea0d545fee2251b2"
# define MALLOCX_LG_ALIGN(la) ((int)(la))
# if LG_SIZEOF_PTR == 2

View File

@ -21,15 +21,8 @@ size_t map_bias;
size_t map_misc_offset;
size_t arena_maxrun; /* Max run size for arenas. */
size_t large_maxclass; /* Max large size class. */
size_t run_quantize_max; /* Max run_quantize_*() input. */
static size_t small_maxrun; /* Max run size for small size classes. */
static bool *small_run_tab; /* Valid small run page multiples. */
static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */
static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
unsigned nlclasses; /* Number of large size classes. */
unsigned nhclasses; /* Number of huge size classes. */
static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */
static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
/******************************************************************************/
/*
@ -37,6 +30,8 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
* definition.
*/
static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena,
arena_chunk_t *chunk);
static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena,
size_t ndirty_limit);
static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run,
@ -77,83 +72,6 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a,
ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t,
ph_link, arena_run_addr_comp)
static size_t
run_quantize_floor_compute(size_t size)
{
size_t qsize;
assert(size != 0);
assert(size == PAGE_CEILING(size));
/* Don't change sizes that are valid small run sizes. */
if (size <= small_maxrun && small_run_tab[size >> LG_PAGE])
return (size);
/*
* Round down to the nearest run size that can actually be requested
* during normal large allocation. Add large_pad so that cache index
* randomization can offset the allocation from the page boundary.
*/
qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad;
if (qsize <= SMALL_MAXCLASS + large_pad)
return (run_quantize_floor_compute(size - large_pad));
assert(qsize <= size);
return (qsize);
}
static size_t
run_quantize_ceil_compute_hard(size_t size)
{
size_t large_run_size_next;
assert(size != 0);
assert(size == PAGE_CEILING(size));
/*
* Return the next quantized size greater than the input size.
* Quantized sizes comprise the union of run sizes that back small
* region runs, and run sizes that back large regions with no explicit
* alignment constraints.
*/
if (size > SMALL_MAXCLASS) {
large_run_size_next = PAGE_CEILING(index2size(size2index(size -
large_pad) + 1) + large_pad);
} else
large_run_size_next = SIZE_T_MAX;
if (size >= small_maxrun)
return (large_run_size_next);
while (true) {
size += PAGE;
assert(size <= small_maxrun);
if (small_run_tab[size >> LG_PAGE]) {
if (large_run_size_next < size)
return (large_run_size_next);
return (size);
}
}
}
static size_t
run_quantize_ceil_compute(size_t size)
{
size_t qsize = run_quantize_floor_compute(size);
if (qsize < size) {
/*
* Skip a quantization that may have an adequately large run,
* because under-sized runs may be mixed in. This only happens
* when an unusual size is requested, i.e. for aligned
* allocation, and is just one of several places where linear
* search would potentially find sufficiently aligned available
* memory somewhere lower.
*/
qsize = run_quantize_ceil_compute_hard(qsize);
}
return (qsize);
}
#ifdef JEMALLOC_JET
#undef run_quantize_floor
#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor)
@ -162,13 +80,27 @@ static size_t
run_quantize_floor(size_t size)
{
size_t ret;
pszind_t pind;
assert(size > 0);
assert(size <= run_quantize_max);
assert(size <= HUGE_MAXCLASS);
assert((size & PAGE_MASK) == 0);
ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1];
assert(ret == run_quantize_floor_compute(size));
assert(size != 0);
assert(size == PAGE_CEILING(size));
pind = psz2ind(size - large_pad + 1);
if (pind == 0) {
/*
* Avoid underflow. This short-circuit would also do the right
* thing for all sizes in the range for which there are
* PAGE-spaced size classes, but it's simplest to just handle
* the one case that would cause erroneous results.
*/
return (size);
}
ret = pind2sz(pind - 1) + large_pad;
assert(ret <= size);
return (ret);
}
#ifdef JEMALLOC_JET
@ -187,11 +119,21 @@ run_quantize_ceil(size_t size)
size_t ret;
assert(size > 0);
assert(size <= run_quantize_max);
assert(size <= HUGE_MAXCLASS);
assert((size & PAGE_MASK) == 0);
ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1];
assert(ret == run_quantize_ceil_compute(size));
ret = run_quantize_floor(size);
if (ret < size) {
/*
* Skip a quantization that may have an adequately large run,
* because under-sized runs may be mixed in. This only happens
* when an unusual size is requested, i.e. for aligned
* allocation, and is just one of several places where linear
* search would potentially find sufficiently aligned available
* memory somewhere lower.
*/
ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad;
}
return (ret);
}
#ifdef JEMALLOC_JET
@ -200,25 +142,17 @@ run_quantize_ceil(size_t size)
run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil);
#endif
static arena_run_heap_t *
arena_runs_avail_get(arena_t *arena, szind_t ind)
{
assert(ind >= runs_avail_bias);
assert(ind - runs_avail_bias < runs_avail_nclasses);
return (&arena->runs_avail[ind - runs_avail_bias]);
}
static void
arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
arena_run_heap_insert(arena_runs_avail_get(arena, ind),
assert((npages << LG_PAGE) < chunksize);
assert(pind2sz(pind) <= chunksize);
arena_run_heap_insert(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@ -226,11 +160,13 @@ static void
arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
arena_run_heap_remove(arena_runs_avail_get(arena, ind),
assert((npages << LG_PAGE) < chunksize);
assert(pind2sz(pind) <= chunksize);
arena_run_heap_remove(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@ -649,14 +585,13 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero,
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize,
chunksize, zero, true);
chunksize, zero, commit, true);
if (chunk != NULL) {
if (arena_chunk_register(tsdn, arena, chunk, *zero)) {
chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk,
chunksize, true);
return (NULL);
}
*commit = true;
}
if (chunk == NULL) {
chunk = arena_chunk_alloc_internal_hard(tsdn, arena,
@ -953,6 +888,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
void *ret;
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
size_t csize = CHUNK_CEILING(usize);
bool commit = true;
malloc_mutex_lock(tsdn, &arena->lock);
@ -964,7 +900,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize,
arena_nactive_add(arena, usize >> LG_PAGE);
ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize,
alignment, zero, true);
alignment, zero, &commit, true);
malloc_mutex_unlock(tsdn, &arena->lock);
if (ret == NULL) {
ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks,
@ -1074,6 +1010,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize));
size_t udiff = usize - oldsize;
size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize);
bool commit = true;
malloc_mutex_lock(tsdn, &arena->lock);
@ -1085,7 +1022,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
arena_nactive_add(arena, udiff >> LG_PAGE);
err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff,
chunksize, zero, true) == NULL);
chunksize, zero, &commit, true) == NULL);
malloc_mutex_unlock(tsdn, &arena->lock);
if (err) {
err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena,
@ -1109,12 +1046,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
static arena_run_t *
arena_run_first_best_fit(arena_t *arena, size_t size)
{
szind_t ind, i;
pszind_t pind, i;
ind = size2index(run_quantize_ceil(size));
for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
pind = psz2ind(run_quantize_ceil(size));
for (i = pind; pind2sz(i) <= chunksize; i++) {
arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
arena_runs_avail_get(arena, i));
&arena->runs_avail[i]);
if (miscelm != NULL)
return (&miscelm->run);
}
@ -1125,7 +1063,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
static arena_run_t *
arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
{
arena_run_t *run = arena_run_first_best_fit(arena, s2u(size));
arena_run_t *run = arena_run_first_best_fit(arena, size);
if (run != NULL) {
if (arena_run_split_large(arena, run, size, zero))
run = NULL;
@ -1256,14 +1194,14 @@ arena_decay_deadline_init(arena_t *arena)
* Generate a new deadline that is uniformly random within the next
* epoch after the current one.
*/
nstime_copy(&arena->decay_deadline, &arena->decay_epoch);
nstime_add(&arena->decay_deadline, &arena->decay_interval);
if (arena->decay_time > 0) {
nstime_copy(&arena->decay.deadline, &arena->decay.epoch);
nstime_add(&arena->decay.deadline, &arena->decay.interval);
if (arena->decay.time > 0) {
nstime_t jitter;
nstime_init(&jitter, prng_range(&arena->decay_jitter_state,
nstime_ns(&arena->decay_interval)));
nstime_add(&arena->decay_deadline, &jitter);
nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
nstime_ns(&arena->decay.interval)));
nstime_add(&arena->decay.deadline, &jitter);
}
}
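
With decay enabled, the deadline computed above is the epoch plus one decay interval plus a uniformly random jitter in [0, interval) nanoseconds. A standalone sketch of the arithmetic (not from the diff; rand() stands in for prng_range_u64):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	uint64_t epoch_ns = 1000000000;		/* Arbitrary epoch. */
	uint64_t interval_ns = 10000000;	/* Assumed 10 ms decay interval. */
	/* rand() is only a stand-in for the per-arena jitter PRNG. */
	uint64_t jitter_ns = (uint64_t)rand() % interval_ns;
	uint64_t deadline_ns = epoch_ns + interval_ns + jitter_ns;

	printf("deadline in [%ju, %ju): %ju\n",
	    (uintmax_t)(epoch_ns + interval_ns),
	    (uintmax_t)(epoch_ns + 2 * interval_ns), (uintmax_t)deadline_ns);
	return (0);
}
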
@ -1273,7 +1211,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time)
assert(opt_purge == purge_mode_decay);
return (nstime_compare(&arena->decay_deadline, time) <= 0);
return (nstime_compare(&arena->decay.deadline, time) <= 0);
}
static size_t
@ -1298,92 +1236,103 @@ arena_decay_backlog_npages_limit(const arena_t *arena)
*/
sum = 0;
for (i = 0; i < SMOOTHSTEP_NSTEPS; i++)
sum += arena->decay_backlog[i] * h_steps[i];
sum += arena->decay.backlog[i] * h_steps[i];
npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
return (npages_limit_backlog);
}
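
The loop above is a fixed-point dot product: each backlog slot holds the count of pages that became dirty during one past epoch, h_steps[] holds smoothstep weights scaled by 2^SMOOTHSTEP_BFP, and the final shift converts the weighted sum back to whole pages. A standalone sketch with hypothetical weights and a made-up backlog (not jemalloc's actual coefficients):

#include <stdint.h>
#include <stdio.h>

#define NSTEPS	4
#define BFP	24	/* Binary fixed point: weights are w * 2^24. */

int
main(void)
{
	/* Hypothetical decay weights, largest for the oldest slot. */
	const uint64_t h_steps[NSTEPS] = {
	    (uint64_t)(0.9 * (1 << BFP)), (uint64_t)(0.6 * (1 << BFP)),
	    (uint64_t)(0.3 * (1 << BFP)), (uint64_t)(0.1 * (1 << BFP))
	};
	/* Pages that became dirty during each of the last NSTEPS epochs. */
	const size_t backlog[NSTEPS] = {100, 50, 25, 10};
	uint64_t sum = 0;
	unsigned i;

	for (i = 0; i < NSTEPS; i++)
		sum += (uint64_t)backlog[i] * h_steps[i];
	/* Shift out the fixed-point scale to get a page count. */
	printf("npages_limit = %zu\n", (size_t)(sum >> BFP));
	return (0);
}
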
static void
arena_decay_epoch_advance(arena_t *arena, const nstime_t *time)
arena_decay_backlog_update_last(arena_t *arena)
{
uint64_t nadvance_u64;
nstime_t delta;
size_t ndirty_delta;
size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ?
arena->ndirty - arena->decay.ndirty : 0;
arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
}
assert(opt_purge == purge_mode_decay);
assert(arena_decay_deadline_reached(arena, time));
static void
arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64)
{
nstime_copy(&delta, time);
nstime_subtract(&delta, &arena->decay_epoch);
nadvance_u64 = nstime_divide(&delta, &arena->decay_interval);
assert(nadvance_u64 > 0);
/* Add nadvance_u64 decay intervals to epoch. */
nstime_copy(&delta, &arena->decay_interval);
nstime_imultiply(&delta, nadvance_u64);
nstime_add(&arena->decay_epoch, &delta);
/* Set a new deadline. */
arena_decay_deadline_init(arena);
/* Update the backlog. */
if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
sizeof(size_t));
} else {
size_t nadvance_z = (size_t)nadvance_u64;
assert((uint64_t)nadvance_z == nadvance_u64);
memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z],
memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z],
(SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
if (nadvance_z > 1) {
memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS -
memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS -
nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
}
}
ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty -
arena->decay_ndirty : 0;
arena->decay_ndirty = arena->ndirty;
arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
arena->decay_backlog_npages_limit =
arena_decay_backlog_npages_limit(arena);
arena_decay_backlog_update_last(arena);
}
static size_t
arena_decay_npages_limit(arena_t *arena)
static void
arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time)
{
size_t npages_limit;
uint64_t nadvance_u64;
nstime_t delta;
assert(opt_purge == purge_mode_decay);
assert(arena_decay_deadline_reached(arena, time));
npages_limit = arena->decay_backlog_npages_limit;
nstime_copy(&delta, time);
nstime_subtract(&delta, &arena->decay.epoch);
nadvance_u64 = nstime_divide(&delta, &arena->decay.interval);
assert(nadvance_u64 > 0);
/* Add in any dirty pages created during the current epoch. */
if (arena->ndirty > arena->decay_ndirty)
npages_limit += arena->ndirty - arena->decay_ndirty;
/* Add nadvance_u64 decay intervals to epoch. */
nstime_copy(&delta, &arena->decay.interval);
nstime_imultiply(&delta, nadvance_u64);
nstime_add(&arena->decay.epoch, &delta);
return (npages_limit);
/* Set a new deadline. */
arena_decay_deadline_init(arena);
/* Update the backlog. */
arena_decay_backlog_update(arena, nadvance_u64);
}
static void
arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena)
{
size_t ndirty_limit = arena_decay_backlog_npages_limit(arena);
if (arena->ndirty > ndirty_limit)
arena_purge_to_limit(tsdn, arena, ndirty_limit);
arena->decay.ndirty = arena->ndirty;
}
static void
arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time)
{
arena_decay_epoch_advance_helper(arena, time);
arena_decay_epoch_advance_purge(tsdn, arena);
}
static void
arena_decay_init(arena_t *arena, ssize_t decay_time)
{
arena->decay_time = decay_time;
arena->decay.time = decay_time;
if (decay_time > 0) {
nstime_init2(&arena->decay_interval, decay_time, 0);
nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS);
nstime_init2(&arena->decay.interval, decay_time, 0);
nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS);
}
nstime_init(&arena->decay_epoch, 0);
nstime_update(&arena->decay_epoch);
arena->decay_jitter_state = (uint64_t)(uintptr_t)arena;
nstime_init(&arena->decay.epoch, 0);
nstime_update(&arena->decay.epoch);
arena->decay.jitter_state = (uint64_t)(uintptr_t)arena;
arena_decay_deadline_init(arena);
arena->decay_ndirty = arena->ndirty;
arena->decay_backlog_npages_limit = 0;
memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
arena->decay.ndirty = arena->ndirty;
memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
}
static bool
@ -1403,7 +1352,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena)
ssize_t decay_time;
malloc_mutex_lock(tsdn, &arena->lock);
decay_time = arena->decay_time;
decay_time = arena->decay.time;
malloc_mutex_unlock(tsdn, &arena->lock);
return (decay_time);
@ -1464,35 +1413,44 @@ static void
arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena)
{
nstime_t time;
size_t ndirty_limit;
assert(opt_purge == purge_mode_decay);
/* Purge all or nothing if the option is disabled. */
if (arena->decay_time <= 0) {
if (arena->decay_time == 0)
if (arena->decay.time <= 0) {
if (arena->decay.time == 0)
arena_purge_to_limit(tsdn, arena, 0);
return;
}
nstime_copy(&time, &arena->decay_epoch);
if (unlikely(nstime_update(&time))) {
/* Time went backwards. Force an epoch advance. */
nstime_copy(&time, &arena->decay_deadline);
nstime_init(&time, 0);
nstime_update(&time);
if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch,
&time) > 0)) {
/*
* Time went backwards. Move the epoch back in time and
* generate a new deadline, with the expectation that time
* typically flows forward for long enough periods of time that
* epochs complete. Unfortunately, this strategy is susceptible
* to clock jitter triggering premature epoch advances, but
* clock jitter estimation and compensation isn't feasible here
* because calls into this code are event-driven.
*/
nstime_copy(&arena->decay.epoch, &time);
arena_decay_deadline_init(arena);
} else {
/* Verify that time does not go backwards. */
assert(nstime_compare(&arena->decay.epoch, &time) <= 0);
}
if (arena_decay_deadline_reached(arena, &time))
arena_decay_epoch_advance(arena, &time);
ndirty_limit = arena_decay_npages_limit(arena);
/*
* Don't try to purge unless the number of purgeable pages exceeds the
* current limit.
* If the deadline has been reached, advance to the current epoch and
* purge to the new limit if necessary. Note that dirty pages created
* during the current epoch are not subject to purge until a future
* epoch, so as a result purging only happens during epoch advances.
*/
if (arena->ndirty <= ndirty_limit)
return;
arena_purge_to_limit(tsdn, arena, ndirty_limit);
if (arena_decay_deadline_reached(arena, &time))
arena_decay_epoch_advance(tsdn, arena, &time);
}
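
The logic above guards against non-monotonic clocks: if the new reading precedes the recorded epoch, the epoch is pulled back to that reading and a fresh deadline is generated rather than waiting for wall time to catch up; otherwise the epoch advances (and purging happens) only once the deadline passes. A simplified sketch using plain nanosecond counters instead of the nstime API (the real code advances by whole intervals and purges as part of the advance):

#include <stdint.h>
#include <stdio.h>

static uint64_t epoch_ns, deadline_ns;
static const uint64_t interval_ns = 250000000;	/* Assumed 250 ms per epoch. */

static void
epoch_update(uint64_t now_ns)
{
	if (now_ns < epoch_ns) {
		/* Time went backwards: adopt the new reading as the epoch. */
		epoch_ns = now_ns;
		deadline_ns = epoch_ns + interval_ns;
	} else if (now_ns >= deadline_ns) {
		/* Deadline reached: advance the epoch (purge would go here). */
		epoch_ns = now_ns;
		deadline_ns = epoch_ns + interval_ns;
	}
}

int
main(void)
{
	epoch_ns = 1000;
	deadline_ns = epoch_ns + interval_ns;
	epoch_update(500);	/* Simulated backwards clock step. */
	printf("epoch = %ju, deadline = %ju\n", (uintmax_t)epoch_ns,
	    (uintmax_t)deadline_ns);
	return (0);
}
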
void
@ -1561,7 +1519,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
if (rdelm == &chunkselm->rd) {
extent_node_t *chunkselm_next;
bool zero;
bool zero, commit;
UNUSED void *chunk;
npages = extent_node_size_get(chunkselm) >> LG_PAGE;
@ -1575,10 +1533,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
* dalloc_node=false argument to chunk_alloc_cache().
*/
zero = false;
commit = false;
chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks,
extent_node_addr_get(chunkselm),
extent_node_size_get(chunkselm), chunksize, &zero,
false);
&commit, false);
assert(chunk == extent_node_addr_get(chunkselm));
assert(zero == extent_node_zeroed_get(chunkselm));
extent_node_dirty_insert(chunkselm, purge_runs_sentinel,
@ -1967,7 +1926,7 @@ arena_reset(tsd_t *tsd, arena_t *arena)
assert(!arena->purging);
arena->nactive = 0;
for(i = 0; i < runs_avail_nclasses; i++)
for (i = 0; i < NPSIZES; i++)
arena_run_heap_new(&arena->runs_avail[i]);
malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
@ -2606,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
* that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
* for 4 KiB pages and 64-byte cachelines.
*/
r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
r = prng_lg_range_zu(&arena->offset_state, LG_PAGE -
LG_CACHELINE, false);
random_offset = ((uintptr_t)r) << LG_CACHELINE;
} else
random_offset = 0;
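
The random offset above is a PRNG draw of LG_PAGE - LG_CACHELINE bits shifted left by LG_CACHELINE, so it is always a cacheline multiple strictly smaller than a page. A standalone sketch with rand() standing in for the per-arena PRNG, assuming 4 KiB pages and 64-byte lines:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define LG_PAGE		12	/* 4 KiB pages (assumed). */
#define LG_CACHELINE	6	/* 64-byte cache lines (assumed). */

int
main(void)
{
	/* Draw r uniformly from [0 .. 2^(LG_PAGE - LG_CACHELINE)) = [0 .. 64). */
	unsigned r = (unsigned)rand() & ((1U << (LG_PAGE - LG_CACHELINE)) - 1);
	uintptr_t random_offset = (uintptr_t)r << LG_CACHELINE;

	/* Offset is a cacheline multiple in [0 .. 4032], never a full page. */
	printf("r = %u, offset = %ju bytes\n", r, (uintmax_t)random_offset);
	return (0);
}
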
@ -3391,7 +3351,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads,
*nthreads += arena_nthreads_get(arena, false);
*dss = dss_prec_names[arena->dss_prec];
*lg_dirty_mult = arena->lg_dirty_mult;
*decay_time = arena->decay_time;
*decay_time = arena->decay.time;
*nactive += arena->nactive;
*ndirty += arena->ndirty;
}
@ -3496,23 +3456,19 @@ arena_t *
arena_new(tsdn_t *tsdn, unsigned ind)
{
arena_t *arena;
size_t arena_size;
unsigned i;
/* Compute arena size to incorporate sufficient runs_avail elements. */
arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) *
runs_avail_nclasses);
/*
* Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
* because there is no way to clean up if base_alloc() OOMs.
*/
if (config_stats) {
arena = (arena_t *)base_alloc(tsdn,
CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses *
sizeof(malloc_large_stats_t) + nhclasses) *
sizeof(malloc_huge_stats_t));
CACHELINE_CEILING(sizeof(arena_t)) +
QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)))
+ (nhclasses * sizeof(malloc_huge_stats_t)));
} else
arena = (arena_t *)base_alloc(tsdn, arena_size);
arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t));
if (arena == NULL)
return (NULL);
@ -3524,11 +3480,11 @@ arena_new(tsdn_t *tsdn, unsigned ind)
if (config_stats) {
memset(&arena->stats, 0, sizeof(arena_stats_t));
arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
+ CACHELINE_CEILING(arena_size));
+ CACHELINE_CEILING(sizeof(arena_t)));
memset(arena->stats.lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
+ CACHELINE_CEILING(arena_size) +
+ CACHELINE_CEILING(sizeof(arena_t)) +
QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
memset(arena->stats.hstats, 0, nhclasses *
sizeof(malloc_huge_stats_t));
@ -3548,10 +3504,10 @@ arena_new(tsdn_t *tsdn, unsigned ind)
* deterministic seed.
*/
arena->offset_state = config_debug ? ind :
(uint64_t)(uintptr_t)arena;
(size_t)(uintptr_t)arena;
}
arena->dss_prec = chunk_dss_prec_get(tsdn);
arena->dss_prec = chunk_dss_prec_get();
ql_new(&arena->achunks);
@ -3562,8 +3518,9 @@ arena_new(tsdn_t *tsdn, unsigned ind)
arena->nactive = 0;
arena->ndirty = 0;
for(i = 0; i < runs_avail_nclasses; i++)
for (i = 0; i < NPSIZES; i++)
arena_run_heap_new(&arena->runs_avail[i]);
qr_new(&arena->runs_dirty, rd_link);
qr_new(&arena->chunks_cache, cc_link);
@ -3693,9 +3650,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info)
bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs *
bin_info->reg_interval) - pad_size + bin_info->redzone_size);
if (actual_run_size > small_maxrun)
small_maxrun = actual_run_size;
assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
* bin_info->reg_interval) + pad_size == bin_info->run_size);
}
@ -3711,7 +3665,7 @@ bin_info_init(void)
bin_info_run_size_calc(bin_info); \
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
#define BIN_INFO_INIT_bin_no(index, size)
#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \
BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
SIZE_CLASSES
#undef BIN_INFO_INIT_bin_yes
@ -3719,62 +3673,7 @@ bin_info_init(void)
#undef SC
}
static bool
small_run_size_init(void)
{
assert(small_maxrun != 0);
small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >>
LG_PAGE));
if (small_run_tab == NULL)
return (true);
#define TAB_INIT_bin_yes(index, size) { \
arena_bin_info_t *bin_info = &arena_bin_info[index]; \
small_run_tab[bin_info->run_size >> LG_PAGE] = true; \
}
#define TAB_INIT_bin_no(index, size)
#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
TAB_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
SIZE_CLASSES
#undef TAB_INIT_bin_yes
#undef TAB_INIT_bin_no
#undef SC
return (false);
}
static bool
run_quantize_init(void)
{
unsigned i;
run_quantize_max = chunksize + large_pad;
run_quantize_floor_tab = (size_t *)base_alloc(NULL, sizeof(size_t) *
(run_quantize_max >> LG_PAGE));
if (run_quantize_floor_tab == NULL)
return (true);
run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) *
(run_quantize_max >> LG_PAGE));
if (run_quantize_ceil_tab == NULL)
return (true);
for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) {
size_t run_size = i << LG_PAGE;
run_quantize_floor_tab[i-1] =
run_quantize_floor_compute(run_size);
run_quantize_ceil_tab[i-1] =
run_quantize_ceil_compute(run_size);
}
return (false);
}
bool
void
arena_boot(void)
{
unsigned i;
@ -3822,15 +3721,6 @@ arena_boot(void)
nhclasses = NSIZES - nlclasses - NBINS;
bin_info_init();
if (small_run_size_init())
return (true);
if (run_quantize_init())
return (true);
runs_avail_bias = size2index(PAGE);
runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
return (false);
}
void

View File

@ -316,10 +316,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
size_t i;
size_t *p = (size_t *)(uintptr_t)ret;
JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size);
for (i = 0; i < size / sizeof(size_t); i++)
assert(p[i] == 0);
}
if (config_valgrind)
JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size);
}
return (ret);
}
@ -384,23 +385,21 @@ chunk_alloc_base(size_t size)
void *
chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node)
void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit,
bool dalloc_node)
{
void *ret;
bool commit;
assert(size != 0);
assert((size & chunksize_mask) == 0);
assert(alignment != 0);
assert((alignment & chunksize_mask) == 0);
commit = true;
ret = chunk_recycle(tsdn, arena, chunk_hooks,
&arena->chunks_szad_cached, &arena->chunks_ad_cached, true,
new_addr, size, alignment, zero, &commit, dalloc_node);
new_addr, size, alignment, zero, commit, dalloc_node);
if (ret == NULL)
return (NULL);
assert(commit);
if (config_valgrind)
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
return (ret);
@ -610,10 +609,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
}
static bool
chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size)
chunk_dalloc_default_impl(void *chunk, size_t size)
{
if (!have_dss || !chunk_in_dss(tsdn, chunk))
if (!have_dss || !chunk_in_dss(chunk))
return (chunk_dalloc_mmap(chunk, size));
return (true);
}
@ -622,11 +621,8 @@ static bool
chunk_dalloc_default(void *chunk, size_t size, bool committed,
unsigned arena_ind)
{
tsdn_t *tsdn;
tsdn = tsdn_fetch();
return (chunk_dalloc_default_impl(tsdn, chunk, size));
return (chunk_dalloc_default_impl(chunk, size));
}
void
@ -644,7 +640,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
/* Try to deallocate. */
if (chunk_hooks->dalloc == chunk_dalloc_default) {
/* Call directly to propagate tsdn. */
err = chunk_dalloc_default_impl(tsdn, chunk, size);
err = chunk_dalloc_default_impl(chunk, size);
} else
err = chunk_hooks->dalloc(chunk, size, committed, arena->ind);
@ -717,13 +713,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b,
}
static bool
chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b)
chunk_merge_default_impl(void *chunk_a, void *chunk_b)
{
if (!maps_coalesce)
return (true);
if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn,
chunk_b))
if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b))
return (true);
return (false);
@ -733,11 +728,8 @@ static bool
chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b,
bool committed, unsigned arena_ind)
{
tsdn_t *tsdn;
tsdn = tsdn_fetch();
return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b));
return (chunk_merge_default_impl(chunk_a, chunk_b));
}
static rtree_node_elm_t *
@ -781,32 +773,11 @@ chunk_boot(void)
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> LG_PAGE);
if (have_dss && chunk_dss_boot())
return (true);
if (have_dss)
chunk_dss_boot();
if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) -
opt_lg_chunk), chunks_rtree_node_alloc, NULL))
return (true);
return (false);
}
void
chunk_prefork(tsdn_t *tsdn)
{
chunk_dss_prefork(tsdn);
}
void
chunk_postfork_parent(tsdn_t *tsdn)
{
chunk_dss_postfork_parent(tsdn);
}
void
chunk_postfork_child(tsdn_t *tsdn)
{
chunk_dss_postfork_child(tsdn);
}

View File

@ -10,20 +10,19 @@ const char *dss_prec_names[] = {
"N/A"
};
/* Current dss precedence default, used when creating new arenas. */
static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT;
/*
* Protects sbrk() calls. This avoids malloc races among threads, though it
* does not protect against races with threads that call sbrk() directly.
* Current dss precedence default, used when creating new arenas. NB: This is
* stored as unsigned rather than dss_prec_t because in principle there's no
* guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use
* atomic operations to synchronize the setting.
*/
static malloc_mutex_t dss_mtx;
static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT;
/* Base address of the DSS. */
static void *dss_base;
/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
static void *dss_prev;
/* Current upper limit on DSS addresses. */
/* Atomic boolean indicating whether the DSS is exhausted. */
static unsigned dss_exhausted;
/* Atomic current upper limit on DSS addresses. */
static void *dss_max;
/******************************************************************************/
@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment)
}
dss_prec_t
chunk_dss_prec_get(tsdn_t *tsdn)
chunk_dss_prec_get(void)
{
dss_prec_t ret;
if (!have_dss)
return (dss_prec_disabled);
malloc_mutex_lock(tsdn, &dss_mtx);
ret = dss_prec_default;
malloc_mutex_unlock(tsdn, &dss_mtx);
ret = (dss_prec_t)atomic_read_u(&dss_prec_default);
return (ret);
}
bool
chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec)
chunk_dss_prec_set(dss_prec_t dss_prec)
{
if (!have_dss)
return (dss_prec != dss_prec_disabled);
malloc_mutex_lock(tsdn, &dss_mtx);
dss_prec_default = dss_prec;
malloc_mutex_unlock(tsdn, &dss_mtx);
atomic_write_u(&dss_prec_default, (unsigned)dss_prec);
return (false);
}
static void *
chunk_dss_max_update(void *new_addr)
{
void *max_cur;
spin_t spinner;
/*
* Get the current end of the DSS as max_cur and assure that dss_max is
* up to date.
*/
spin_init(&spinner);
while (true) {
void *max_prev = atomic_read_p(&dss_max);
max_cur = chunk_dss_sbrk(0);
if ((uintptr_t)max_prev > (uintptr_t)max_cur) {
/*
* Another thread optimistically updated dss_max. Wait
* for it to finish.
*/
spin_adaptive(&spinner);
continue;
}
if (!atomic_cas_p(&dss_max, max_prev, max_cur))
break;
}
/* Fixed new_addr can only be supported if it is at the edge of DSS. */
if (new_addr != NULL && max_cur != new_addr)
return (NULL);
return (max_cur);
}
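
chunk_dss_max_update() and the allocation loop below both rely on optimistic atomic publication: read the current value, compute a candidate, install it with a compare-and-swap, and retry (or back off) if another thread got there first. A minimal sketch of that pattern using plain C11 atomics rather than jemalloc's atomic_read_p/atomic_cas_p wrappers:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Shared, monotonically advancing high-water mark (stand-in for dss_max). */
static _Atomic uintptr_t watermark;

static void
watermark_advance(uintptr_t new_mark)
{
	uintptr_t cur = atomic_load(&watermark);

	/*
	 * Publish new_mark only while it still extends the current value; on
	 * CAS failure, cur is reloaded with whatever another thread installed.
	 */
	while (cur < new_mark &&
	    !atomic_compare_exchange_weak(&watermark, &cur, new_mark))
		;	/* Retry with the freshly observed value. */
}

int
main(void)
{
	atomic_store(&watermark, 100);
	watermark_advance(200);
	watermark_advance(150);	/* Stale update; 200 stays in place. */
	printf("watermark = %ju\n", (uintmax_t)atomic_load(&watermark));
	return (0);
}
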
void *
chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
size_t alignment, bool *zero, bool *commit)
@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
if ((intptr_t)size < 0)
return (NULL);
malloc_mutex_lock(tsdn, &dss_mtx);
if (dss_prev != (void *)-1) {
if (!atomic_read_u(&dss_exhausted)) {
/*
* The loop is necessary to recover from races with other
* threads that are using the DSS for something other than
* malloc.
*/
do {
void *ret, *cpad, *dss_next;
while (true) {
void *ret, *cpad, *max_cur, *dss_next, *dss_prev;
size_t gap_size, cpad_size;
intptr_t incr;
/* Avoid an unnecessary system call. */
if (new_addr != NULL && dss_max != new_addr)
break;
/* Get the current end of the DSS. */
dss_max = chunk_dss_sbrk(0);
/* Make sure the earlier condition still holds. */
if (new_addr != NULL && dss_max != new_addr)
break;
max_cur = chunk_dss_max_update(new_addr);
if (max_cur == NULL)
goto label_oom;
/*
* Calculate how much padding is necessary to
@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
dss_next = (void *)((uintptr_t)ret + size);
if ((uintptr_t)ret < (uintptr_t)dss_max ||
(uintptr_t)dss_next < (uintptr_t)dss_max) {
/* Wrap-around. */
malloc_mutex_unlock(tsdn, &dss_mtx);
return (NULL);
}
(uintptr_t)dss_next < (uintptr_t)dss_max)
goto label_oom; /* Wrap-around. */
incr = gap_size + cpad_size + size;
/*
* Optimistically update dss_max, and roll back below if
* sbrk() fails. No other thread will try to extend the
* DSS while dss_max is greater than the current DSS
* max reported by sbrk(0).
*/
if (atomic_cas_p(&dss_max, max_cur, dss_next))
continue;
/* Try to allocate. */
dss_prev = chunk_dss_sbrk(incr);
if (dss_prev == dss_max) {
if (dss_prev == max_cur) {
/* Success. */
dss_max = dss_next;
malloc_mutex_unlock(tsdn, &dss_mtx);
if (cpad_size != 0) {
chunk_hooks_t chunk_hooks =
CHUNK_HOOKS_INITIALIZER;
@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
*commit = pages_decommit(ret, size);
return (ret);
}
} while (dss_prev != (void *)-1);
}
malloc_mutex_unlock(tsdn, &dss_mtx);
/*
* Failure, whether due to OOM or a race with a raw
* sbrk() call from outside the allocator. Try to roll
* back optimistic dss_max update; if rollback fails,
* it's due to another caller of this function having
* succeeded since this invocation started, in which
* case rollback is not necessary.
*/
atomic_cas_p(&dss_max, dss_next, max_cur);
if (dss_prev == (void *)-1) {
/* OOM. */
atomic_write_u(&dss_exhausted, (unsigned)true);
goto label_oom;
}
}
}
label_oom:
return (NULL);
}
bool
chunk_in_dss(tsdn_t *tsdn, void *chunk)
static bool
chunk_in_dss_helper(void *chunk, void *max)
{
bool ret;
cassert(have_dss);
malloc_mutex_lock(tsdn, &dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
&& (uintptr_t)chunk < (uintptr_t)dss_max)
ret = true;
else
ret = false;
malloc_mutex_unlock(tsdn, &dss_mtx);
return (ret);
return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk <
(uintptr_t)max);
}
bool
chunk_in_dss(void *chunk)
{
cassert(have_dss);
return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max)));
}
bool
chunk_dss_mergeable(void *chunk_a, void *chunk_b)
{
void *max;
cassert(have_dss);
max = atomic_read_p(&dss_max);
return (chunk_in_dss_helper(chunk_a, max) ==
chunk_in_dss_helper(chunk_b, max));
}
void
chunk_dss_boot(void)
{
cassert(have_dss);
if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS))
return (true);
dss_base = chunk_dss_sbrk(0);
dss_prev = dss_base;
dss_exhausted = (unsigned)(dss_base == (void *)-1);
dss_max = dss_base;
return (false);
}
void
chunk_dss_prefork(tsdn_t *tsdn)
{
if (have_dss)
malloc_mutex_prefork(tsdn, &dss_mtx);
}
void
chunk_dss_postfork_parent(tsdn_t *tsdn)
{
if (have_dss)
malloc_mutex_postfork_parent(tsdn, &dss_mtx);
}
void
chunk_dss_postfork_child(tsdn_t *tsdn)
{
if (have_dss)
malloc_mutex_postfork_child(tsdn, &dss_mtx);
}
/******************************************************************************/

View File

@ -40,8 +40,8 @@
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh);
static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh);
static bool ckh_grow(tsd_t *tsd, ckh_t *ckh);
static void ckh_shrink(tsd_t *tsd, ckh_t *ckh);
/******************************************************************************/
@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
LG_CKH_BUCKET_CELLS);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
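
The probe above visits every cell of a power-of-two-sized bucket but starts at a random cell, using a mask for wraparound, so repeated insertions do not always contend on the same leading cells. A toy sketch of the same probe order, with rand() standing in for the table's PRNG state and a hypothetical 4-cell bucket:

#include <stdio.h>
#include <stdlib.h>

#define LG_BUCKET_CELLS	2	/* 4 cells per bucket, as a small example. */
#define NCELLS		(1U << LG_BUCKET_CELLS)

int
main(void)
{
	const char *cells[NCELLS] = {"a", NULL, "c", "d"};
	unsigned offset, i;

	/* Random starting cell within the bucket. */
	offset = (unsigned)rand() & (NCELLS - 1);
	for (i = 0; i < NCELLS; i++) {
		/* Wrap around within the bucket using a power-of-two mask. */
		unsigned cell = (i + offset) & (NCELLS - 1);
		if (cells[cell] == NULL) {
			printf("insert into cell %u (started at %u)\n", cell,
			    offset);
			return (0);
		}
	}
	printf("bucket full, eviction/relocation needed\n");
	return (0);
}
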
@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
* were an item for which both hashes indicated the same
* bucket.
*/
i = (unsigned)prng_lg_range(&ckh->prng_state,
i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
LG_CKH_BUCKET_CELLS);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
@ -244,7 +245,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
}
static bool
ckh_grow(tsdn_t *tsdn, ckh_t *ckh)
ckh_grow(tsd_t *tsd, ckh_t *ckh)
{
bool ret;
ckhc_t *tab, *ttab;
@ -270,8 +271,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh)
ret = true;
goto label_return;
}
tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL,
true, arena_ichoose(tsdn, NULL));
tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE,
true, NULL, true, arena_ichoose(tsd, NULL));
if (tab == NULL) {
ret = true;
goto label_return;
@ -283,12 +284,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh)
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (!ckh_rebuild(ckh, tab)) {
idalloctm(tsdn, tab, NULL, true, true);
idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloctm(tsdn, ckh->tab, NULL, true, true);
idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
@ -299,7 +300,7 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh)
}
static void
ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
ckh_shrink(tsd_t *tsd, ckh_t *ckh)
{
ckhc_t *tab, *ttab;
size_t usize;
@ -314,8 +315,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
return;
tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true,
arena_ichoose(tsdn, NULL));
tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
true, arena_ichoose(tsd, NULL));
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@ -330,7 +331,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (!ckh_rebuild(ckh, tab)) {
idalloctm(tsdn, tab, NULL, true, true);
idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
@ -338,7 +339,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
}
/* Rebuilding failed, so back out partially rebuilt table. */
idalloctm(tsdn, ckh->tab, NULL, true, true);
idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
@ -347,7 +348,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh)
}
bool
ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp)
{
bool ret;
@ -391,8 +392,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ret = true;
goto label_return;
}
ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL,
true, arena_ichoose(tsdn, NULL));
ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true,
NULL, true, arena_ichoose(tsd, NULL));
if (ckh->tab == NULL) {
ret = true;
goto label_return;
@ -404,7 +405,7 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
}
void
ckh_delete(tsdn_t *tsdn, ckh_t *ckh)
ckh_delete(tsd_t *tsd, ckh_t *ckh)
{
assert(ckh != NULL);
@ -421,7 +422,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh)
(unsigned long long)ckh->nrelocs);
#endif
idalloctm(tsdn, ckh->tab, NULL, true, true);
idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
if (config_debug)
memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
}
@ -456,7 +457,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
}
bool
ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data)
ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data)
{
bool ret;
@ -468,7 +469,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data)
#endif
while (ckh_try_insert(ckh, &key, &data)) {
if (ckh_grow(tsdn, ckh)) {
if (ckh_grow(tsd, ckh)) {
ret = true;
goto label_return;
}
@ -480,7 +481,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data)
}
bool
ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key,
ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
void **data)
{
size_t cell;
@ -502,7 +503,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key,
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
ckh_shrink(tsdn, ckh);
ckh_shrink(tsd, ckh);
}
return (false);

View File

@ -1478,7 +1478,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
READONLY();
if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) {
if (tcaches_create(tsd, &tcache_ind)) {
ret = EFAULT;
goto label_return;
}
@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena);
} else {
if (dss_prec != dss_prec_limit &&
chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) {
chunk_dss_prec_set(dss_prec)) {
ret = EFAULT;
goto label_return;
}
dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd));
dss_prec_old = chunk_dss_prec_get();
}
dss = dss_prec_names[dss_prec_old];
@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
if (lg_sample >= (sizeof(uint64_t) << 3))
lg_sample = (sizeof(uint64_t) << 3) - 1;
prof_reset(tsd_tsdn(tsd), lg_sample);
prof_reset(tsd, lg_sample);
ret = 0;
label_return:

View File

@ -54,6 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
{
void *ret;
size_t ausize;
arena_t *iarena;
extent_node_t *node;
bool is_zeroed;
@ -67,8 +68,9 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
assert(ausize >= chunksize);
/* Allocate an extent node with which to track the chunk. */
iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get();
node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)),
CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena));
CACHELINE, false, NULL, true, iarena);
if (node == NULL)
return (NULL);
@ -114,7 +116,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl)
#endif
static void
huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize)
huge_dalloc_junk(void *ptr, size_t usize)
{
if (config_fill && have_dss && unlikely(opt_junk_free)) {
@ -122,7 +124,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize)
* Only bother junk filling if the chunk isn't about to be
* unmapped.
*/
if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr)))
if (!config_munmap || (have_dss && chunk_in_dss(ptr)))
memset(ptr, JEMALLOC_FREE_JUNK, usize);
}
}
@ -221,7 +223,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize,
if (oldsize > usize) {
size_t sdiff = oldsize - usize;
if (config_fill && unlikely(opt_junk_free)) {
huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize),
huge_dalloc_junk((void *)((uintptr_t)ptr + usize),
sdiff);
post_zeroed = false;
} else {
@ -402,7 +404,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr)
ql_remove(&arena->huge, node, ql_link);
malloc_mutex_unlock(tsdn, &arena->huge_mtx);
huge_dalloc_junk(tsdn, extent_node_addr_get(node),
huge_dalloc_junk(extent_node_addr_get(node),
extent_node_size_get(node));
arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node),
extent_node_addr_get(node), extent_node_size_get(node));

View File

@ -9,7 +9,11 @@ const char *__malloc_options_1_0 = NULL;
__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0);
/* Runtime configuration options. */
const char *je_malloc_conf JEMALLOC_ATTR(weak);
const char *je_malloc_conf
#ifndef _WIN32
JEMALLOC_ATTR(weak)
#endif
;
bool opt_abort =
#ifdef JEMALLOC_DEBUG
true
@ -89,14 +93,25 @@ enum {
};
static uint8_t malloc_slow_flags;
/* Last entry for overflow detection only. */
JEMALLOC_ALIGNED(CACHELINE)
const size_t index2size_tab[NSIZES+1] = {
#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
const size_t pind2sz_tab[NPSIZES] = {
#define PSZ_yes(lg_grp, ndelta, lg_delta) \
(((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
#define PSZ_no(lg_grp, ndelta, lg_delta)
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \
PSZ_##psz(lg_grp, ndelta, lg_delta)
SIZE_CLASSES
#undef PSZ_yes
#undef PSZ_no
#undef SC
};
JEMALLOC_ALIGNED(CACHELINE)
const size_t index2size_tab[NSIZES] = {
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \
((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
SIZE_CLASSES
#undef SC
ZU(0)
};
JEMALLOC_ALIGNED(CACHELINE)
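
pind2sz_tab and index2size_tab above are both generated from the single SIZE_CLASSES X-macro; the new psz column is dispatched through PSZ_yes/PSZ_no so that only page-multiple classes land in pind2sz_tab. A self-contained sketch of that X-macro dispatch technique, using a hypothetical class list rather than jemalloc's real one:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical class list: SC(name, size, psz). */
#define MY_SIZE_CLASSES		\
	SC(tiny,    8,    no)	\
	SC(quantum, 16,   no)	\
	SC(page,    4096, yes)	\
	SC(twopage, 8192, yes)

/* Table of all sizes. */
static const size_t all_sizes[] = {
#define SC(name, size, psz)	size,
	MY_SIZE_CLASSES
#undef SC
};

/* Table of page-multiple sizes only, via per-entry dispatch macros. */
static const size_t page_sizes[] = {
#define PSZ_yes(size)	size,
#define PSZ_no(size)
#define SC(name, size, psz)	PSZ_##psz(size)
	MY_SIZE_CLASSES
#undef SC
#undef PSZ_yes
#undef PSZ_no
};

int
main(void)
{
	printf("%zu classes total, %zu page-sized\n",
	    sizeof(all_sizes) / sizeof(all_sizes[0]),
	    sizeof(page_sizes) / sizeof(page_sizes[0]));
	return (0);
}
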
@ -165,7 +180,7 @@ const uint8_t size2index_tab[] = {
#define S2B_11(i) S2B_10(i) S2B_10(i)
#endif
#define S2B_no(i)
#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \
S2B_##lg_delta_lookup(index)
SIZE_CLASSES
#undef S2B_3
@ -333,6 +348,13 @@ a0idalloc(void *ptr, bool is_metadata)
idalloctm(TSDN_NULL, ptr, false, is_metadata, true);
}
arena_t *
a0get(void)
{
return (a0);
}
void *
a0malloc(size_t size)
{
@ -459,15 +481,16 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal)
{
arena_t *arena;
if (!tsd_nominal(tsd))
return;
arena = arena_get(tsd_tsdn(tsd), ind, false);
arena_nthreads_inc(arena, internal);
if (tsd_nominal(tsd)) {
if (internal)
tsd_iarena_set(tsd, arena);
else
tsd_arena_set(tsd, arena);
}
if (internal)
tsd_iarena_set(tsd, arena);
else
tsd_arena_set(tsd, arena);
}
void
@ -793,6 +816,20 @@ malloc_ncpus(void)
SYSTEM_INFO si;
GetSystemInfo(&si);
result = si.dwNumberOfProcessors;
#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT)
/*
* glibc >= 2.6 has the CPU_COUNT macro.
*
* glibc's sysconf() uses isspace(). glibc allocates for the first time
* *before* setting up the isspace tables. Therefore we need a
* different method to get the number of CPUs.
*/
{
cpu_set_t set;
pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
result = CPU_COUNT(&set);
}
#else
result = sysconf(_SC_NPROCESSORS_ONLN);
#endif
@ -1107,8 +1144,7 @@ malloc_conf_init(void)
for (i = 0; i < dss_prec_limit; i++) {
if (strncmp(dss_prec_names[i], v, vlen)
== 0) {
if (chunk_dss_prec_set(NULL,
i)) {
if (chunk_dss_prec_set(i)) {
malloc_conf_error(
"Error setting dss",
k, klen, v, vlen);
@ -1153,9 +1189,20 @@ malloc_conf_init(void)
if (config_fill) {
if (CONF_MATCH("junk")) {
if (CONF_MATCH_VALUE("true")) {
opt_junk = "true";
opt_junk_alloc = opt_junk_free =
true;
if (config_valgrind &&
unlikely(in_valgrind)) {
malloc_conf_error(
"Deallocation-time "
"junk filling cannot "
"be enabled while "
"running inside "
"Valgrind", k, klen, v,
vlen);
} else {
opt_junk = "true";
opt_junk_alloc = true;
opt_junk_free = true;
}
} else if (CONF_MATCH_VALUE("false")) {
opt_junk = "false";
opt_junk_alloc = opt_junk_free =
@ -1165,9 +1212,20 @@ malloc_conf_init(void)
opt_junk_alloc = true;
opt_junk_free = false;
} else if (CONF_MATCH_VALUE("free")) {
opt_junk = "free";
opt_junk_alloc = false;
opt_junk_free = true;
if (config_valgrind &&
unlikely(in_valgrind)) {
malloc_conf_error(
"Deallocation-time "
"junk filling cannot "
"be enabled while "
"running inside "
"Valgrind", k, klen, v,
vlen);
} else {
opt_junk = "free";
opt_junk_alloc = false;
opt_junk_free = true;
}
} else {
malloc_conf_error(
"Invalid conf value", k,
@ -1253,11 +1311,14 @@ malloc_init_hard_needed(void)
}
#ifdef JEMALLOC_THREADED_INIT
if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
spin_t spinner;
/* Busy-wait until the initializing thread completes. */
spin_init(&spinner);
do {
malloc_mutex_unlock(NULL, &init_lock);
CPU_SPINWAIT;
malloc_mutex_lock(NULL, &init_lock);
malloc_mutex_unlock(TSDN_NULL, &init_lock);
spin_adaptive(&spinner);
malloc_mutex_lock(TSDN_NULL, &init_lock);
} while (!malloc_initialized());
return (false);
}
@ -1291,8 +1352,7 @@ malloc_init_hard_a0_locked()
return (true);
if (config_prof)
prof_boot1();
if (arena_boot())
return (true);
arena_boot();
if (config_tcache && tcache_boot(TSDN_NULL))
return (true);
if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS))
@ -1423,7 +1483,7 @@ malloc_init_hard(void)
return (true);
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
if (config_prof && prof_boot2(tsd_tsdn(tsd))) {
if (config_prof && prof_boot2(tsd)) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return (true);
}
@ -1998,6 +2058,29 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
je_memalign;
# endif
#ifdef CPU_COUNT
/*
* To enable static linking with glibc, the libc specific malloc interface must
* be implemented also, so none of glibc's malloc.o functions are added to the
* link.
*/
#define ALIAS(je_fn) __attribute__((alias (#je_fn), used))
/* To force macro expansion of je_ prefix before stringification. */
#define PREALIAS(je_fn) ALIAS(je_fn)
void *__libc_malloc(size_t size) PREALIAS(je_malloc);
void __libc_free(void* ptr) PREALIAS(je_free);
void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc);
void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc);
void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign);
void *__libc_valloc(size_t size) PREALIAS(je_valloc);
int __posix_memalign(void** r, size_t a, size_t s)
PREALIAS(je_posix_memalign);
#undef PREALIAS
#undef ALIAS
#endif
#endif
/*
@ -2852,7 +2935,6 @@ _malloc_prefork(void)
}
}
base_prefork(tsd_tsdn(tsd));
chunk_prefork(tsd_tsdn(tsd));
for (i = 0; i < narenas; i++) {
if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL)
arena_prefork3(tsd_tsdn(tsd), arena);
@ -2881,7 +2963,6 @@ _malloc_postfork(void)
witness_postfork_parent(tsd);
/* Release all mutexes, now that fork() has completed. */
chunk_postfork_parent(tsd_tsdn(tsd));
base_postfork_parent(tsd_tsdn(tsd));
for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
arena_t *arena;
@ -2906,7 +2987,6 @@ jemalloc_postfork_child(void)
witness_postfork_child(tsd);
/* Release all mutexes, now that fork() has completed. */
chunk_postfork_child(tsd_tsdn(tsd));
base_postfork_child(tsd_tsdn(tsd));
for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
arena_t *arena;

View File

@ -91,6 +91,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank)
_CRT_SPINCOUNT))
return (true);
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
mutex->lock = OS_UNFAIR_LOCK_INIT;
#elif (defined(JEMALLOC_OSSPIN))
mutex->lock = 0;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))

View File

@ -97,6 +97,76 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor)
return (time->ns / divisor->ns);
}
#ifdef _WIN32
# define NSTIME_MONOTONIC true
static void
nstime_get(nstime_t *time)
{
FILETIME ft;
uint64_t ticks_100ns;
GetSystemTimeAsFileTime(&ft);
ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
nstime_init(time, ticks_100ns * 100);
}
#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
# define NSTIME_MONOTONIC true
static void
nstime_get(nstime_t *time)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
nstime_init2(time, ts.tv_sec, ts.tv_nsec);
}
#elif JEMALLOC_HAVE_CLOCK_MONOTONIC
# define NSTIME_MONOTONIC true
static void
nstime_get(nstime_t *time)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
nstime_init2(time, ts.tv_sec, ts.tv_nsec);
}
#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
# define NSTIME_MONOTONIC true
static void
nstime_get(nstime_t *time)
{
nstime_init(time, mach_absolute_time());
}
#else
# define NSTIME_MONOTONIC false
static void
nstime_get(nstime_t *time)
{
struct timeval tv;
gettimeofday(&tv, NULL);
nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000);
}
#endif
#ifdef JEMALLOC_JET
#undef nstime_monotonic
#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic)
#endif
bool
nstime_monotonic(void)
{
return (NSTIME_MONOTONIC);
#undef NSTIME_MONOTONIC
}
#ifdef JEMALLOC_JET
#undef nstime_monotonic
#define nstime_monotonic JEMALLOC_N(nstime_monotonic)
nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic);
#endif
#ifdef JEMALLOC_JET
#undef nstime_update
#define nstime_update JEMALLOC_N(n_nstime_update)
@ -107,33 +177,7 @@ nstime_update(nstime_t *time)
nstime_t old_time;
nstime_copy(&old_time, time);
#ifdef _WIN32
{
FILETIME ft;
uint64_t ticks;
GetSystemTimeAsFileTime(&ft);
ticks = (((uint64_t)ft.dwHighDateTime) << 32) |
ft.dwLowDateTime;
time->ns = ticks * 100;
}
#elif JEMALLOC_CLOCK_GETTIME
{
struct timespec ts;
if (sysconf(_SC_MONOTONIC_CLOCK) > 0)
clock_gettime(CLOCK_MONOTONIC, &ts);
else
clock_gettime(CLOCK_REALTIME, &ts);
time->ns = ts.tv_sec * BILLION + ts.tv_nsec;
}
#else
{
struct timeval tv;
gettimeofday(&tv, NULL);
time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000;
}
#endif
nstime_get(time);
/* Handle non-monotonic clocks. */
if (unlikely(nstime_compare(&old_time, time) > 0)) {

View File

@ -207,6 +207,11 @@ os_overcommits_sysctl(void)
#endif
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
* Use syscall(2) rather than {open,read,close}(2) when possible to avoid
* reentry during bootstrapping if another library has interposed system call
* wrappers.
*/
static bool
os_overcommits_proc(void)
{
@ -214,11 +219,26 @@ os_overcommits_proc(void)
char buf[1];
ssize_t nread;
#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open)
fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
#else
fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
#endif
if (fd == -1)
return (false); /* Error. */
#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read)
nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
nread = read(fd, &buf, sizeof(buf));
#endif
#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close)
syscall(SYS_close, fd);
#else
close(fd);
#endif
if (nread < 1)
return (false); /* Error. */
/*

View File

@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
bool even_if_attached);
static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
bool even_if_attached);
static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
assert(gctx->nlimbo != 0);
if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
/* Remove gctx from bt2gctx. */
if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL))
if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL))
not_reached();
prof_leave(tsd, tdata_self);
/* Destroy gctx. */
@ -651,7 +651,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
assert(tctx->cnts.accumobjs == 0);
assert(tctx->cnts.accumbytes == 0);
ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL);
ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
if (destroy_tdata)
prof_tdata_destroy(tsd_tsdn(tsd), tdata, false);
prof_tdata_destroy(tsd, tdata, false);
if (destroy_tctx)
idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true);
@ -733,7 +733,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
return (true);
}
btkey.p = &gctx.p->bt;
if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) {
if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
/* OOM. */
prof_leave(tsd, tdata);
idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true);
@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
/* Link a prof_tctx_t into gctx for this thread. */
ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
size2index(sizeof(prof_tctx_t)), false, NULL, true,
arena_ichoose(tsd_tsdn(tsd), NULL), true);
arena_ichoose(tsd, NULL), true);
if (ret.p == NULL) {
if (new_gctx)
prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
@ -810,8 +810,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
ret.p->prepared = true;
ret.p->state = prof_tctx_state_initializing;
malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey,
ret.v);
error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
if (error) {
if (new_gctx)
@ -875,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
* pp 500
* (http://luc.devroye.org/rnbookindex.html)
*/
r = prng_lg_range(&tdata->prng_state, 53);
r = prng_lg_range_u64(&tdata->prng_state, 53);
u = (double)r * (1.0/9007199254740992.0L);
tdata->bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
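
The expression above is inverse-transform sampling of a geometric distribution: with per-byte sampling probability p = 2^-lg_prof_sample and u uniform in (0, 1), the number of bytes until the next sample is floor(log(u)/log(1-p)) + 1. A standalone sketch of the arithmetic, with rand() standing in for the 53-bit PRNG draw (compile with -lm):

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned lg_prof_sample = 19;	/* e.g. ~512 KiB mean between samples. */
	double p = 1.0 / (double)((uint64_t)1U << lg_prof_sample);
	/* Uniform u in (0, 1); the real code derives it from 53 PRNG bits. */
	double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
	uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - p)) + 1;

	printf("next sample after %ju bytes\n",
	    (uintmax_t)bytes_until_sample);
	return (0);
}
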
@ -1791,7 +1790,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn)
}
static prof_tdata_t *
prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
char *thread_name, bool active)
{
prof_tdata_t *tdata;
@ -1799,7 +1798,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
cassert(config_prof);
/* Initialize an empty cache for this thread. */
tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t),
tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
size2index(sizeof(prof_tdata_t)), false, NULL, true,
arena_get(TSDN_NULL, 0, true), true);
if (tdata == NULL)
@ -1813,9 +1812,9 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
tdata->expired = false;
tdata->tctx_uid_next = 0;
if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS,
prof_bt_hash, prof_bt_keycomp)) {
idalloctm(tsdn, tdata, NULL, true, true);
if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
prof_bt_keycomp)) {
idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
return (NULL);
}
@ -1829,19 +1828,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim,
tdata->dumping = false;
tdata->active = active;
malloc_mutex_lock(tsdn, &tdatas_mtx);
malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
tdata_tree_insert(&tdatas, tdata);
malloc_mutex_unlock(tsdn, &tdatas_mtx);
malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
return (tdata);
}
prof_tdata_t *
prof_tdata_init(tsdn_t *tsdn)
prof_tdata_init(tsd_t *tsd)
{
return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL,
prof_thread_active_init_get(tsdn)));
return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
NULL, prof_thread_active_init_get(tsd_tsdn(tsd))));
}
static bool
@ -1866,31 +1865,29 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
}
static void
prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata,
prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
bool even_if_attached)
{
malloc_mutex_assert_owner(tsdn, &tdatas_mtx);
assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata);
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
tdata_tree_remove(&tdatas, tdata);
assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
if (tdata->thread_name != NULL)
idalloctm(tsdn, tdata->thread_name, NULL, true, true);
ckh_delete(tsdn, &tdata->bt2tctx);
idalloctm(tsdn, tdata, NULL, true, true);
idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
ckh_delete(tsd, &tdata->bt2tctx);
idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
}
static void
prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached)
prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached)
{
malloc_mutex_lock(tsdn, &tdatas_mtx);
prof_tdata_destroy_locked(tsdn, tdata, even_if_attached);
malloc_mutex_unlock(tsdn, &tdatas_mtx);
malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
}
static void
@ -1913,7 +1910,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata)
destroy_tdata = false;
malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
if (destroy_tdata)
prof_tdata_destroy(tsd_tsdn(tsd), tdata, true);
prof_tdata_destroy(tsd, tdata, true);
}
prof_tdata_t *
@ -1926,8 +1923,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata)
bool active = tdata->active;
prof_tdata_detach(tsd, tdata);
return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim,
thread_name, active));
return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
active));
}
static bool
@ -1956,30 +1953,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
}
void
prof_reset(tsdn_t *tsdn, size_t lg_sample)
prof_reset(tsd_t *tsd, size_t lg_sample)
{
prof_tdata_t *next;
assert(lg_sample < (sizeof(uint64_t) << 3));
malloc_mutex_lock(tsdn, &prof_dump_mtx);
malloc_mutex_lock(tsdn, &tdatas_mtx);
malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
lg_prof_sample = lg_sample;
next = NULL;
do {
prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
prof_tdata_reset_iter, (void *)tsdn);
prof_tdata_reset_iter, (void *)tsd);
if (to_destroy != NULL) {
next = tdata_tree_next(&tdatas, to_destroy);
prof_tdata_destroy_locked(tsdn, to_destroy, false);
prof_tdata_destroy_locked(tsd, to_destroy, false);
} else
next = NULL;
} while (next != NULL);
malloc_mutex_unlock(tsdn, &tdatas_mtx);
malloc_mutex_unlock(tsdn, &prof_dump_mtx);
malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
}
void
@ -2189,7 +2186,7 @@ prof_boot1(void)
}
bool
prof_boot2(tsdn_t *tsdn)
prof_boot2(tsd_t *tsd)
{
cassert(config_prof);
@ -2215,7 +2212,7 @@ prof_boot2(tsdn_t *tsdn)
WITNESS_RANK_PROF_THREAD_ACTIVE_INIT))
return (true);
if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
prof_bt_keycomp))
return (true);
if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
@ -2246,8 +2243,8 @@ prof_boot2(tsdn_t *tsdn)
abort();
}
gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS
* sizeof(malloc_mutex_t));
gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
PROF_NCTX_LOCKS * sizeof(malloc_mutex_t));
if (gctx_locks == NULL)
return (true);
for (i = 0; i < PROF_NCTX_LOCKS; i++) {
@ -2256,7 +2253,7 @@ prof_boot2(tsdn_t *tsdn)
return (true);
}
tdata_locks = (malloc_mutex_t *)base_alloc(tsdn,
tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t));
if (tdata_locks == NULL)
return (true);

View File

@ -96,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp)
rtree_node_elm_t *node;
if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
spin_t spinner;
/*
* Another thread is already in the process of initializing.
* Spin-wait until initialization is complete.
*/
spin_init(&spinner);
do {
CPU_SPINWAIT;
spin_adaptive(&spinner);
node = atomic_read_p((void **)elmp);
} while (node == RTREE_NODE_INITIALIZING);
} else {
@ -125,5 +128,5 @@ rtree_node_elm_t *
rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
{
return (rtree_node_init(rtree, level, &elm->child));
return (rtree_node_init(rtree, level+1, &elm->child));
}

View File

@ -0,0 +1,2 @@
#define JEMALLOC_SPIN_C_
#include "jemalloc/internal/jemalloc_internal.h"

File diff suppressed because it is too large

View File

@ -445,14 +445,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
}
bool
tcaches_create(tsdn_t *tsdn, unsigned *r_ind)
tcaches_create(tsd_t *tsd, unsigned *r_ind)
{
arena_t *arena;
tcache_t *tcache;
tcaches_t *elm;
if (tcaches == NULL) {
tcaches = base_alloc(tsdn, sizeof(tcache_t *) *
tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) *
(MALLOCX_TCACHE_MAX+1));
if (tcaches == NULL)
return (true);
@ -460,10 +460,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind)
if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
return (true);
arena = arena_ichoose(tsdn, NULL);
arena = arena_ichoose(tsd, NULL);
if (unlikely(arena == NULL))
return (true);
tcache = tcache_create(tsdn, arena);
tcache = tcache_create(tsd_tsdn(tsd), arena);
if (tcache == NULL)
return (true);

View File

@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block)
tsd_init_block_t *iter;
/* Check whether this thread has already inserted into the list. */
malloc_mutex_lock(NULL, &head->lock);
malloc_mutex_lock(TSDN_NULL, &head->lock);
ql_foreach(iter, &head->blocks, link) {
if (iter->thread == self) {
malloc_mutex_unlock(NULL, &head->lock);
malloc_mutex_unlock(TSDN_NULL, &head->lock);
return (iter->data);
}
}
@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block)
ql_elm_new(block, link);
block->thread = self;
ql_tail_insert(&head->blocks, block, link);
malloc_mutex_unlock(NULL, &head->lock);
malloc_mutex_unlock(TSDN_NULL, &head->lock);
return (NULL);
}
@ -190,8 +190,8 @@ void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block)
{
malloc_mutex_lock(NULL, &head->lock);
malloc_mutex_lock(TSDN_NULL, &head->lock);
ql_remove(&head->blocks, block, link);
malloc_mutex_unlock(NULL, &head->lock);
malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

View File

@ -49,7 +49,7 @@ static void
wrtmessage(void *cbopaque, const char *s)
{
#ifdef SYS_write
#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write)
/*
* Use syscall(2) rather than write(2) when possible in order to avoid
* the possibility of memory allocation within libc. This is necessary

View File

@ -4,8 +4,8 @@
JEMALLOCSRCS:= jemalloc.c arena.c atomic.c base.c bitmap.c chunk.c \
chunk_dss.c chunk_mmap.c ckh.c ctl.c extent.c hash.c huge.c mb.c \
mutex.c nstime.c pages.c prng.c prof.c quarantine.c rtree.c stats.c \
tcache.c ticker.c tsd.c util.c witness.c
mutex.c nstime.c pages.c prng.c prof.c quarantine.c rtree.c spin.c \
stats.c tcache.c ticker.c tsd.c util.c witness.c
SYM_MAPS+=${LIBC_SRCTOP}/stdlib/jemalloc/Symbol.map