From 136e1e4ddc6169ae4068a28d94389e8a0b7bef72 Mon Sep 17 00:00:00 2001 From: jasone Date: Thu, 10 May 2012 18:29:40 +0000 Subject: [PATCH] Import jemalloc 37b6f95dcd866f51c91488531a2efc3ed4c2b754 (dev branch, prior to 3.0.0 release). This version is likely very close to what will be 3.0.0. --- contrib/jemalloc/ChangeLog | 15 +- contrib/jemalloc/FREEBSD-Xlist | 1 + contrib/jemalloc/FREEBSD-diffs | 63 ++- contrib/jemalloc/VERSION | 2 +- contrib/jemalloc/doc/jemalloc.3 | 16 +- .../include/jemalloc/internal/arena.h | 401 +++++++++++--- .../include/jemalloc/internal/atomic.h | 28 + .../jemalloc/include/jemalloc/internal/ctl.h | 31 +- .../jemalloc/internal/jemalloc_internal.h | 89 +++- .../include/jemalloc/internal/mutex.h | 20 +- .../jemalloc/internal/private_namespace.h | 28 +- .../jemalloc/include/jemalloc/internal/prof.h | 60 ++- .../include/jemalloc/internal/tcache.h | 71 +-- .../jemalloc/include/jemalloc/internal/tsd.h | 101 ++++ .../jemalloc/include/jemalloc/internal/util.h | 32 +- contrib/jemalloc/include/jemalloc/jemalloc.h | 64 ++- .../jemalloc/include/jemalloc/jemalloc_defs.h | 39 +- contrib/jemalloc/src/arena.c | 503 +++++++++--------- contrib/jemalloc/src/chunk.c | 97 ++-- contrib/jemalloc/src/chunk_mmap.c | 149 +++--- contrib/jemalloc/src/ctl.c | 252 +++++---- contrib/jemalloc/src/huge.c | 4 +- contrib/jemalloc/src/jemalloc.c | 122 ++--- contrib/jemalloc/src/mutex.c | 21 +- contrib/jemalloc/src/prof.c | 215 ++++---- contrib/jemalloc/src/quarantine.c | 101 +++- contrib/jemalloc/src/stats.c | 30 +- contrib/jemalloc/src/tcache.c | 59 +- contrib/jemalloc/src/tsd.c | 43 +- contrib/jemalloc/src/util.c | 51 +- 30 files changed, 1728 insertions(+), 980 deletions(-) diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index b71fa165b60f..691630bc8d31 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -19,9 +19,10 @@ found in the git revision history: New features: - Implement Valgrind support, redzones, and quarantine. - - Add support for additional operating systems: + - Add support for additional platforms: + FreeBSD + Mac OS X Lion + + MinGW - Add support for additional architectures: + MIPS + SH4 @@ -64,18 +65,24 @@ found in the git revision history: - Remove the --enable-sysv configure option. Bug fixes: - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - - Work around TLS dallocation via free() on Linux. This bug could cause + - Work around TLS deallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix a potential deadlock that could occur during interval- and + growth-triggered heap profile dumps. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. + - Fix fork-related bugs that could cause deadlock in children between fork + and exec. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). - Check for NULL pointer in malloc_usable_size(). + - Fix an off-by-one heap profile statistics bug that could be observed in + interval- and growth-triggered heap profiles. + - Fix the "epoch" mallctl to update cached stats even if the passed in epoch + is 0. - Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. 
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index f3945de46f8c..cd0f8add7b60 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -18,6 +18,7 @@ include/jemalloc/internal/jemalloc_internal.h.in include/jemalloc/internal/size_classes.sh include/jemalloc/jemalloc.h.in include/jemalloc/jemalloc_defs.h.in +include/msvc_compat/ install-sh src/zone.c test/ diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index 76e0a87c8c6e..c38b92fe9218 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,5 +1,5 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index e8a5722..cec85b5 100644 +index 93c16dc..b5c5595 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -51,12 +51,23 @@ @@ -27,7 +27,7 @@ index e8a5722..cec85b5 100644 Standard API -@@ -2091,4 +2102,16 @@ malloc_conf = "lg_chunk:24";]]> +@@ -2101,4 +2112,16 @@ malloc_conf = "lg_chunk:24";]]> The posix_memalign function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). @@ -45,7 +45,7 @@ index e8a5722..cec85b5 100644 + diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index b61abe8..edbb437 100644 +index 268cd14..cfb1fb9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,8 @@ @@ -54,12 +54,12 @@ index b61abe8..edbb437 100644 +#include "libc_private.h" +#include "namespace.h" + - #include - #include - #include -@@ -35,6 +38,9 @@ - #include #include + #ifdef _WIN32 + # include +@@ -54,6 +57,9 @@ typedef intptr_t ssize_t; + #endif + #include +#include "un-namespace.h" +#include "libc_private.h" @@ -68,10 +68,10 @@ index b61abe8..edbb437 100644 #include "../jemalloc@install_suffix@.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h -index 8837ef5..d7133f4 100644 +index de44e14..564d604 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h -@@ -39,9 +39,6 @@ struct malloc_mutex_s { +@@ -43,9 +43,6 @@ struct malloc_mutex_s { #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; @@ -82,10 +82,10 @@ index 8837ef5..d7133f4 100644 bool malloc_mutex_init(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h -index bb1b63e..00eb169 100644 +index b816647..b8ce6b1 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h -@@ -165,7 +165,6 @@ +@@ -186,7 +186,6 @@ #define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) @@ -94,7 +94,7 @@ index bb1b63e..00eb169 100644 #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in -index f0581db..f26d8bc 100644 +index ad06948..505dd38 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -15,6 +15,7 @@ extern "C" { @@ -107,10 +107,10 @@ index f0581db..f26d8bc 100644 #define ALLOCM_LG_ALIGN(la) (la) diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h new file mode 100644 -index 0000000..2c5797f +index 0000000..9efab93 --- /dev/null +++ b/include/jemalloc/jemalloc_FreeBSD.h -@@ -0,0 +1,76 @@ +@@ -0,0 +1,80 @@ +/* + * 
Override settings that were generated in jemalloc_defs.h as necessary. + */ @@ -154,8 +154,12 @@ index 0000000..2c5797f +# define LG_SIZEOF_PTR 2 +#endif +#ifdef __mips__ ++#ifdef __mips_n64 ++# define LG_SIZEOF_PTR 3 ++#else +# define LG_SIZEOF_PTR 2 +#endif ++#endif +#ifdef __powerpc64__ +# define LG_SIZEOF_PTR 3 +#elif defined(__powerpc__) @@ -188,20 +192,21 @@ index 0000000..2c5797f +#define pthread_mutex_lock _pthread_mutex_lock +#define pthread_mutex_unlock _pthread_mutex_unlock diff --git a/src/jemalloc.c b/src/jemalloc.c -index f9c8916..8e24a5a 100644 +index d42e91d..cdf6222 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c -@@ -8,6 +8,9 @@ malloc_tsd_data(, arenas, arena_t *, NULL) +@@ -8,6 +8,10 @@ malloc_tsd_data(, arenas, arena_t *, NULL) malloc_tsd_data(, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER) -+const char *__malloc_options_1_0; ++/* Work around : */ ++const char *__malloc_options_1_0 = NULL; +__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); + /* Runtime configuration options. */ - const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); + const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG -@@ -401,7 +404,8 @@ malloc_conf_init(void) +@@ -429,7 +433,8 @@ malloc_conf_init(void) #endif ; @@ -212,10 +217,10 @@ index f9c8916..8e24a5a 100644 * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment diff --git a/src/mutex.c b/src/mutex.c -index 4b8ce57..7be5fc9 100644 +index 37a843e..4a90a05 100644 --- a/src/mutex.c +++ b/src/mutex.c -@@ -63,6 +63,17 @@ pthread_create(pthread_t *__restrict thread, +@@ -66,6 +66,17 @@ pthread_create(pthread_t *__restrict thread, #ifdef JEMALLOC_MUTEX_INIT_CB int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); @@ -234,14 +239,14 @@ index 4b8ce57..7be5fc9 100644 bool diff --git a/src/util.c b/src/util.c -index 99ae26d..b80676c 100644 +index 9b73c3e..f94799f 100644 --- a/src/util.c +++ b/src/util.c -@@ -60,6 +60,22 @@ wrtmessage(void *cbopaque, const char *s) - void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +@@ -58,6 +58,22 @@ wrtmessage(void *cbopaque, const char *s) -+JEMALLOC_CATTR(visibility("hidden"), static) + JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); + ++JEMALLOC_ATTR(visibility("hidden")) +void +wrtmessage_1_0(const char *s1, const char *s2, const char *s3, + const char *s4) @@ -258,5 +263,5 @@ index 99ae26d..b80676c 100644 +__sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); + /* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper. + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. 
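
The FREEBSD-diffs hunks above preserve the pre-3.0 libc ABI: the legacy _malloc_options string and the four-string _malloc_message callback are re-exported under the FBSD_1.0 symbol version via __sym_compat, with a small adapter (wrtmessage_1_0) that forwards each piece to the new single-string writer. The standalone C sketch below (not part of the patch) illustrates only that adapter pattern; write_str and compat_malloc_message are hypothetical names, and the versioned-symbol binding is left as a comment because actually emitting it requires FreeBSD's __sym_compat macro (or an equivalent GNU .symver directive) together with a linker version script that defines FBSD_1.0.

    #include <string.h>     /* strlen() */
    #include <unistd.h>     /* write(), STDERR_FILENO */

    /* New-style writer: one opaque callback argument (unused) and one string. */
    static void
    write_str(void *cbopaque, const char *s)
    {

            (void)cbopaque;
            (void)write(STDERR_FILENO, s, strlen(s));
    }

    /*
     * Adapter that keeps the historical four-string _malloc_message
     * signature by forwarding each piece to the new writer, in the same
     * spirit as wrtmessage_1_0() in the patch above.
     */
    static void
    malloc_message_1_0(const char *s1, const char *s2, const char *s3,
        const char *s4)
    {

            write_str(NULL, s1);
            write_str(NULL, s2);
            write_str(NULL, s3);
            write_str(NULL, s4);
    }

    /* Function pointer exported under the old ABI for legacy consumers. */
    void (*compat_malloc_message)(const char *, const char *, const char *,
        const char *) = malloc_message_1_0;

    /*
     * On FreeBSD the binding to the old versioned name is done with
     * __sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0), which
     * expands to roughly:
     *
     *     __asm__(".symver __malloc_message_1_0, _malloc_message@FBSD_1.0");
     *
     * That directive only links against a version map defining FBSD_1.0,
     * so it is shown here as a comment rather than emitted.
     */

    int
    main(void)
    {

            compat_malloc_message("jemalloc", ": ", "compat message", "\n");
            return (0);
    }

Keeping the adapter separate from the new je_malloc_message hook lets binaries linked against the FBSD_1.0 symbols keep working while newly built code sees only the single-string interface.
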
diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index 629165c7f2de..6651c8ee386c 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca +1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index 209b1331219c..0621c8cc5e96 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 04/21/2012 +.\" Date: 05/09/2012 .\" Manual: User Manual -.\" Source: jemalloc 1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca +.\" Source: jemalloc 1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754 .\" Language: English .\" -.TH "JEMALLOC" "3" "04/21/2012" "jemalloc 1.0.0-286-ga8f8d7540d" "User Manual" +.TH "JEMALLOC" "3" "05/09/2012" "jemalloc 1.0.0-335-g37b6f95dcd" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 1\&.0\&.0\-286\-ga8f8d7540d66ddee7337db80c92890916e1063ca\&. More information can be found at the +This manual describes jemalloc 1\&.0\&.0\-335\-g37b6f95dcd866f51c91488531a2efc3ed4c2b754\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -567,6 +567,12 @@ was specified during build configuration\&. was specified during build configuration\&. .RE .PP +"config\&.mremap" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-mremap\fR +was specified during build configuration\&. +.RE +.PP "config\&.munmap" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-munmap\fR @@ -1462,7 +1468,7 @@ jemalloc website .IP " 2." 4 Valgrind .RS 4 -\%http://http://valgrind.org/ +\%http://valgrind.org/ .RE .IP " 3." 
4 gperftools package diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index 2eb41cd97e57..264b5d3a24d1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -109,7 +109,8 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -117,35 +118,38 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss1111 1111du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss1111 1111dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss1111 1111D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss1111 1111D--a * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----1111 1111D-LA * * Large (sampled, size <= PAGE): - * ssssssss ssssssss sssscccc ccccD-LA + * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA */ size_t bits; -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) @@ -409,8 +413,14 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -430,6 +440,31 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES 
#ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -442,6 +477,227 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_INLINE size_t +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (*arena_mapbitsp_get(chunk, pageind)); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + +JEMALLOC_INLINE size_t 
+arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + *mapbitsp = size | (*mapbitsp & PAGE_MASK); +} + +JEMALLOC_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp; + + assert(binind <= BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << + CHUNK_MAP_BININD_SHIFT); +} + +JEMALLOC_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp; + + assert(binind < BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(pageind - runind >= map_bias); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | + flags | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; +} + +JEMALLOC_INLINE size_t +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); + assert(mapbits == 
actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; + assert(binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -535,7 +791,7 @@ arena_prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) @@ -544,7 +800,8 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -554,7 +811,7 @@ arena_prof_ctx_get(const void *ptr) sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind-map_bias].prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } @@ -571,19 +828,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; unsigned regind; - binind = arena_bin_index(chunk->arena, bin); + binind = arena_ptr_small_binind_get(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -592,7 +848,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind-map_bias].prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } JEMALLOC_INLINE void * @@ -631,35 +887,42 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind, binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; - } else { + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = 
arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. + */ assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } + ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; } return (ret); @@ -668,8 +931,7 @@ arena_salloc(const void *ptr, bool demote) JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { - size_t pageind; - arena_chunk_map_t *mapelm; + size_t pageind, mapbits; tcache_t *tcache; assert(arena != NULL); @@ -678,47 +940,30 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(CHUNK_ADDR2BASE(ptr) != ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ - if (try_tcache && (tcache = tcache_get(false)) != NULL) - tcache_dalloc_small(tcache, ptr); - else { - arena_run_t *run; - arena_bin_t *bin; + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - bin = run->bin; - if (config_debug) { - size_t binind = arena_bin_index(arena, bin); - UNUSED arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_interval == 0); - } - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); - } + binind = arena_ptr_small_binind_get(ptr, mapbits); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); } else { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); - } else { - malloc_mutex_lock(&arena->lock); + } else arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } } } +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/contrib/jemalloc/include/jemalloc/internal/atomic.h b/contrib/jemalloc/include/jemalloc/internal/atomic.h index 016c472aba98..11a7b47fe0f9 100644 --- a/contrib/jemalloc/include/jemalloc/internal/atomic.h +++ b/contrib/jemalloc/include/jemalloc/internal/atomic.h @@ -47,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -145,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/contrib/jemalloc/include/jemalloc/internal/ctl.h b/contrib/jemalloc/include/jemalloc/internal/ctl.h index a48d09fe7929..adf3827f0948 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ctl.h +++ b/contrib/jemalloc/include/jemalloc/internal/ctl.h @@ -2,6 +2,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; @@ -11,20 +13,21 @@ typedef struct ctl_stats_s ctl_stats_t; struct ctl_node_s { bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. 
*/ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 1f38d1ad475b..a7e76f37eec1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -3,23 +3,34 @@ #include "libc_private.h" #include "namespace.h" -#include -#include -#include -#if !defined(SYS_write) && defined(__NR_write) -#define SYS_write __NR_write +#include +#ifdef _WIN32 +# include +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA +#else +# include +# include +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include +# include +# include #endif -#include #include -#include -#include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include -#include #include #include #include @@ -33,10 +44,18 @@ #include #include #include -#include +#ifdef _MSC_VER +# include +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include +#endif #include -#include -#include #include "un-namespace.h" #include "libc_private.h" @@ -110,6 +129,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; static const bool config_munmap = #ifdef JEMALLOC_MUNMAP true @@ -218,6 +244,9 @@ static const bool config_ivsalloc = #else # define JEMALLOC_ENABLE_INLINE # define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif #endif /* Smallest size class to support. */ @@ -229,7 +258,7 @@ static const bool config_ivsalloc = * classes). */ #ifndef LG_QUANTUM -# ifdef __i386__ +# if (defined(__i386__) || defined(_M_IX86)) # define LG_QUANTUM 4 # endif # ifdef __ia64__ @@ -241,7 +270,7 @@ static const bool config_ivsalloc = # ifdef __sparc64__ # define LG_QUANTUM 4 # endif -# if (defined(__amd64__) || defined(__x86_64__)) +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) # define LG_QUANTUM 4 # endif # ifdef __arm__ @@ -291,9 +320,12 @@ static const bool config_ivsalloc = /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. 
*/ #define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ @@ -324,6 +356,20 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# include +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions @@ -655,8 +701,17 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. + */ +#define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex.h b/contrib/jemalloc/include/jemalloc/internal/mutex.h index d7133f4d29c0..564d6046b425 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mutex.h +++ b/contrib/jemalloc/include/jemalloc/internal/mutex.h @@ -3,10 +3,12 @@ typedef struct malloc_mutex_s malloc_mutex_t; -#ifdef JEMALLOC_OSSPIN -#define MALLOC_MUTEX_INITIALIZER {0} +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) @@ -23,7 +25,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef JEMALLOC_H_STRUCTS struct malloc_mutex_s { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; @@ -62,7 +66,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else pthread_mutex_lock(&mutex->lock); @@ -75,7 +81,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else pthread_mutex_unlock(&mutex->lock); diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 00eb169cfd24..b8ce6b16125a 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -7,11 +7,31 @@ #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) #define arena_dalloc_junk_small 
JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) @@ -22,6 +42,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) @@ -232,13 +253,13 @@ #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_lookup JEMALLOC_N(prof_lookup) #define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) @@ -294,12 +315,13 @@ #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) -#define tcache_initialized JEMALLOC_N(tcache_initialized) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define 
tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) #define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h index a4c563cc63d3..c3e3f9e4bcd8 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -37,6 +37,14 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NCTX_LOCKS 1024 +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -113,9 +121,19 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and cnts_ql. */ + /* Protects nlimbo, cnt_merged, and cnts_ql. */ malloc_mutex_t *lock; + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -152,6 +170,11 @@ struct prof_tdata_s { uint64_t prng_state; uint64_t threshold; uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. 
*/ + bool enq; + bool enq_idump; + bool enq_gdump; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -211,13 +234,13 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = *prof_tdata_tsd_get(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_get(); \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ ret = NULL; \ - break; \ - } \ + break; \ } \ \ if (opt_prof_active == false) { \ @@ -260,6 +283,7 @@ bool prof_boot2(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) +prof_tdata_t *prof_tdata_get(void); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -276,6 +300,22 @@ malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + } + + return (prof_tdata); +} + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { @@ -355,7 +395,8 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { @@ -501,8 +542,9 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { + prof_thr_cnt_t *tcnt; assert(size == isalloc(ptr, true)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index cfb17c28d1bd..38d735c86c81 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -101,6 +101,7 @@ extern size_t nhbins; extern size_t tcache_maxclass; size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -132,7 +133,7 @@ void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif @@ -266,47 +267,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. 
- */ - if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); } JEMALLOC_INLINE void * @@ -390,13 +352,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { - if (config_prof) { + if (config_prof && prof_promote && size == PAGE) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); - chunk->map[pageind-map_bias].bits &= - ~CHUNK_MAP_CLASS_MASK; + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); } if (zero == false) { if (config_fill) { @@ -421,30 +383,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < NBINS); - if (config_fill && opt_junk) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index 20491c885282..0037cf35e703 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -74,6 +74,10 @@ extern bool a_name##_booted; extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ @@ -94,6 +98,10 @@ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ @@ -182,6 +190,99 @@ a_name##_tsd_set(a_type *val) \ } \ } \ } +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, 
a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ diff --git a/contrib/jemalloc/include/jemalloc/internal/util.h b/contrib/jemalloc/include/jemalloc/internal/util.h index d360ae3f5a8f..8479693631ab 100644 --- a/contrib/jemalloc/include/jemalloc/internal/util.h +++ b/contrib/jemalloc/include/jemalloc/internal/util.h @@ -82,10 +82,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - -int buferror(int errnum, char *buf, size_t buflen); +int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating @@ -109,6 +108,8 @@ void malloc_printf(const char *format, ...) 
#ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); void malloc_write(const char *s); +void set_errno(int errnum); +int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) @@ -130,15 +131,28 @@ pow2_ceil(size_t x) return (x); } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ +/* Sets error code */ JEMALLOC_INLINE void -malloc_write(const char *s) +set_errno(int errnum) { - je_malloc_message(NULL, s); +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif } #endif diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index 6322d9773c17..dbb738668b3d 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -7,12 +7,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca" +#define JEMALLOC_VERSION "1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754" #define JEMALLOC_VERSION_MAJOR 1 #define JEMALLOC_VERSION_MINOR 0 #define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 286 -#define JEMALLOC_VERSION_GID "a8f8d7540d66ddee7337db80c92890916e1063ca" +#define JEMALLOC_VERSION_NREV 335 +#define JEMALLOC_VERSION_GID "37b6f95dcd866f51c91488531a2efc3ed4c2b754" #include "jemalloc_defs.h" #include "jemalloc_FreeBSD.h" @@ -37,35 +37,49 @@ extern "C" { * namespace management, and should be omitted in application code unless * JEMALLOC_NO_DEMANGLE is defined (see below). */ -extern const char *je_malloc_conf; -extern void (*je_malloc_message)(void *, const char *); +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); -void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int je_posix_memalign(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); -void *je_realloc(void *ptr, size_t size); -void je_free(void *ptr); +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); -size_t je_malloc_usable_size(const void *ptr); -void je_malloc_stats_print(void (*write_cb)(void *, const char *), - void *je_cbopaque, const char *opts); -int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); -int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + +JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT void je_malloc_stats_print(void 
(*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #ifdef JEMALLOC_EXPERIMENTAL -int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_sallocm(const void *ptr, size_t *rsize, int flags) +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_nallocm(size_t *rsize, size_t size, int flags); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); #endif /* diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h index 7ef7dcfbed2d..85571b9a6594 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h @@ -105,11 +105,27 @@ /* Defined if __attribute__((...)) syntax is supported. */ #define JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_CATTR(s, a) __attribute__((s)) -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) #else -# define JEMALLOC_CATTR(s, a) a -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif /* Defined if sbrk() is supported. */ @@ -178,12 +194,18 @@ /* * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is automatically disabled if configuration determines - * that common sequences of mmap()/munmap() calls will cause virtual memory map - * holes. + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. */ #define JEMALLOC_MUNMAP +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +/* #undef JEMALLOC_MREMAP */ + /* TLS is used to map arenas and magazine caches to threads. */ #define JEMALLOC_TLS @@ -206,9 +228,6 @@ /* #undef JEMALLOC_ZONE */ /* #undef JEMALLOC_ZONE_VERSION */ -/* If defined, use mremap(...MREMAP_FIXED...) 
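/*
 * Illustrative sketch (not from the upstream sources): a minimal version of
 * the compiler dispatch that jemalloc_defs.h now uses for JEMALLOC_EXPORT,
 * JEMALLOC_ALIGNED and JEMALLOC_NOINLINE.  The DEMO_* macros and the
 * DEMO_BUILD_DLL switch are hypothetical stand-ins.
 */
#include <stddef.h>

#if defined(__GNUC__)
#  define DEMO_EXPORT   __attribute__((visibility("default")))
#  define DEMO_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#  ifdef DEMO_BUILD_DLL
#    define DEMO_EXPORT __declspec(dllexport)
#  else
#    define DEMO_EXPORT __declspec(dllimport)
#  endif
#  define DEMO_NOINLINE __declspec(noinline)
#else
#  define DEMO_EXPORT
#  define DEMO_NOINLINE
#endif

/* Public entry points are then annotated once, portably: */
DEMO_EXPORT void *demo_malloc(size_t size);
DEMO_EXPORT void demo_free(void *ptr);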
for huge realloc(). */ -/* #undef JEMALLOC_MREMAP_FIXED */ - /* * Methods for purging unused pages differ between operating systems. * diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index 6f28abe94d44..9f24e7caf61b 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -7,7 +7,7 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ATTR(aligned(CACHELINE)) +JEMALLOC_ALIGNED(CACHELINE) const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) @@ -41,11 +41,11 @@ const uint8_t small_size2bin[] = { /* Function prototypes for non-inline static functions. */ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, bool zero); + bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - bool zero); + size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, @@ -152,7 +152,9 @@ static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -184,28 +186,31 @@ arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - bool zero) + size_t binind, bool zero) { arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; - assert((chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -222,22 +227,23 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, /* Keep track of trailing unused pages for later use. 
*/ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + CHUNK_MAP_DIRTY); } else { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+need_pages-map_bias].bits & - CHUNK_MAP_UNZEROED); - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages-map_bias]); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + run_ind+need_pages)); } /* Update dirty page accounting. */ @@ -258,8 +264,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind+i-map_bias].bits - & CHUNK_MAP_UNZEROED) != 0) { + if (arena_mapbits_unzeroed_get(chunk, + run_ind+i) != 0) { VALGRIND_MAKE_MEM_UNDEFINED( (void *)((uintptr_t) chunk + ((run_ind+i) << @@ -293,10 +299,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). */ - chunk->map[run_ind+need_pages-1-map_bias].bits = - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind-map_bias].bits = size | flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, + flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { assert(zero == false); /* @@ -304,34 +309,30 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. */ - chunk->map[run_ind-map_bias].bits = - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind, 0, binind, + arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. 
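/*
 * Illustrative sketch (not from the upstream sources): the arena.c hunks in
 * this import replace open-coded "chunk->map[i-map_bias].bits" manipulation
 * with arena_mapbits_*_{get,set}() accessors.  The sketch below shows the
 * shape of that accessor style on a simplified, hypothetical map word; the
 * demo_* names and flag layout are illustrative only.
 */
#include <assert.h>
#include <stddef.h>

#define DEMO_MAP_ALLOCATED  ((size_t)0x1U)
#define DEMO_MAP_LARGE      ((size_t)0x2U)
#define DEMO_MAP_DIRTY      ((size_t)0x8U)
#define DEMO_FLAGS_MASK     ((size_t)0xfffU)    /* low bits hold flags */

typedef struct {
    size_t bits;
} demo_chunk_map_t;

static size_t
demo_mapbits_dirty_get(const demo_chunk_map_t *mapelm)
{

    return (mapelm->bits & DEMO_MAP_DIRTY);
}

static size_t
demo_mapbits_size_get(const demo_chunk_map_t *mapelm)
{

    return (mapelm->bits & ~DEMO_FLAGS_MASK);
}

static void
demo_mapbits_large_set(demo_chunk_map_t *mapelm, size_t size, size_t flags)
{

    assert((size & DEMO_FLAGS_MASK) == 0);
    assert((flags & ~DEMO_FLAGS_MASK) == 0);
    mapelm->bits = size | flags | DEMO_MAP_LARGE | DEMO_MAP_ALLOCATED;
}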
*/ if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) - == 0) + arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) - | (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; + arena_mapbits_small_set(chunk, run_ind+i, i, + binind, arena_mapbits_unzeroed_get(chunk, + run_ind+i)); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } - chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << LG_PAGE) | - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind+need_pages-1, + need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) | flag_dirty); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) { + arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == + 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } @@ -351,17 +352,18 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); - assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) - == arena_maxclass); - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == - (chunk->map[chunk_npages-1-map_bias].bits & - CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[0]); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, + chunk_npages-1) == arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + map_bias)); } else { bool zero; size_t unzeroed; @@ -392,24 +394,27 @@ arena_chunk_alloc(arena_t *arena) * chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - chunk->map[0].bits = arena_maxclass | unzeroed; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); /* * There is no need to initialize the internal page map entries * unless the chunk is not zeroed. */ if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) - chunk->map[i-map_bias].bits = unzeroed; + arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) - assert(chunk->map[i-map_bias].bits == unzeroed); + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } } - chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | - unzeroed; + arena_mapbits_unallocated_set(chunk, chunk_npages-1, + arena_maxclass, unzeroed); /* Insert the run into the runs_avail_clean tree. 
*/ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[0]); + arena_mapp_get(chunk, map_bias)); } return (chunk); @@ -424,11 +429,11 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, &chunk->map[0]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -449,7 +454,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -457,6 +463,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. */ key.bits = size | CHUNK_MAP_KEY; @@ -469,7 +477,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -481,7 +489,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -491,7 +499,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -509,7 +517,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -521,7 +529,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -579,40 +587,38 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. 
*/ for (pageind = map_bias; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind-map_bias]; - if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { size_t npages; - npages = mapelm->bits >> LG_PAGE; + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (mapelm->bits & CHUNK_MAP_DIRTY) { + if (arena_mapbits_dirty_get(chunk, pageind)) { size_t i; arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << LG_PAGE) | - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind, + (npages << LG_PAGE), flag_unzeroed); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. */ for (i = 1; i < npages - 1; i++) { - chunk->map[pageind+i-map_bias].bits = - flag_unzeroed; + arena_mapbits_unzeroed_set(chunk, + pageind+i, flag_unzeroed); } if (npages > 1) { - chunk->map[ - pageind+npages-1-map_bias].bits = - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, + pageind+npages-1, 0, flag_unzeroed); } if (config_stats) { @@ -637,17 +643,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind += npages; } else { /* Skip allocated run. */ - if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> LG_PAGE; + if (arena_mapbits_large_get(chunk, pageind)) + pageind += arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; else { + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> LG_PAGE) == 0); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; } } @@ -669,7 +677,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> LG_PAGE; + size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -806,15 +815,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { - size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); assert(size == PAGE || - (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -837,7 +842,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already 
dirty before being allocated. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -845,58 +850,52 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. */ if (dirty) { - chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & - ~PAGE_MASK; + arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages); size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & ~PAGE_MASK) == nrun_size); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages+nrun_pages-1) == nrun_size); + assert(arena_mapbits_dirty_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages-map_bias]); + arena_mapp_get(chunk, run_ind+run_pages)); size += nrun_size; run_pages += nrun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Try to coalesce backward. 
*/ - if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1-map_bias].bits & - ~PAGE_MASK; + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) + == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + size_t prun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -905,31 +904,26 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * Remove predecessor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) - == prun_size); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) - == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + prun_size); + assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, + run_ind)); size += prun_size; run_pages += prun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Insert into runs_avail, now that coalescing is complete. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); if (dirty) { /* @@ -943,14 +937,15 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) } } - /* - * Deallocate chunk if it is now completely unused. The bit - * manipulation checks whether the first run is unallocated and extends - * to the end of the chunk. - */ - if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == - arena_maxclass) + /* Deallocate chunk if it is now completely unused. 
*/ + if (size == arena_maxclass) { + assert(run_ind == map_bias); + assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); arena_chunk_dealloc(arena, chunk); + } /* * It is okay to do dirty page processing here even if the chunk was @@ -969,7 +964,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -978,29 +973,21 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * leading run as separately allocated. Set the last element of each * run first, in case of single-page runs. */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = (oldsize - newsize) - | flag_dirty | (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_DIRTY) == flag_dirty); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); } - chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty + | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); arena_run_dalloc(arena, run, false); } @@ -1011,9 +998,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = newsize >> LG_PAGE; - size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -1022,28 +1007,22 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trailing run as separately allocated. Set the last element of each * run first, in case of single-page runs. 
*/ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = - flag_dirty | - (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | - flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + if (config_debug) { + UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, + flag_dirty | arena_mapbits_unzeroed_get(chunk, + pageind+head_npages)); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1056,12 +1035,13 @@ arena_bin_runs_first(arena_bin_t *bin) if (mapelm != NULL) { arena_chunk_t *chunk; size_t pageind; + arena_run_t *run; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); return (run); } @@ -1074,7 +1054,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1086,7 +1066,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1125,12 +1105,14 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + 
run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); if (run != NULL) { bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. */ + VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - + bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1381,7 +1363,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. */ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, zero); + ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1425,7 +1407,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, zero); + run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1482,8 +1464,7 @@ arena_prof_promoted(const void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); - chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & - ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); + arena_mapbits_large_binind_set(chunk, pageind, binind); assert(isalloc(ptr, false) == PAGE); assert(isalloc(ptr, true) == size); @@ -1521,8 +1502,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t npages, run_ind, past; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); + assert(arena_run_tree_search(&bin->runs, + arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) + == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1542,18 +1524,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past - - run_ind < npages) { + if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < + npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. 
*/ - chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind+npages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | - CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, + arena_mapbits_unzeroed_get(chunk, run_ind)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ @@ -1588,20 +1568,21 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) { size_t pageind; arena_run_t *run; arena_bin_t *bin; - size_t size; + arena_bin_info_t *bin_info; + size_t size, binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + binind = arena_ptr_small_binind_get(ptr, mapelm->bits); + bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1621,6 +1602,35 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm) +{ + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +} + +void +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) +{ + arena_chunk_map_t *mapelm; + + if (config_debug) { + /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ + assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) != BININD_INVALID); + } + mapelm = arena_mapp_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); +} void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1669,12 +1679,12 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); @@ -1689,6 +1699,15 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, (arena_run_t *)ptr, true); } +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +} + static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) @@ -1727,16 +1746,15 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t npages = oldsize >> LG_PAGE; size_t followsize; - assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && - (chunk->map[pageind+npages-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0 && (followsize = - chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - - oldsize) { + arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && + (followsize = arena_mapbits_unallocated_size_get(chunk, + pageind+npages)) >= size - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing @@ -1746,7 +1764,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, + BININD_INVALID, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -1759,29 +1778,22 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * arena_run_dalloc() with the dirty argument set to false * (which is when dirty flag consistency would really matter). 
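/*
 * Illustrative sketch (not from the upstream sources): several arena.c
 * routines are split here into a *_locked() worker plus a thin wrapper that
 * acquires the lock, so that call sites which already hold it (for example
 * the tcache flush paths) can invoke the worker directly.  A generic form of
 * the pattern, using a pthread mutex and hypothetical demo_* names:
 */
#include <pthread.h>
#include <stdint.h>

typedef struct {
    pthread_mutex_t lock;
    uint64_t        ndalloc_large;
} demo_arena_t;

/* Caller must already hold arena->lock. */
static void
demo_dalloc_large_locked(demo_arena_t *arena, void *ptr)
{

    arena->ndalloc_large++;
    /* ... the actual deallocation work would go here ... */
    (void)ptr;
}

/* Convenience wrapper for call sites that do not hold the lock. */
static void
demo_dalloc_large(demo_arena_t *arena, void *ptr)
{

    pthread_mutex_lock(&arena->lock);
    demo_dalloc_large_locked(arena, ptr);
    pthread_mutex_unlock(&arena->lock);
}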
*/ - flag_dirty = (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY) | - (chunk->map[pageind+npages-1-map_bias].bits & - CHUNK_MAP_DIRTY); - chunk->map[pageind-map_bias].bits = size | flag_dirty - | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | + arena_mapbits_dirty_get(chunk, pageind+npages-1); + arena_mapbits_large_set(chunk, pageind, size, flag_dirty); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); @@ -1924,6 +1936,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? size : oldsize; + VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); iqalloc(ptr); return (ret); diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c index 5426b0275c79..6bc245447977 100644 --- a/contrib/jemalloc/src/chunk.c +++ b/contrib/jemalloc/src/chunk.c @@ -30,19 +30,30 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void *chunk_recycle(size_t size, size_t alignment, bool base, + bool *zero); static void chunk_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool *zero) +chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + if (base) { + /* + * This function may need to call base_node_{,de}alloc(), but + * the current chunk allocation request is on behalf of the + * base allocator. Avoid deadlock (and if that weren't an + * issue, potential for infinite recursion) by returning NULL. + */ + return (NULL); + } + alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) @@ -57,8 +68,8 @@ chunk_recycle(size_t size, size_t alignment, bool *zero) } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; + assert(node->size >= leadsize + size); + trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. 
*/ extent_tree_szad_remove(&chunks_szad, node); @@ -123,9 +134,10 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); + assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, zero); + ret = chunk_recycle(size, alignment, base, zero); if (ret != NULL) goto label_return; @@ -168,6 +180,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) size_t i; size_t *p = (size_t *)(uintptr_t)ret; + VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } @@ -182,50 +195,48 @@ chunk_record(void *chunk, size_t size) pages_purge(chunk, size); - xnode = NULL; + /* + * Allocate a node before acquiring chunks_mtx even though it might not + * be needed, because base_node_alloc() may cause a new base chunk to + * be allocated, which could cause deadlock if chunks_mtx were already + * held. + */ + xnode = base_node_alloc(); + malloc_mutex_lock(&chunks_mtx); - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. This does + * not change the position within chunks_ad, so only + * remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + if (xnode != NULL) + base_node_dealloc(xnode); + } else { + /* Coalescing forward failed, so insert a new node. */ + if (xnode == NULL) { /* - * Coalesce chunk with the following address range. - * This does not change the position within chunks_ad, - * so only remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(&chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on chunks_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. + * base_node_alloc() failed, which is an exceedingly + * unlikely failure. Leak chunk; its pages have + * already been purged, so this is only a virtual + * memory leak. */ malloc_mutex_unlock(&chunks_mtx); - xnode = base_node_alloc(); - if (xnode == NULL) - return; - malloc_mutex_lock(&chunks_mtx); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); - break; + return; } + node = xnode; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); } - /* Discard xnode if it ended up unused due to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); /* Try to coalesce backward. 
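/*
 * Illustrative sketch (not from the upstream sources): chunk_record() above
 * now allocates its extent node before taking chunks_mtx, because
 * base_node_alloc() may itself need to allocate a chunk and acquire the same
 * lock.  The general "preallocate outside the critical section" pattern,
 * reduced to a toy singly linked list with hypothetical demo_* names:
 */
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct demo_node_s {
    struct demo_node_s  *next;
    void                *addr;
    size_t              size;
} demo_node_t;

static pthread_mutex_t  demo_mtx = PTHREAD_MUTEX_INITIALIZER;
static demo_node_t      *demo_head;

static void
demo_record(void *addr, size_t size)
{
    /* Allocate speculatively, outside the critical section. */
    demo_node_t *xnode = malloc(sizeof(*xnode));

    pthread_mutex_lock(&demo_mtx);
    if (demo_head != NULL && demo_head->addr == (void *)((uintptr_t)addr +
        size)) {
        /* Coalesce forward; the preallocated node turned out unneeded. */
        demo_head->addr = addr;
        demo_head->size += size;
        pthread_mutex_unlock(&demo_mtx);
        free(xnode);
        return;
    }
    if (xnode == NULL) {
        /* Exceedingly unlikely; leak the range rather than retry. */
        pthread_mutex_unlock(&demo_mtx);
        return;
    }
    xnode->addr = addr;
    xnode->size = size;
    xnode->next = demo_head;
    demo_head = xnode;
    pthread_mutex_unlock(&demo_mtx);
}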
*/ prev = extent_tree_ad_prev(&chunks_ad, node); diff --git a/contrib/jemalloc/src/chunk_mmap.c b/contrib/jemalloc/src/chunk_mmap.c index 9ff7480a06f6..c8da6556b098 100644 --- a/contrib/jemalloc/src/chunk_mmap.c +++ b/contrib/jemalloc/src/chunk_mmap.c @@ -7,7 +7,7 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned, bool *zero); + bool *zero); /******************************************************************************/ @@ -16,6 +16,16 @@ pages_map(void *addr, size_t size) { void *ret; + assert(size != 0); + +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. + */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. @@ -33,7 +43,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in munmap(): %s\n", buf); + buferror(buf, sizeof(buf)); + malloc_printf(": Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); if (opt_abort) abort(); } } +static void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + void pages_purge(void *addr, size_t length) { -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE +#ifdef _WIN32 + VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); #else -# error "No method defined for purging unused dirty pages." -#endif +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# else +# error "No method defined for purging unused dirty pages." +# endif madvise(addr, length, JEMALLOC_MADV_PURGE); +#endif } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) { void *ret, *pages; - size_t alloc_size, leadsize, trailsize; + size_t alloc_size, leadsize; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - pages = pages_map(NULL, alloc_size); - if (pages == NULL) - return (NULL); - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)pages + leadsize); - if (leadsize != 0) { - /* Note that mmap() returned an unaligned mapping. 
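/*
 * Illustrative sketch (not from the upstream sources): pages_trim() above,
 * together with the chunk_alloc_mmap() rewrite in the following hunk, amounts
 * to "map exactly the requested size first, and only over-allocate and trim
 * when the kernel hands back a misaligned region".  A POSIX-only sketch with
 * a hypothetical fixed alignment; the demo_* names are illustrative:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#define DEMO_PAGE   ((size_t)4096)
#define DEMO_ALIGN  ((size_t)(4U << 20))    /* e.g. 4 MiB, chunk-sized */

static void *
demo_map_aligned(size_t size)
{
    void *pages, *ret;
    size_t alloc_size, lead, trail;

    assert(size != 0 && (size & (DEMO_PAGE - 1)) == 0);

    /* Optimistic attempt: an exact-size mapping may already be aligned. */
    ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
        -1, 0);
    if (ret == MAP_FAILED)
        return (NULL);
    if (((uintptr_t)ret & (DEMO_ALIGN - 1)) == 0)
        return (ret);
    munmap(ret, size);

    /* Slow path: over-allocate, then unmap the leading/trailing excess. */
    alloc_size = size + DEMO_ALIGN - DEMO_PAGE;
    pages = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANON, -1, 0);
    if (pages == MAP_FAILED)
        return (NULL);
    lead = (DEMO_ALIGN - ((uintptr_t)pages & (DEMO_ALIGN - 1))) &
        (DEMO_ALIGN - 1);
    ret = (void *)((uintptr_t)pages + lead);
    trail = alloc_size - lead - size;
    if (lead != 0)
        munmap(pages, lead);
    if (trail != 0)
        munmap((void *)((uintptr_t)ret + size), trail);
    return (ret);
}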
*/ - unaligned = true; - pages_unmap(pages, leadsize); - } - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); + do { + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (ret == NULL); assert(ret != NULL); *zero = true; @@ -117,48 +166,24 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * NetBSD has), but in the absence of such a feature, we have to work * hard to efficiently create aligned mappings. The reliable, but * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to + * excess. However, that always results in one or two calls to * pages_unmap(). * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. As such, repeatedly trying to extend unaligned - * mappings could result in an infinite loop, so if extension fails, - * immediately fall back to the reliable method of over-allocation - * followed by trimming. + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. */ + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), chunksize - - offset) == NULL) { - /* - * Extension failed. Clean up, then fall back to the - * reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, true, - zero)); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - offset)); - } + pages_unmap(ret, size); + return (chunk_alloc_mmap_slow(size, alignment, zero)); } assert(ret != NULL); diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index 98ea3d1c5b19..55e766777fd7 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -14,6 +14,32 @@ static bool ctl_initialized; static uint64_t ctl_epoch; static ctl_stats_t ctl_stats; +/******************************************************************************/ +/* Helpers for named and indexed nodes. */ + +static inline const ctl_named_node_t * +ctl_named_node(const ctl_node_t *node) +{ + + return ((node->named) ? (const ctl_named_node_t *)node : NULL); +} + +static inline const ctl_named_node_t * +ctl_named_children(const ctl_named_node_t *node, int index) +{ + const ctl_named_node_t *children = ctl_named_node(node->children); + + return (children ? 
&children[index] : NULL); +} + +static inline const ctl_indexed_node_t * +ctl_indexed_node(const ctl_node_t *node) +{ + + return ((node->named == false) ? (const ctl_indexed_node_t *)node : + NULL); +} + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -22,7 +48,7 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ +const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); @@ -50,6 +76,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -149,35 +176,39 @@ CTL_PROTO(stats_mapped) /* Maximum tree depth. */ #define CTL_MAX_DEPTH 6 -#define NAME(n) true, {.named = {n -#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL -#define CTL(c) 0, NULL}}, c##_ctl +#define NAME(n) {true}, n +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ + (ctl_node_t *)c##_node, \ + NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. */ -#define INDEX(i) false, {.indexed = {i##_index}}, NULL +#define INDEX(i) {false}, i##_index -static const ctl_node_t tcache_node[] = { +static const ctl_named_node_t tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; -static const ctl_node_t thread_node[] = { +static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(tcache)} + {NAME("tcache"), CHILD(named, tcache)} }; -static const ctl_node_t config_node[] = { +static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("mremap"), CTL(config_mremap)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -190,7 +221,7 @@ static const ctl_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; -static const ctl_node_t opt_node[] = { +static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -216,31 +247,31 @@ static const ctl_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; -static const ctl_node_t arenas_bin_i_node[] = { +static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; -static const ctl_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(arenas_bin_i)} +static const ctl_named_node_t super_arenas_bin_i_node[] = { + {NAME(""), CHILD(named, arenas_bin_i)} }; -static const ctl_node_t arenas_bin_node[] = { +static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_node_t arenas_lrun_i_node[] = { +static const ctl_named_node_t arenas_lrun_i_node[] = 
{ {NAME("size"), CTL(arenas_lrun_i_size)} }; -static const ctl_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(arenas_lrun_i)} +static const ctl_named_node_t super_arenas_lrun_i_node[] = { + {NAME(""), CHILD(named, arenas_lrun_i)} }; -static const ctl_node_t arenas_lrun_node[] = { +static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; -static const ctl_node_t arenas_node[] = { +static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, @@ -248,45 +279,45 @@ static const ctl_node_t arenas_node[] = { {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(arenas_bin)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; -static const ctl_node_t prof_node[] = { +static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -static const ctl_node_t stats_chunks_node[] = { +static const ctl_named_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, {NAME("high"), CTL(stats_chunks_high)} }; -static const ctl_node_t stats_huge_node[] = { +static const ctl_named_node_t stats_huge_node[] = { {NAME("allocated"), CTL(stats_huge_allocated)}, {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, {NAME("ndalloc"), CTL(stats_huge_ndalloc)} }; -static const ctl_node_t stats_arenas_i_small_node[] = { +static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_node_t stats_arenas_i_large_node[] = { +static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -static const ctl_node_t stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -297,29 +328,29 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_bins_j)} +static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_bins_node[] = { +static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, 
{NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_lruns_j)} +static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_lruns_node[] = { +static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_node[] = { +static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, @@ -327,41 +358,41 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(stats_arenas_i_small)}, - {NAME("large"), CHILD(stats_arenas_i_large)}, - {NAME("bins"), CHILD(stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(stats_arenas_i_lruns)} + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} }; -static const ctl_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(stats_arenas_i)} +static const ctl_named_node_t super_stats_arenas_i_node[] = { + {NAME(""), CHILD(named, stats_arenas_i)} }; -static const ctl_node_t stats_arenas_node[] = { +static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -static const ctl_node_t stats_node[] = { +static const ctl_named_node_t stats_node[] = { {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(stats_chunks)}, - {NAME("huge"), CHILD(stats_huge)}, - {NAME("arenas"), CHILD(stats_arenas)} + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("huge"), CHILD(named, stats_huge)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; -static const ctl_node_t root_node[] = { +static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("thread"), CHILD(thread)}, - {NAME("config"), CHILD(config)}, - {NAME("opt"), CHILD(opt)}, - {NAME("arenas"), CHILD(arenas)}, - {NAME("prof"), CHILD(prof)}, - {NAME("stats"), CHILD(stats)} + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, + {NAME("opt"), CHILD(named, opt)}, + {NAME("arenas"), CHILD(named, arenas)}, + {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)} }; -static const ctl_node_t super_root_node[] = { - {NAME(""), CHILD(root)} +static const ctl_named_node_t super_root_node[] = { + {NAME(""), CHILD(named, root)} }; #undef NAME @@ -491,7 +522,7 @@ static void ctl_refresh(void) { unsigned i; - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -597,7 +628,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, int ret; const char *elm, *tdot, *dot; size_t elen, i, j; - const ctl_node_t *node; + const ctl_named_node_t *node; elm = name; /* Equivalent to strchrnul(). 
*/ @@ -609,21 +640,21 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } node = super_root_node; for (i = 0; i < *depthp; i++) { - assert(node->named); - assert(node->u.named.nchildren > 0); - if (node->u.named.children[0].named) { - const ctl_node_t *pnode = node; + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + const ctl_named_node_t *pnode = node; /* Children are named. */ - for (j = 0; j < node->u.named.nchildren; j++) { - const ctl_node_t *child = - &node->u.named.children[j]; - if (strlen(child->u.named.name) == elen - && strncmp(elm, child->u.named.name, - elen) == 0) { + for (j = 0; j < node->nchildren; j++) { + const ctl_named_node_t *child = + ctl_named_children(node, j); + if (strlen(child->name) == elen && + strncmp(elm, child->name, elen) == 0) { node = child; if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = + (const ctl_node_t *)node; mibp[i] = j; break; } @@ -634,7 +665,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } } else { uintmax_t index; - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Children are indexed. */ index = malloc_strtoumax(elm, NULL, 10); @@ -643,16 +674,15 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto label_return; } - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mibp, *depthp, - (size_t)index); + inode = ctl_indexed_node(node->children); + node = inode->index(mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; } if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = (const ctl_node_t *)node; mibp[i] = (size_t)index; } @@ -696,6 +726,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t depth; ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; + const ctl_named_node_t *node; if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; @@ -707,13 +738,14 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ret != 0) goto label_return; - if (nodes[depth-1]->ctl == NULL) { + node = ctl_named_node(nodes[depth-1]); + if (node != NULL && node->ctl) + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; - goto label_return; } - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); label_return: return(ret); } @@ -738,7 +770,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - const ctl_node_t *node; + const ctl_named_node_t *node; size_t i; if (ctl_initialized == false && ctl_init()) { @@ -749,19 +781,21 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Iterate down the tree. */ node = super_root_node; for (i = 0; i < miblen; i++) { - if (node->u.named.children[0].named) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->u.named.nchildren <= mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } - node = &node->u.named.children[mib[i]]; + node = ctl_named_children(node, mib[i]); } else { - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Indexed element. 
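The ctl.c rework above splits the old tagged-union ctl_node_t into ctl_named_node_t and ctl_indexed_node_t, which share a one-field ctl_node_t header; ctl_lookup() and ctl_bymib() now test that header and cast, instead of reaching into a union. The following stand-alone sketch shows the shape of that design with simplified, illustrative types (these are not the patch's own definitions):

/* Sketch of a named/indexed node split in the style of the ctl changes. */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct {
        bool named;             /* Common header; discriminates node kinds. */
} node_t;

typedef struct named_node_s named_node_t;
typedef struct indexed_node_s indexed_node_t;

struct named_node_s {
        node_t node;            /* node.named == true */
        const char *name;
        size_t nchildren;
        const node_t *children; /* All-named or all-indexed child array. */
};

struct indexed_node_s {
        node_t node;            /* node.named == false */
        const named_node_t *(*index)(size_t i);
};

/* Checked "downcasts": NULL when the node is not of the requested kind. */
static const named_node_t *
as_named(const node_t *n)
{

        return (n->named ? (const named_node_t *)n : NULL);
}

static const indexed_node_t *
as_indexed(const node_t *n)
{

        return (n->named == false ? (const indexed_node_t *)n : NULL);
}

/* One lookup step: resolve a child either by name or by numeric index. */
static const named_node_t *
lookup_step(const named_node_t *parent, const char *elm, size_t idx)
{

        if (as_named(parent->children) != NULL) {
                const named_node_t *children =
                    (const named_node_t *)parent->children;
                size_t j;

                for (j = 0; j < parent->nchildren; j++) {
                        if (strcmp(children[j].name, elm) == 0)
                                return (&children[j]);
                }
                return (NULL);
        }
        return (as_indexed(parent->children)->index(idx));
}

The diff's ctl_named_children() helper (its tail is visible at the top of this hunk) does the equivalent cast-and-index in one place so that the differing element sizes of the two child-array types stay contained; the sketch sidesteps that by casting before indexing.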
*/ - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mib, miblen, mib[i]); + inode = ctl_indexed_node(node->children); + node = inode->index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -770,12 +804,12 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Call the ctl function. */ - if (node->ctl == NULL) { + if (node && node->ctl) + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + else { /* Partial MIB. */ ret = ENOENT; - goto label_return; } - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); label_return: return(ret); @@ -799,22 +833,17 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) -#define VOID() do { \ - READONLY(); \ - WRITEONLY(); \ -} while (0) - #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -822,7 +851,7 @@ ctl_boot(void) ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -832,7 +861,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -859,7 +888,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -881,7 +910,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -900,7 +929,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -924,7 +953,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -941,7 +970,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -958,7 +987,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -972,9 +1001,8 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, uint64_t newval; malloc_mutex_lock(&ctl_mtx); - newval = 0; WRITE(newval, uint64_t); - if (newval != 0) + if (newp != NULL) ctl_refresh(); READ(ctl_epoch, uint64_t); @@ -1018,7 +1046,8 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, if (config_tcache == false) return (ENOENT); - VOID(); + READONLY(); + WRITEONLY(); tcache_flush(); @@ -1091,6 +1120,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) @@ -1133,7 +1163,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_GEN(arenas_bin_i_size, 
arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1143,7 +1173,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1201,7 +1231,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EFAULT; goto label_return; } else { - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); @@ -1326,7 +1356,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1344,7 +1374,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1365,10 +1395,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { - const ctl_node_t * ret; + const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index 23eb074ac53b..8a4ec942410e 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -140,11 +140,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; +#ifdef JEMALLOC_MREMAP /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the * source nor the destination are in dss. */ -#ifdef JEMALLOC_MREMAP_FIXED if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); @@ -168,7 +168,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 9292d26f6fb0..cdf622237ba2 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -13,7 +13,7 @@ const char *__malloc_options_1_0 = NULL; __sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -56,7 +56,26 @@ static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. 
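ctl_refresh() and arenas_purge_ctl() above (and stats_print() later in this patch) replace C99 variable-length arrays with a VARIABLE_ARRAY() macro, so the files can also be built by compilers without VLA support such as MSVC. The macro's definition is not part of this section; one plausible shape, shown purely as an illustration:

#include <stddef.h>
#ifdef _MSC_VER
#  include <malloc.h>   /* _alloca() */
#  define VARIABLE_ARRAY(type, name, count) \
        type *name = (type *)_alloca(sizeof(type) * (count))
#else
#  define VARIABLE_ARRAY(type, name, count) type name[(count)]
#endif

/* Example use: a run-time-sized scratch buffer on the stack. */
static size_t
sum_copy(const size_t *vals, size_t n)
{
        size_t total = 0;
        size_t i;
        VARIABLE_ARRAY(size_t, tmp, n);

        for (i = 0; i < n; i++)
                total += (tmp[i] = vals[i]);
        return (total);
}

Either expansion keeps the array on the stack, which is why the callers above can keep using tarenas and initialized exactly as before.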
*/ +#ifdef _WIN32 +static malloc_mutex_t init_lock; + +JEMALLOC_ATTR(constructor) +static void WINAPI +_init_init_lock(void) +{ + + malloc_mutex_init(&init_lock); +} + +#ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) +static const void (WINAPI *init_init_lock)(void) = _init_init_lock; +#endif + +#else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ @@ -233,11 +252,17 @@ malloc_ncpus(void) unsigned ret; long result; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwNumberOfProcessors; +#else result = sysconf(_SC_NPROCESSORS_ONLN); if (result == -1) { /* Error. */ ret = 1; } +#endif ret = (unsigned)result; return (ret); @@ -373,13 +398,14 @@ malloc_conf_init(void) } break; case 1: { +#ifndef _WIN32 int linklen; const char *linkname = -#ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -#else +# else "/etc/malloc.conf" -#endif +# endif ; if ((linklen = readlink(linkname, buf, @@ -390,7 +416,9 @@ malloc_conf_init(void) */ buf[linklen] = '\0'; opts = buf; - } else { + } else +#endif + { /* No configuration specified. */ buf[0] = '\0'; opts = buf; @@ -456,9 +484,9 @@ malloc_conf_init(void) uintmax_t um; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ um = malloc_strtoumax(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -477,9 +505,9 @@ malloc_conf_init(void) long l; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ l = strtol(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -615,7 +643,8 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { @@ -770,13 +799,11 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. 
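Starting with malloc_conf_init() above, direct reads and writes of errno are replaced by get_errno()/set_errno() calls; like the _WIN32 init_lock constructor, this keeps the MinGW/MSVC ports in a single code path, presumably by routing the value through GetLastError()/SetLastError() on Windows. The wrappers live in the internal util.h, outside this section; a sketch of the assumed shape:

#ifdef _WIN32
#  include <windows.h>
#else
#  include <errno.h>
#endif

static inline void
set_errno(int errnum)
{

#ifdef _WIN32
        SetLastError(errnum);
#else
        errno = errnum;
#endif
}

static inline int
get_errno(void)
{

#ifdef _WIN32
        return ((int)GetLastError());
#else
        return (errno);
#endif
}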
*/ -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_malloc(size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -814,7 +841,7 @@ je_malloc(size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); @@ -921,8 +948,6 @@ imemalign(void **memptr, size_t alignment, size_t size, return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -932,8 +957,6 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_aligned_alloc(size_t alignment, size_t size) { @@ -942,21 +965,19 @@ je_aligned_alloc(size_t alignment, size_t size) if ((err = imemalign(&ret, alignment, size, 1)) != 0) { ret = NULL; - errno = err; + set_errno(err); } JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), false); return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_calloc(size_t num, size_t size) { void *ret; size_t num_size; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -1012,7 +1033,7 @@ je_calloc(size_t num, size_t size) "memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) @@ -1026,12 +1047,11 @@ je_calloc(size_t num, size_t size) return (ret); } -JEMALLOC_ATTR(visibility("default")) void * je_realloc(void *ptr, size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_size = 0; size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); @@ -1113,7 +1133,7 @@ je_realloc(void *ptr, size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1155,7 +1175,7 @@ je_realloc(void *ptr, size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } @@ -1174,7 +1194,6 @@ je_realloc(void *ptr, size_t size) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { @@ -1209,8 +1228,6 @@ je_free(void *ptr) */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t size) { @@ -1222,8 +1239,6 @@ je_memalign(size_t alignment, size_t size) #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { @@ -1252,17 +1267,12 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. 
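The JEMALLOC_ATTR(malloc), JEMALLOC_ATTR(nonnull(1)) and JEMALLOC_ATTR(visibility("default")) annotations dropped from the definitions above are not simply lost: they presumably move to the prototypes in the headers, and symbol visibility in particular is now expressed with a single JEMALLOC_EXPORT macro, visible on the hook and prefork definitions further down. Its definition sits in jemalloc_defs.h rather than in this section; a plausible sketch, assuming the usual GCC/MSVC split:

#include <stddef.h>

#ifdef _WIN32
#  define JEMALLOC_EXPORT __declspec(dllexport)
#else
#  define JEMALLOC_EXPORT __attribute__((visibility("default")))
#endif

/* Example: export a symbol without repeating per-definition attributes. */
JEMALLOC_EXPORT void *example_malloc(size_t size);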
*/ -JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = je_free; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = je_malloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; +JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = + je_realloc; +JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = + je_memalign; #endif /* @@ -1273,7 +1283,6 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; * Begin non-standard functions. */ -JEMALLOC_ATTR(visibility("default")) size_t je_malloc_usable_size(const void *ptr) { @@ -1289,7 +1298,6 @@ je_malloc_usable_size(const void *ptr) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) @@ -1298,7 +1306,6 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_print(write_cb, cbopaque, opts); } -JEMALLOC_ATTR(visibility("default")) int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1310,7 +1317,6 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { @@ -1321,7 +1327,6 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (ctl_nametomib(name, mibp, miblenp)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1357,8 +1362,6 @@ iallocm(size_t usize, size_t alignment, bool zero) return (imalloc(usize)); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { @@ -1367,7 +1370,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(size != 0); @@ -1380,6 +1382,8 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) goto label_oom; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto label_oom; @@ -1426,8 +1430,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { @@ -1439,7 +1441,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(*ptr != NULL); @@ -1449,6 +1450,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) p = *ptr; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* * usize isn't knowable before iralloc() returns when extra is * non-zero. 
Therefore, compute its maximum possible value and @@ -1536,8 +1539,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_sallocm(const void *ptr, size_t *rsize, int flags) { @@ -1557,8 +1558,6 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_dallocm(void *ptr, int flags) { @@ -1586,7 +1585,6 @@ je_dallocm(void *ptr, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(visibility("default")) int je_nallocm(size_t *rsize, size_t size, int flags) { @@ -1622,8 +1620,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_prefork(void) #endif { @@ -1644,8 +1641,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_postfork(void) #endif { diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c index 7be5fc9ef10e..4a90a05e984e 100644 --- a/contrib/jemalloc/src/mutex.c +++ b/contrib/jemalloc/src/mutex.c @@ -1,10 +1,14 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include #endif +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + /******************************************************************************/ /* Data. */ @@ -16,7 +20,7 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static void pthread_create_once(void); #endif @@ -26,7 +30,7 @@ static void pthread_create_once(void); * process goes multi-threaded. */ -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); @@ -44,8 +48,7 @@ pthread_create_once(void) isthreaded = true; } -JEMALLOC_ATTR(visibility("default")) -int +JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) @@ -79,7 +82,12 @@ _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN + +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, + _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { @@ -101,7 +109,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (true); } pthread_mutexattr_destroy(&attr); - #endif return (false); } diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index 227560b8fce5..de1d392993e7 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -64,11 +64,6 @@ static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -static malloc_mutex_t enq_mtx; -static bool enq; -static bool enq_idump; -static bool enq_gdump; - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
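The mutex.c hunk above gives malloc_mutex_init() a Windows branch built on InitializeCriticalSectionAndSpinCount(), with _CRT_SPINCOUNT (4000) as a fallback spin count, while the pthread branches keep their existing behavior. A trimmed-down sketch of that two-way split, with the OSSpin and FreeBSD callback branches and the mutex-attribute setup omitted:

#include <stdbool.h>

#ifdef _WIN32
#  include <windows.h>
#  ifndef _CRT_SPINCOUNT
#    define _CRT_SPINCOUNT 4000
#  endif
typedef struct { CRITICAL_SECTION lock; } my_mutex_t;

static bool
my_mutex_init(my_mutex_t *mutex)
{

        /* Returns true on failure, mirroring malloc_mutex_init(). */
        return (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
            _CRT_SPINCOUNT));
}
#else
#  include <pthread.h>
typedef struct { pthread_mutex_t lock; } my_mutex_t;

static bool
my_mutex_init(my_mutex_t *mutex)
{

        return (pthread_mutex_init(&mutex->lock, NULL) != 0);
}
#endif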
*/ @@ -148,20 +143,19 @@ bt_dup(prof_bt_t *bt) } static inline void -prof_enter(void) +prof_enter(prof_tdata_t *prof_tdata) { cassert(config_prof); - malloc_mutex_lock(&enq_mtx); - enq = true; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq == false); + prof_tdata->enq = true; malloc_mutex_lock(&bt2ctx_mtx); } static inline void -prof_leave(void) +prof_leave(prof_tdata_t *prof_tdata) { bool idump, gdump; @@ -169,13 +163,12 @@ prof_leave(void) malloc_mutex_unlock(&bt2ctx_mtx); - malloc_mutex_lock(&enq_mtx); - enq = false; - idump = enq_idump; - enq_idump = false; - gdump = enq_gdump; - enq_gdump = false; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq); + prof_tdata->enq = false; + idump = prof_tdata->enq_idump; + prof_tdata->enq_idump = false; + gdump = prof_tdata->enq_gdump; + prof_tdata->enq_gdump = false; if (idump) prof_idump(); @@ -446,12 +439,9 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } + prof_tdata = prof_tdata_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { union { @@ -468,52 +458,48 @@ prof_lookup(prof_bt_t *bt) * This thread's cache lacks bt. Look for it in the global * cache. */ - prof_enter(); + prof_enter(prof_tdata); if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ ctx.v = imalloc(sizeof(prof_ctx_t)); if (ctx.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); return (NULL); } btkey.p = bt_dup(bt); if (btkey.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); idalloc(ctx.v); return (NULL); } ctx.p->bt = btkey.p; ctx.p->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->nlimbo = 1; memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ - prof_leave(); + prof_leave(prof_tdata); idalloc(btkey.v); idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; new_ctx = true; } else { /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). + * Increment nlimbo, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). */ malloc_mutex_lock(ctx.p->lock); - ctx.p->cnt_merged.curobjs++; + ctx.p->nlimbo++; malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } - prof_leave(); + prof_leave(prof_tdata); /* Link a prof_thd_cnt_t into ctx for this thread. */ if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { @@ -555,7 +541,7 @@ prof_lookup(prof_bt_t *bt) ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; + ctx.p->nlimbo--; malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. 
*/ @@ -688,26 +674,30 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) static void prof_ctx_destroy(prof_ctx_t *ctx) { + prof_tdata_t *prof_tdata; cassert(config_prof); /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. */ - prof_enter(); + prof_tdata = *prof_tdata_tsd_get(); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) assert(false); - prof_leave(); + prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); @@ -717,9 +707,9 @@ prof_ctx_destroy(prof_ctx_t *ctx) * Compensate for increment in prof_ctx_merge() or * prof_lookup(). */ - ctx->cnt_merged.curobjs--; + ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); - prof_leave(); + prof_leave(prof_tdata); } } @@ -738,12 +728,12 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ql_remove(&ctx->cnts_ql, cnt, cnts_link); if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0) { + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { /* - * Artificially raise ctx->cnt_merged.curobjs in order to keep - * another thread from winning the race to destroy ctx while - * this one has ctx->lock dropped. Without this, it would be - * possible for another thread to: + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: * * 1) Sample an allocation associated with ctx. * 2) Deallocate the sampled object. @@ -752,7 +742,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) * The result would be that ctx no longer exists by the time * this thread accesses it in prof_ctx_destroy(). */ - ctx->cnt_merged.curobjs++; + ctx->nlimbo++; destroy = true; } else destroy = false; @@ -768,7 +758,16 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) cassert(config_prof); - if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + /* + * Current statistics can sum to 0 as a result of unmerged per thread + * statistics. Additionally, interval- and growth-triggered dumps can + * occur between the time a ctx is created and when its statistics are + * filled in. Avoid dumping any ctx that is an artifact of either + * implementation detail. 
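The prof.c changes above stop abusing cnt_merged.curobjs as a destruction guard and add a dedicated nlimbo counter: each thread that holds a ctx outside bt2ctx_mtx bumps nlimbo, and prof_ctx_destroy() only proceeds when the merged statistics are empty and its own limbo reference is the last one. That is what lets the dump path treat curobjs as a real object count. A reduced sketch of the invariant, with illustrative names that are not jemalloc's:

#include <pthread.h>
#include <stdbool.h>

typedef struct {
        pthread_mutex_t lock;
        unsigned curobjs;       /* Live sampled objects (real statistics). */
        unsigned nlimbo;        /* Threads holding the ctx outside the lock. */
} ctx_t;

/* Taken before dropping the global table lock. */
static void
ctx_hold(ctx_t *ctx)
{

        pthread_mutex_lock(&ctx->lock);
        ctx->nlimbo++;          /* Forbids destruction until released. */
        pthread_mutex_unlock(&ctx->lock);
}

static void
ctx_release(ctx_t *ctx)
{

        pthread_mutex_lock(&ctx->lock);
        ctx->nlimbo--;
        pthread_mutex_unlock(&ctx->lock);
}

/* Caller holds ctx->lock and one limbo reference of its own. */
static bool
ctx_can_destroy(const ctx_t *ctx)
{

        return (ctx->curobjs == 0 && ctx->nlimbo == 1);
}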
+ */ + if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { + assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); @@ -831,6 +830,7 @@ prof_dump_maps(bool propagate_err) static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { + prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; union { @@ -845,7 +845,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_enter(); + prof_tdata = prof_tdata_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); + prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { @@ -896,7 +899,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_flush(propagate_err)) goto label_error; close(prof_dump_fd); - prof_leave(); + prof_leave(prof_tdata); if (leakcheck && cnt_all.curbytes != 0) { malloc_printf(": Leak summary: %"PRId64" byte%s, %" @@ -911,7 +914,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) return (false); label_error: - prof_leave(); + prof_leave(prof_tdata); return (true); } @@ -933,6 +936,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) "%s.%d.%"PRIu64".%c.heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } + prof_dump_seq++; } static void @@ -956,19 +960,24 @@ prof_fdump(void) void prof_idump(void) { + prof_tdata_t *prof_tdata; char filename[PATH_MAX + 1]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_idump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_idump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1005,19 +1014,24 @@ prof_mdump(const char *filename) void prof_gdump(void) { + prof_tdata_t *prof_tdata; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_gdump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_gdump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1110,6 +1124,10 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; + prof_tdata->enq = false; + prof_tdata->enq_idump = false; + prof_tdata->enq_gdump = false; + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); @@ -1123,24 +1141,41 @@ prof_tdata_cleanup(void *arg) cassert(config_prof); - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* Iteratively merge cnt's into the global stats and delete them. 
*/ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); + if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY + * in order to receive another callback. + */ + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to PROF_TDATA_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the prof_tdata. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (prof_tdata != NULL) { + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + idalloc(prof_tdata->vec); + idalloc(prof_tdata); + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); } - - idalloc(prof_tdata->vec); - - idalloc(prof_tdata); - prof_tdata = NULL; - prof_tdata_tsd_set(&prof_tdata); } void @@ -1206,12 +1241,6 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); - if (malloc_mutex_init(&enq_mtx)) - return (true); - enq = false; - enq_idump = false; - enq_gdump = false; - if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) diff --git a/contrib/jemalloc/src/quarantine.c b/contrib/jemalloc/src/quarantine.c index 5fb6c390c41d..9005ab3ba067 100644 --- a/contrib/jemalloc/src/quarantine.c +++ b/contrib/jemalloc/src/quarantine.c @@ -1,17 +1,31 @@ #include "jemalloc/internal/jemalloc_internal.h" +/* + * quarantine pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) +#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) +#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY + /******************************************************************************/ /* Data. */ +typedef struct quarantine_obj_s quarantine_obj_t; typedef struct quarantine_s quarantine_t; +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; + size_t curbytes; + size_t curobjs; + size_t first; #define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - void *objs[1]; /* Dynamically sized ring buffer. */ + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ }; static void quarantine_cleanup(void *arg); @@ -35,7 +49,7 @@ quarantine_init(size_t lg_maxobjs) quarantine_t *quarantine; quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(void *))); + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -58,23 +72,22 @@ quarantine_grow(quarantine_t *quarantine) return (quarantine); ret->curbytes = quarantine->curbytes; - if (quarantine->first + quarantine->curobjs < (ZU(1) << + ret->curobjs = quarantine->curobjs; + if (quarantine->first + quarantine->curobjs <= (ZU(1) << quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(void *)); - ret->curobjs = quarantine->curobjs; + quarantine->curobjs * sizeof(quarantine_obj_t)); } else { /* objs ring buffer data wrap around. */ - size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * - sizeof(void *)); - ret->curobjs = ncopy; - if (quarantine->curobjs != 0) { - memcpy(&ret->objs[ret->curobjs], quarantine->objs, - quarantine->curobjs - ncopy); - } + size_t ncopy_b = quarantine->curobjs - ncopy_a; + + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a + * sizeof(quarantine_obj_t)); + memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * + sizeof(quarantine_obj_t)); } return (ret); @@ -85,10 +98,10 @@ quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - void *ptr = quarantine->objs[quarantine->first]; - size_t usize = isalloc(ptr, config_prof); - idalloc(ptr); - quarantine->curbytes -= usize; + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << quarantine->lg_maxobjs) - 1); @@ -105,10 +118,25 @@ quarantine(void *ptr) assert(opt_quarantine); quarantine = *quarantine_tsd_get(); - if (quarantine == NULL && (quarantine = - quarantine_init(LG_MAXOBJS_INIT)) == NULL) { - idalloc(ptr); - return; + if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { + if (quarantine == NULL) { + if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == + NULL) { + idalloc(ptr); + return; + } + } else { + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called + * after quarantine_cleanup() was called. 
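quarantine_grow() above now copies the live region [first, first + curobjs) of the old ring buffer either with one memcpy (no wrap-around) or with two (wrap-around), and both copies are scaled by the element size, which the old second memcpy was not. The same copy logic in plain C, detached from the jemalloc types:

#include <stddef.h>
#include <string.h>

/*
 * Copy `count` elements starting at index `first` out of a ring buffer with
 * capacity `cap` into a fresh linear buffer `dst`.
 */
static void
ring_copy(void *dst, const void *src, size_t elem_size, size_t cap,
    size_t first, size_t count)
{
        char *d = (char *)dst;
        const char *s = (const char *)src;

        if (first + count <= cap) {
                /* Live region is contiguous: a single copy suffices. */
                memcpy(d, s + first * elem_size, count * elem_size);
        } else {
                /* Live region wraps: tail of the old buffer, then its head. */
                size_t ncopy_a = cap - first;
                size_t ncopy_b = count - ncopy_a;

                memcpy(d, s + first * elem_size, ncopy_a * elem_size);
                memcpy(d + ncopy_a * elem_size, s, ncopy_b * elem_size);
        }
}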
+ */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); + } + idalloc(ptr); + return; + } } /* * Drain one or more objects if the quarantine size limit would be @@ -128,7 +156,9 @@ quarantine(void *ptr) if (quarantine->curbytes + usize <= opt_quarantine) { size_t offset = (quarantine->first + quarantine->curobjs) & ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine->objs[offset] = ptr; + quarantine_obj_t *obj = &quarantine->objs[offset]; + obj->ptr = ptr; + obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; if (opt_junk) @@ -144,9 +174,26 @@ quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; - if (quarantine != NULL) { + if (quarantine == QUARANTINE_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY + * in order to receive another callback. + */ + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } else if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to QUARANTINE_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the quarantine. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (quarantine != NULL) { quarantine_drain(quarantine, 0); idalloc(quarantine); + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); } } diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 08f7098caad3..433b80d128d9 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -295,16 +295,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, abort(); } - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. 
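prof_tdata_cleanup() above and quarantine_cleanup() just before it use the same thread-shutdown pattern: pointer values just above NULL (PURGATORY, REINCARNATED) record whether the destructor has already run and whether some later destructor touched the slot again, so cleanup can be re-armed without re-creating the object. A compact stand-alone sketch of that state machine, with illustrative names and __thread standing in for the real TSD machinery:

#include <stdint.h>
#include <stdlib.h>

typedef struct thing_s { int payload; } thing_t;

#define THING_STATE_REINCARNATED        ((thing_t *)(uintptr_t)1)
#define THING_STATE_PURGATORY           ((thing_t *)(uintptr_t)2)
#define THING_STATE_MAX                 THING_STATE_PURGATORY

static __thread thing_t *thing_tsd;     /* Stand-in for the TSD slot. */

/* Destructor; the TSD layer may invoke it again if the slot is re-set. */
static void
thing_cleanup(void)
{

        if (thing_tsd == THING_STATE_REINCARNATED) {
                /* A later destructor used the slot; request another pass. */
                thing_tsd = THING_STATE_PURGATORY;
        } else if (thing_tsd == THING_STATE_PURGATORY) {
                /* Second pass with nothing new to do: stay quiet. */
        } else if (thing_tsd != NULL) {
                free(thing_tsd);
                thing_tsd = THING_STATE_PURGATORY;
        }
}

/* Accessor usable even from other destructors running after cleanup. */
static thing_t *
thing_get(void)
{

        if ((uintptr_t)thing_tsd > (uintptr_t)THING_STATE_MAX)
                return (thing_tsd);             /* Fully initialized. */
        if (thing_tsd == NULL) {
                thing_tsd = (thing_t *)malloc(sizeof(thing_t));
                return (thing_tsd);             /* Lazy first-use creation. */
        }
        if (thing_tsd == THING_STATE_PURGATORY) {
                /* Used after cleanup: note it so cleanup gets re-armed. */
                thing_tsd = THING_STATE_REINCARNATED;
        }
        return (NULL);
}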
- */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - if (opts != NULL) { unsigned i; @@ -330,7 +320,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); if (general) { int err; const char *cpv; @@ -375,7 +366,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": \"%s\"\n", cpv); \ } - write_cb(cbopaque, "Run-time option settings:\n"); + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) @@ -425,7 +417,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Min active:dirty page ratio per arena: %u:1\n", (1U << ssv)); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) @@ -447,7 +439,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " (2^%zd)\n", (((uint64_t)1U) << ssv), ssv); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Average profile dump interval: N/A\n"); } } @@ -498,11 +490,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i, ninitialized; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); for (i = ninitialized = 0; i < narenas; i++) { @@ -527,11 +519,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); @@ -547,5 +539,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } - write_cb(cbopaque, "--- End jemalloc statistics ---\n"); + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 9c4970c52389..60244c45f8ba 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -24,6 +24,46 @@ size_t tcache_salloc(const void *ptr) return (arena_salloc(ptr, false)); } +void +tcache_event_hard(tcache_t *tcache) +{ + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low water mark. + */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that the + * fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div stays + * greater than 0. 
+ */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { @@ -80,12 +120,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = - &chunk->map[pageind-map_bias]; + arena_mapp_get(chunk, pageind); if (config_fill && opt_junk) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } - arena_dalloc_bin(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked(arena, chunk, ptr, + mapelm); } else { /* * This object was allocated via a different @@ -158,7 +199,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large_locked(arena, chunk, ptr); else { /* * This object was allocated via a different @@ -314,22 +355,14 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - arena_bin_t *bin = run->bin; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, tcache, mapelm); - malloc_mutex_unlock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); } else if (tcache_size <= tcache_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c index 281a2e9be70e..961a546329c1 100644 --- a/contrib/jemalloc/src/tsd.c +++ b/contrib/jemalloc/src/tsd.c @@ -14,7 +14,7 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. 
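tcache_event_hard() above is the incremental GC that the tcache.c changes factor out: on each pass it flushes roughly 3/4 of the objects sitting below the bin's low-water mark, then adjusts lg_fill_div so the next fill is half as large (if the fill count stays at least 1) or twice as large (keeping lg_fill_div above 0). The fill-count adjustment in isolation, as a runnable toy with made-up example values:

#include <stdio.h>

int
main(void)
{
        unsigned ncached_max = 200;     /* Bin capacity (example value). */
        unsigned lg_fill_div = 1;       /* Fill count = ncached_max >> lg_fill_div. */
        int low_water = 50;             /* Lowest fill level since the last pass. */

        if (low_water > 0) {
                /* Over-provisioned: halve the fill count if it stays >= 1. */
                if ((ncached_max >> (lg_fill_div + 1)) >= 1)
                        lg_fill_div++;
        } else if (low_water < 0) {
                /* The bin ran dry: double the fill count, keep lg_fill_div > 0. */
                if (lg_fill_div > 1)
                        lg_fill_div--;
        }
        printf("next fill count: %u\n", ncached_max >> lg_fill_div);
        return (0);
}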
*/ - return arena_malloc(arenas[0], size, false, false); + return (arena_malloc(arenas[0], size, false, false)); } void @@ -31,12 +31,14 @@ malloc_tsd_no_cleanup(void *arg) not_reached(); } -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -JEMALLOC_ATTR(visibility("default")) +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void _malloc_thread_cleanup(void) { - bool pending[ncleanups], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) @@ -70,3 +72,36 @@ malloc_tsd_boot(void) ncleanups = 0; } + +#ifdef _WIN32 +static BOOL WINAPI +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + + switch (fdwReason) { +#ifdef JEMALLOC_LAZY_LOCK + case DLL_THREAD_ATTACH: + isthreaded = true; + break; +#endif + case DLL_THREAD_DETACH: + _malloc_thread_cleanup(); + break; + default: + break; + } + return (true); +} + +#ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# endif +# pragma section(".CRT$XLY",long,read) +#endif +JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) +static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, + DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; +#endif diff --git a/contrib/jemalloc/src/util.c b/contrib/jemalloc/src/util.c index b80676c3c40a..f94799f380bd 100644 --- a/contrib/jemalloc/src/util.c +++ b/contrib/jemalloc/src/util.c @@ -40,8 +40,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. */ -JEMALLOC_CATTR(visibility("hidden"), static) -void +static void wrtmessage(void *cbopaque, const char *s) { @@ -57,10 +56,9 @@ wrtmessage(void *cbopaque, const char *s) #endif } -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); -JEMALLOC_CATTR(visibility("hidden"), static) +JEMALLOC_ATTR(visibility("hidden")) void wrtmessage_1_0(const char *s1, const char *s2, const char *s3, const char *s4) @@ -76,14 +74,33 @@ void (*__malloc_message_1_0)(const char *s1, const char *s2, const char *s3, const char *s4) = wrtmessage_1_0; __sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +void +malloc_write(const char *s) +{ + + if (je_malloc_message != NULL) + je_malloc_message(NULL, s); + else + wrtmessage(NULL, s); +} + /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so * provide a wrapper. */ int -buferror(int errnum, char *buf, size_t buflen) +buferror(char *buf, size_t buflen) { -#ifdef _GNU_SOURCE + +#ifdef _WIN32 + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + (LPSTR)buf, buflen, NULL); + return (0); +#elif defined(_GNU_SOURCE) char *b = strerror_r(errno, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); @@ -104,7 +121,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) const char *p, *ns; if (base < 0 || base == 1 || base > 36) { - errno = EINVAL; + set_errno(EINVAL); return (UINTMAX_MAX); } b = base; @@ -179,7 +196,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) ret += digit; if (ret < pret) { /* Overflow. 
*/ - errno = ERANGE; + set_errno(ERANGE); return (UINTMAX_MAX); } p++; @@ -299,7 +316,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int ret; size_t i; const char *f; - va_list tap; #define APPEND_C(c) do { \ if (i < size) \ @@ -370,9 +386,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } \ } while (0) - if (config_debug) - va_copy(tap, ap); - i = 0; f = format; while (true) { @@ -431,9 +444,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; - errno = 0; + set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || errno != + assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; if (*f == '.') { @@ -457,9 +470,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uprec; - errno = 0; + set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || errno != ERANGE); + assert(uprec != UINTMAX_MAX || get_errno() != + ERANGE); prec = (int)uprec; break; } @@ -610,7 +624,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = je_malloc_message; + write_cb = (je_malloc_message != NULL) ? je_malloc_message : + wrtmessage; cbopaque = NULL; }