From 136e1e4ddc6169ae4068a28d94389e8a0b7bef72 Mon Sep 17 00:00:00 2001 From: jasone Date: Thu, 10 May 2012 18:29:40 +0000 Subject: [PATCH] Import jemalloc 37b6f95dcd866f51c91488531a2efc3ed4c2b754 (dev branch, prior to 3.0.0 release). This version is likely very close to what will be 3.0.0. --- contrib/jemalloc/ChangeLog | 15 +- contrib/jemalloc/FREEBSD-Xlist | 1 + contrib/jemalloc/FREEBSD-diffs | 63 ++- contrib/jemalloc/VERSION | 2 +- contrib/jemalloc/doc/jemalloc.3 | 16 +- .../include/jemalloc/internal/arena.h | 401 +++++++++++--- .../include/jemalloc/internal/atomic.h | 28 + .../jemalloc/include/jemalloc/internal/ctl.h | 31 +- .../jemalloc/internal/jemalloc_internal.h | 89 +++- .../include/jemalloc/internal/mutex.h | 20 +- .../jemalloc/internal/private_namespace.h | 28 +- .../jemalloc/include/jemalloc/internal/prof.h | 60 ++- .../include/jemalloc/internal/tcache.h | 71 +-- .../jemalloc/include/jemalloc/internal/tsd.h | 101 ++++ .../jemalloc/include/jemalloc/internal/util.h | 32 +- contrib/jemalloc/include/jemalloc/jemalloc.h | 64 ++- .../jemalloc/include/jemalloc/jemalloc_defs.h | 39 +- contrib/jemalloc/src/arena.c | 503 +++++++++--------- contrib/jemalloc/src/chunk.c | 97 ++-- contrib/jemalloc/src/chunk_mmap.c | 149 +++--- contrib/jemalloc/src/ctl.c | 252 +++++---- contrib/jemalloc/src/huge.c | 4 +- contrib/jemalloc/src/jemalloc.c | 122 ++--- contrib/jemalloc/src/mutex.c | 21 +- contrib/jemalloc/src/prof.c | 215 ++++---- contrib/jemalloc/src/quarantine.c | 101 +++- contrib/jemalloc/src/stats.c | 30 +- contrib/jemalloc/src/tcache.c | 59 +- contrib/jemalloc/src/tsd.c | 43 +- contrib/jemalloc/src/util.c | 51 +- 30 files changed, 1728 insertions(+), 980 deletions(-) diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index b71fa165b60f..691630bc8d31 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -19,9 +19,10 @@ found in the git revision history: New features: - Implement Valgrind support, redzones, and quarantine. - - Add support for additional operating systems: + - Add support for additional platforms: + FreeBSD + Mac OS X Lion + + MinGW - Add support for additional architectures: + MIPS + SH4 @@ -64,18 +65,24 @@ found in the git revision history: - Remove the --enable-sysv configure option. Bug fixes: - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - - Work around TLS dallocation via free() on Linux. This bug could cause + - Work around TLS deallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix a potential deadlock that could occur during interval- and + growth-triggered heap profile dumps. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. + - Fix fork-related bugs that could cause deadlock in children between fork + and exec. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). - Check for NULL pointer in malloc_usable_size(). + - Fix an off-by-one heap profile statistics bug that could be observed in + interval- and growth-triggered heap profiles. + - Fix the "epoch" mallctl to update cached stats even if the passed in epoch + is 0. - Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. 
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index f3945de46f8c..cd0f8add7b60 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -18,6 +18,7 @@ include/jemalloc/internal/jemalloc_internal.h.in include/jemalloc/internal/size_classes.sh include/jemalloc/jemalloc.h.in include/jemalloc/jemalloc_defs.h.in +include/msvc_compat/ install-sh src/zone.c test/ diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index 76e0a87c8c6e..c38b92fe9218 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,5 +1,5 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index e8a5722..cec85b5 100644 +index 93c16dc..b5c5595 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -51,12 +51,23 @@ @@ -27,7 +27,7 @@ index e8a5722..cec85b5 100644 Standard API -@@ -2091,4 +2102,16 @@ malloc_conf = "lg_chunk:24";]]> +@@ -2101,4 +2112,16 @@ malloc_conf = "lg_chunk:24";]]> The posix_memalign function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). @@ -45,7 +45,7 @@ index e8a5722..cec85b5 100644 + diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index b61abe8..edbb437 100644 +index 268cd14..cfb1fb9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,8 @@ @@ -54,12 +54,12 @@ index b61abe8..edbb437 100644 +#include "libc_private.h" +#include "namespace.h" + - #include - #include - #include -@@ -35,6 +38,9 @@ - #include #include + #ifdef _WIN32 + # include +@@ -54,6 +57,9 @@ typedef intptr_t ssize_t; + #endif + #include +#include "un-namespace.h" +#include "libc_private.h" @@ -68,10 +68,10 @@ index b61abe8..edbb437 100644 #include "../jemalloc@install_suffix@.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h -index 8837ef5..d7133f4 100644 +index de44e14..564d604 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h -@@ -39,9 +39,6 @@ struct malloc_mutex_s { +@@ -43,9 +43,6 @@ struct malloc_mutex_s { #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; @@ -82,10 +82,10 @@ index 8837ef5..d7133f4 100644 bool malloc_mutex_init(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h -index bb1b63e..00eb169 100644 +index b816647..b8ce6b1 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h -@@ -165,7 +165,6 @@ +@@ -186,7 +186,6 @@ #define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) @@ -94,7 +94,7 @@ index bb1b63e..00eb169 100644 #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in -index f0581db..f26d8bc 100644 +index ad06948..505dd38 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -15,6 +15,7 @@ extern "C" { @@ -107,10 +107,10 @@ index f0581db..f26d8bc 100644 #define ALLOCM_LG_ALIGN(la) (la) diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h new file mode 100644 -index 0000000..2c5797f +index 0000000..9efab93 --- /dev/null +++ b/include/jemalloc/jemalloc_FreeBSD.h -@@ -0,0 +1,76 @@ +@@ -0,0 +1,80 @@ +/* + * 
Override settings that were generated in jemalloc_defs.h as necessary. + */ @@ -154,8 +154,12 @@ index 0000000..2c5797f +# define LG_SIZEOF_PTR 2 +#endif +#ifdef __mips__ ++#ifdef __mips_n64 ++# define LG_SIZEOF_PTR 3 ++#else +# define LG_SIZEOF_PTR 2 +#endif ++#endif +#ifdef __powerpc64__ +# define LG_SIZEOF_PTR 3 +#elif defined(__powerpc__) @@ -188,20 +192,21 @@ index 0000000..2c5797f +#define pthread_mutex_lock _pthread_mutex_lock +#define pthread_mutex_unlock _pthread_mutex_unlock diff --git a/src/jemalloc.c b/src/jemalloc.c -index f9c8916..8e24a5a 100644 +index d42e91d..cdf6222 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c -@@ -8,6 +8,9 @@ malloc_tsd_data(, arenas, arena_t *, NULL) +@@ -8,6 +8,10 @@ malloc_tsd_data(, arenas, arena_t *, NULL) malloc_tsd_data(, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER) -+const char *__malloc_options_1_0; ++/* Work around : */ ++const char *__malloc_options_1_0 = NULL; +__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); + /* Runtime configuration options. */ - const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); + const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG -@@ -401,7 +404,8 @@ malloc_conf_init(void) +@@ -429,7 +433,8 @@ malloc_conf_init(void) #endif ; @@ -212,10 +217,10 @@ index f9c8916..8e24a5a 100644 * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment diff --git a/src/mutex.c b/src/mutex.c -index 4b8ce57..7be5fc9 100644 +index 37a843e..4a90a05 100644 --- a/src/mutex.c +++ b/src/mutex.c -@@ -63,6 +63,17 @@ pthread_create(pthread_t *__restrict thread, +@@ -66,6 +66,17 @@ pthread_create(pthread_t *__restrict thread, #ifdef JEMALLOC_MUTEX_INIT_CB int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); @@ -234,14 +239,14 @@ index 4b8ce57..7be5fc9 100644 bool diff --git a/src/util.c b/src/util.c -index 99ae26d..b80676c 100644 +index 9b73c3e..f94799f 100644 --- a/src/util.c +++ b/src/util.c -@@ -60,6 +60,22 @@ wrtmessage(void *cbopaque, const char *s) - void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +@@ -58,6 +58,22 @@ wrtmessage(void *cbopaque, const char *s) -+JEMALLOC_CATTR(visibility("hidden"), static) + JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); + ++JEMALLOC_ATTR(visibility("hidden")) +void +wrtmessage_1_0(const char *s1, const char *s2, const char *s3, + const char *s4) @@ -258,5 +263,5 @@ index 99ae26d..b80676c 100644 +__sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); + /* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper. + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. 
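
The FREEBSD-diffs hunks above preserve the pre-3.0 libc ABI: the legacy _malloc_options string and the four-string _malloc_message callback are re-exported under the FBSD_1.0 symbol version via __sym_compat, with a small adapter (wrtmessage_1_0) that forwards each piece to the new single-string writer. The standalone C sketch below (not part of the patch) illustrates only that adapter pattern; write_str and compat_malloc_message are hypothetical names, and the versioned-symbol binding is left as a comment because actually emitting it requires FreeBSD's __sym_compat macro (or an equivalent GNU .symver directive) together with a linker version script that defines FBSD_1.0.

    #include <string.h>     /* strlen() */
    #include <unistd.h>     /* write(), STDERR_FILENO */

    /* New-style writer: one opaque callback argument (unused) and one string. */
    static void
    write_str(void *cbopaque, const char *s)
    {

            (void)cbopaque;
            (void)write(STDERR_FILENO, s, strlen(s));
    }

    /*
     * Adapter that keeps the historical four-string _malloc_message
     * signature by forwarding each piece to the new writer, in the same
     * spirit as wrtmessage_1_0() in the patch above.
     */
    static void
    malloc_message_1_0(const char *s1, const char *s2, const char *s3,
        const char *s4)
    {

            write_str(NULL, s1);
            write_str(NULL, s2);
            write_str(NULL, s3);
            write_str(NULL, s4);
    }

    /* Function pointer exported under the old ABI for legacy consumers. */
    void (*compat_malloc_message)(const char *, const char *, const char *,
        const char *) = malloc_message_1_0;

    /*
     * On FreeBSD the binding to the old versioned name is done with
     * __sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0), which
     * expands to roughly:
     *
     *     __asm__(".symver __malloc_message_1_0, _malloc_message@FBSD_1.0");
     *
     * That directive only links against a version map defining FBSD_1.0,
     * so it is shown here as a comment rather than emitted.
     */

    int
    main(void)
    {

            compat_malloc_message("jemalloc", ": ", "compat message", "\n");
            return (0);
    }

Keeping the adapter separate from the new je_malloc_message hook lets binaries linked against the FBSD_1.0 symbols keep working while newly built code sees only the single-string interface.
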
diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index 629165c7f2de..6651c8ee386c 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca +1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index 209b1331219c..0621c8cc5e96 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 04/21/2012 +.\" Date: 05/09/2012 .\" Manual: User Manual -.\" Source: jemalloc 1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca +.\" Source: jemalloc 1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754 .\" Language: English .\" -.TH "JEMALLOC" "3" "04/21/2012" "jemalloc 1.0.0-286-ga8f8d7540d" "User Manual" +.TH "JEMALLOC" "3" "05/09/2012" "jemalloc 1.0.0-335-g37b6f95dcd" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 1\&.0\&.0\-286\-ga8f8d7540d66ddee7337db80c92890916e1063ca\&. More information can be found at the +This manual describes jemalloc 1\&.0\&.0\-335\-g37b6f95dcd866f51c91488531a2efc3ed4c2b754\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -567,6 +567,12 @@ was specified during build configuration\&. was specified during build configuration\&. .RE .PP +"config\&.mremap" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-mremap\fR +was specified during build configuration\&. +.RE +.PP "config\&.munmap" (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-munmap\fR @@ -1462,7 +1468,7 @@ jemalloc website .IP " 2." 4 Valgrind .RS 4 -\%http://http://valgrind.org/ +\%http://valgrind.org/ .RE .IP " 3." 
4 gperftools package diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index 2eb41cd97e57..264b5d3a24d1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -109,7 +109,8 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -117,35 +118,38 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss1111 1111du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss1111 1111dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss1111 1111D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss1111 1111D--a * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----1111 1111D-LA * * Large (sampled, size <= PAGE): - * ssssssss ssssssss sssscccc ccccD-LA + * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA */ size_t bits; -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) @@ -409,8 +413,14 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -430,6 +440,31 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES 
#ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -442,6 +477,227 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_INLINE size_t +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (*arena_mapbitsp_get(chunk, pageind)); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + +JEMALLOC_INLINE size_t 
+arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + *mapbitsp = size | (*mapbitsp & PAGE_MASK); +} + +JEMALLOC_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp; + + assert(binind <= BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << + CHUNK_MAP_BININD_SHIFT); +} + +JEMALLOC_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp; + + assert(binind < BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(pageind - runind >= map_bias); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | + flags | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; +} + +JEMALLOC_INLINE size_t +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); + assert(mapbits == 
actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; + assert(binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -535,7 +791,7 @@ arena_prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) @@ -544,7 +800,8 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -554,7 +811,7 @@ arena_prof_ctx_get(const void *ptr) sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind-map_bias].prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } @@ -571,19 +828,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; unsigned regind; - binind = arena_bin_index(chunk->arena, bin); + binind = arena_ptr_small_binind_get(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -592,7 +848,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind-map_bias].prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } JEMALLOC_INLINE void * @@ -631,35 +887,42 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind, binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; - } else { + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = 
arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. + */ assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } + ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; } return (ret); @@ -668,8 +931,7 @@ arena_salloc(const void *ptr, bool demote) JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { - size_t pageind; - arena_chunk_map_t *mapelm; + size_t pageind, mapbits; tcache_t *tcache; assert(arena != NULL); @@ -678,47 +940,30 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(CHUNK_ADDR2BASE(ptr) != ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ - if (try_tcache && (tcache = tcache_get(false)) != NULL) - tcache_dalloc_small(tcache, ptr); - else { - arena_run_t *run; - arena_bin_t *bin; + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - bin = run->bin; - if (config_debug) { - size_t binind = arena_bin_index(arena, bin); - UNUSED arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_interval == 0); - } - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); - } + binind = arena_ptr_small_binind_get(ptr, mapbits); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); } else { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); - } else { - malloc_mutex_lock(&arena->lock); + } else arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } } } +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/contrib/jemalloc/include/jemalloc/internal/atomic.h b/contrib/jemalloc/include/jemalloc/internal/atomic.h index 016c472aba98..11a7b47fe0f9 100644 --- a/contrib/jemalloc/include/jemalloc/internal/atomic.h +++ b/contrib/jemalloc/include/jemalloc/internal/atomic.h @@ -47,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -145,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/contrib/jemalloc/include/jemalloc/internal/ctl.h b/contrib/jemalloc/include/jemalloc/internal/ctl.h index a48d09fe7929..adf3827f0948 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ctl.h +++ b/contrib/jemalloc/include/jemalloc/internal/ctl.h @@ -2,6 +2,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; @@ -11,20 +13,21 @@ typedef struct ctl_stats_s ctl_stats_t; struct ctl_node_s { bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. 
*/ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index 1f38d1ad475b..a7e76f37eec1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -3,23 +3,34 @@ #include "libc_private.h" #include "namespace.h" -#include -#include -#include -#if !defined(SYS_write) && defined(__NR_write) -#define SYS_write __NR_write +#include +#ifdef _WIN32 +# include +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA +#else +# include +# include +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include +# include +# include #endif -#include #include -#include -#include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include -#include #include #include #include @@ -33,10 +44,18 @@ #include #include #include -#include +#ifdef _MSC_VER +# include +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include +#endif #include -#include -#include #include "un-namespace.h" #include "libc_private.h" @@ -110,6 +129,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; static const bool config_munmap = #ifdef JEMALLOC_MUNMAP true @@ -218,6 +244,9 @@ static const bool config_ivsalloc = #else # define JEMALLOC_ENABLE_INLINE # define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif #endif /* Smallest size class to support. */ @@ -229,7 +258,7 @@ static const bool config_ivsalloc = * classes). */ #ifndef LG_QUANTUM -# ifdef __i386__ +# if (defined(__i386__) || defined(_M_IX86)) # define LG_QUANTUM 4 # endif # ifdef __ia64__ @@ -241,7 +270,7 @@ static const bool config_ivsalloc = # ifdef __sparc64__ # define LG_QUANTUM 4 # endif -# if (defined(__amd64__) || defined(__x86_64__)) +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) # define LG_QUANTUM 4 # endif # ifdef __arm__ @@ -291,9 +320,12 @@ static const bool config_ivsalloc = /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. 
*/ #define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ @@ -324,6 +356,20 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# include +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions @@ -655,8 +701,17 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. + */ +#define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex.h b/contrib/jemalloc/include/jemalloc/internal/mutex.h index d7133f4d29c0..564d6046b425 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mutex.h +++ b/contrib/jemalloc/include/jemalloc/internal/mutex.h @@ -3,10 +3,12 @@ typedef struct malloc_mutex_s malloc_mutex_t; -#ifdef JEMALLOC_OSSPIN -#define MALLOC_MUTEX_INITIALIZER {0} +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) @@ -23,7 +25,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef JEMALLOC_H_STRUCTS struct malloc_mutex_s { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; @@ -62,7 +66,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else pthread_mutex_lock(&mutex->lock); @@ -75,7 +81,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else pthread_mutex_unlock(&mutex->lock); diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 00eb169cfd24..b8ce6b16125a 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -7,11 +7,31 @@ #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) #define arena_dalloc_junk_small 
JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) @@ -22,6 +42,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) @@ -232,13 +253,13 @@ #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_lookup JEMALLOC_N(prof_lookup) #define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) @@ -294,12 +315,13 @@ #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) -#define tcache_initialized JEMALLOC_N(tcache_initialized) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define 
tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) #define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h index a4c563cc63d3..c3e3f9e4bcd8 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -37,6 +37,14 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NCTX_LOCKS 1024 +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -113,9 +121,19 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and cnts_ql. */ + /* Protects nlimbo, cnt_merged, and cnts_ql. */ malloc_mutex_t *lock; + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -152,6 +170,11 @@ struct prof_tdata_s { uint64_t prng_state; uint64_t threshold; uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. 
*/ + bool enq; + bool enq_idump; + bool enq_gdump; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -211,13 +234,13 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = *prof_tdata_tsd_get(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_get(); \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ ret = NULL; \ - break; \ - } \ + break; \ } \ \ if (opt_prof_active == false) { \ @@ -260,6 +283,7 @@ bool prof_boot2(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) +prof_tdata_t *prof_tdata_get(void); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -276,6 +300,22 @@ malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + } + + return (prof_tdata); +} + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { @@ -355,7 +395,8 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { @@ -501,8 +542,9 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { + prof_thr_cnt_t *tcnt; assert(size == isalloc(ptr, true)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index cfb17c28d1bd..38d735c86c81 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -101,6 +101,7 @@ extern size_t nhbins; extern size_t tcache_maxclass; size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -132,7 +133,7 @@ void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif @@ -266,47 +267,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. 
- */ - if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); } JEMALLOC_INLINE void * @@ -390,13 +352,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { - if (config_prof) { + if (config_prof && prof_promote && size == PAGE) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); - chunk->map[pageind-map_bias].bits &= - ~CHUNK_MAP_CLASS_MASK; + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); } if (zero == false) { if (config_fill) { @@ -421,30 +383,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < NBINS); - if (config_fill && opt_junk) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index 20491c885282..0037cf35e703 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -74,6 +74,10 @@ extern bool a_name##_booted; extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ @@ -94,6 +98,10 @@ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ @@ -182,6 +190,99 @@ a_name##_tsd_set(a_type *val) \ } \ } \ } +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, 
a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ diff --git a/contrib/jemalloc/include/jemalloc/internal/util.h b/contrib/jemalloc/include/jemalloc/internal/util.h index d360ae3f5a8f..8479693631ab 100644 --- a/contrib/jemalloc/include/jemalloc/internal/util.h +++ b/contrib/jemalloc/include/jemalloc/internal/util.h @@ -82,10 +82,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - -int buferror(int errnum, char *buf, size_t buflen); +int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating @@ -109,6 +108,8 @@ void malloc_printf(const char *format, ...) 
#ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); void malloc_write(const char *s); +void set_errno(int errnum); +int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) @@ -130,15 +131,28 @@ pow2_ceil(size_t x) return (x); } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ +/* Sets error code */ JEMALLOC_INLINE void -malloc_write(const char *s) +set_errno(int errnum) { - je_malloc_message(NULL, s); +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif } #endif diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index 6322d9773c17..dbb738668b3d 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -7,12 +7,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "1.0.0-286-ga8f8d7540d66ddee7337db80c92890916e1063ca" +#define JEMALLOC_VERSION "1.0.0-335-g37b6f95dcd866f51c91488531a2efc3ed4c2b754" #define JEMALLOC_VERSION_MAJOR 1 #define JEMALLOC_VERSION_MINOR 0 #define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 286 -#define JEMALLOC_VERSION_GID "a8f8d7540d66ddee7337db80c92890916e1063ca" +#define JEMALLOC_VERSION_NREV 335 +#define JEMALLOC_VERSION_GID "37b6f95dcd866f51c91488531a2efc3ed4c2b754" #include "jemalloc_defs.h" #include "jemalloc_FreeBSD.h" @@ -37,35 +37,49 @@ extern "C" { * namespace management, and should be omitted in application code unless * JEMALLOC_NO_DEMANGLE is defined (see below). */ -extern const char *je_malloc_conf; -extern void (*je_malloc_message)(void *, const char *); +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); -void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int je_posix_memalign(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); -void *je_realloc(void *ptr, size_t size); -void je_free(void *ptr); +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); -size_t je_malloc_usable_size(const void *ptr); -void je_malloc_stats_print(void (*write_cb)(void *, const char *), - void *je_cbopaque, const char *opts); -int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); -int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + +JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT void je_malloc_stats_print(void 
(*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #ifdef JEMALLOC_EXPERIMENTAL -int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_sallocm(const void *ptr, size_t *rsize, int flags) +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_nallocm(size_t *rsize, size_t size, int flags); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); #endif /* diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h index 7ef7dcfbed2d..85571b9a6594 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h @@ -105,11 +105,27 @@ /* Defined if __attribute__((...)) syntax is supported. */ #define JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_CATTR(s, a) __attribute__((s)) -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) #else -# define JEMALLOC_CATTR(s, a) a -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif /* Defined if sbrk() is supported. */ @@ -178,12 +194,18 @@ /* * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is automatically disabled if configuration determines - * that common sequences of mmap()/munmap() calls will cause virtual memory map - * holes. + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. */ #define JEMALLOC_MUNMAP +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +/* #undef JEMALLOC_MREMAP */ + /* TLS is used to map arenas and magazine caches to threads. */ #define JEMALLOC_TLS @@ -206,9 +228,6 @@ /* #undef JEMALLOC_ZONE */ /* #undef JEMALLOC_ZONE_VERSION */ -/* If defined, use mremap(...MREMAP_FIXED...) 
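/*
 * Illustrative sketch (not from the upstream sources): a minimal version of
 * the compiler dispatch that jemalloc_defs.h now uses for JEMALLOC_EXPORT,
 * JEMALLOC_ALIGNED and JEMALLOC_NOINLINE.  The DEMO_* macros and the
 * DEMO_BUILD_DLL switch are hypothetical stand-ins.
 */
#include <stddef.h>

#if defined(__GNUC__)
#  define DEMO_EXPORT   __attribute__((visibility("default")))
#  define DEMO_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#  ifdef DEMO_BUILD_DLL
#    define DEMO_EXPORT __declspec(dllexport)
#  else
#    define DEMO_EXPORT __declspec(dllimport)
#  endif
#  define DEMO_NOINLINE __declspec(noinline)
#else
#  define DEMO_EXPORT
#  define DEMO_NOINLINE
#endif

/* Public entry points are then annotated once, portably: */
DEMO_EXPORT void *demo_malloc(size_t size);
DEMO_EXPORT void demo_free(void *ptr);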
for huge realloc(). */ -/* #undef JEMALLOC_MREMAP_FIXED */ - /* * Methods for purging unused pages differ between operating systems. * diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index 6f28abe94d44..9f24e7caf61b 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -7,7 +7,7 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ATTR(aligned(CACHELINE)) +JEMALLOC_ALIGNED(CACHELINE) const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) @@ -41,11 +41,11 @@ const uint8_t small_size2bin[] = { /* Function prototypes for non-inline static functions. */ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, bool zero); + bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - bool zero); + size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, @@ -152,7 +152,9 @@ static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -184,28 +186,31 @@ arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - bool zero) + size_t binind, bool zero) { arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; - assert((chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -222,22 +227,23 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, /* Keep track of trailing unused pages for later use. 
*/ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + CHUNK_MAP_DIRTY); } else { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+need_pages-map_bias].bits & - CHUNK_MAP_UNZEROED); - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages-map_bias]); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + run_ind+need_pages)); } /* Update dirty page accounting. */ @@ -258,8 +264,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind+i-map_bias].bits - & CHUNK_MAP_UNZEROED) != 0) { + if (arena_mapbits_unzeroed_get(chunk, + run_ind+i) != 0) { VALGRIND_MAKE_MEM_UNDEFINED( (void *)((uintptr_t) chunk + ((run_ind+i) << @@ -293,10 +299,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). */ - chunk->map[run_ind+need_pages-1-map_bias].bits = - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind-map_bias].bits = size | flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, + flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { assert(zero == false); /* @@ -304,34 +309,30 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. */ - chunk->map[run_ind-map_bias].bits = - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind, 0, binind, + arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. 
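/*
 * Illustrative sketch (not from the upstream sources): the arena.c hunks in
 * this import replace open-coded "chunk->map[i-map_bias].bits" manipulation
 * with arena_mapbits_*_{get,set}() accessors.  The sketch below shows the
 * shape of that accessor style on a simplified, hypothetical map word; the
 * demo_* names and flag layout are illustrative only.
 */
#include <assert.h>
#include <stddef.h>

#define DEMO_MAP_ALLOCATED  ((size_t)0x1U)
#define DEMO_MAP_LARGE      ((size_t)0x2U)
#define DEMO_MAP_DIRTY      ((size_t)0x8U)
#define DEMO_FLAGS_MASK     ((size_t)0xfffU)    /* low bits hold flags */

typedef struct {
    size_t bits;
} demo_chunk_map_t;

static size_t
demo_mapbits_dirty_get(const demo_chunk_map_t *mapelm)
{

    return (mapelm->bits & DEMO_MAP_DIRTY);
}

static size_t
demo_mapbits_size_get(const demo_chunk_map_t *mapelm)
{

    return (mapelm->bits & ~DEMO_FLAGS_MASK);
}

static void
demo_mapbits_large_set(demo_chunk_map_t *mapelm, size_t size, size_t flags)
{

    assert((size & DEMO_FLAGS_MASK) == 0);
    assert((flags & ~DEMO_FLAGS_MASK) == 0);
    mapelm->bits = size | flags | DEMO_MAP_LARGE | DEMO_MAP_ALLOCATED;
}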
*/ if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) - == 0) + arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) - | (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; + arena_mapbits_small_set(chunk, run_ind+i, i, + binind, arena_mapbits_unzeroed_get(chunk, + run_ind+i)); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } - chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << LG_PAGE) | - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind+need_pages-1, + need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) | flag_dirty); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) { + arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == + 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } @@ -351,17 +352,18 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); - assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) - == arena_maxclass); - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == - (chunk->map[chunk_npages-1-map_bias].bits & - CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[0]); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, + chunk_npages-1) == arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + map_bias)); } else { bool zero; size_t unzeroed; @@ -392,24 +394,27 @@ arena_chunk_alloc(arena_t *arena) * chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - chunk->map[0].bits = arena_maxclass | unzeroed; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); /* * There is no need to initialize the internal page map entries * unless the chunk is not zeroed. */ if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) - chunk->map[i-map_bias].bits = unzeroed; + arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) - assert(chunk->map[i-map_bias].bits == unzeroed); + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } } - chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | - unzeroed; + arena_mapbits_unallocated_set(chunk, chunk_npages-1, + arena_maxclass, unzeroed); /* Insert the run into the runs_avail_clean tree. 
*/ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[0]); + arena_mapp_get(chunk, map_bias)); } return (chunk); @@ -424,11 +429,11 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, &chunk->map[0]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -449,7 +454,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -457,6 +463,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. */ key.bits = size | CHUNK_MAP_KEY; @@ -469,7 +477,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -481,7 +489,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -491,7 +499,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -509,7 +517,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -521,7 +529,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -579,40 +587,38 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. 
*/ for (pageind = map_bias; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind-map_bias]; - if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { size_t npages; - npages = mapelm->bits >> LG_PAGE; + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (mapelm->bits & CHUNK_MAP_DIRTY) { + if (arena_mapbits_dirty_get(chunk, pageind)) { size_t i; arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << LG_PAGE) | - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind, + (npages << LG_PAGE), flag_unzeroed); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. */ for (i = 1; i < npages - 1; i++) { - chunk->map[pageind+i-map_bias].bits = - flag_unzeroed; + arena_mapbits_unzeroed_set(chunk, + pageind+i, flag_unzeroed); } if (npages > 1) { - chunk->map[ - pageind+npages-1-map_bias].bits = - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, + pageind+npages-1, 0, flag_unzeroed); } if (config_stats) { @@ -637,17 +643,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind += npages; } else { /* Skip allocated run. */ - if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> LG_PAGE; + if (arena_mapbits_large_get(chunk, pageind)) + pageind += arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; else { + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> LG_PAGE) == 0); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; } } @@ -669,7 +677,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> LG_PAGE; + size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -806,15 +815,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { - size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); assert(size == PAGE || - (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -837,7 +842,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already 
dirty before being allocated. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -845,58 +850,52 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. */ if (dirty) { - chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & - ~PAGE_MASK; + arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages); size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & ~PAGE_MASK) == nrun_size); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages+nrun_pages-1) == nrun_size); + assert(arena_mapbits_dirty_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages-map_bias]); + arena_mapp_get(chunk, run_ind+run_pages)); size += nrun_size; run_pages += nrun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Try to coalesce backward. 
*/ - if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1-map_bias].bits & - ~PAGE_MASK; + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) + == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + size_t prun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -905,31 +904,26 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * Remove predecessor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) - == prun_size); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) - == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + prun_size); + assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, + run_ind)); size += prun_size; run_pages += prun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Insert into runs_avail, now that coalescing is complete. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); if (dirty) { /* @@ -943,14 +937,15 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) } } - /* - * Deallocate chunk if it is now completely unused. The bit - * manipulation checks whether the first run is unallocated and extends - * to the end of the chunk. - */ - if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == - arena_maxclass) + /* Deallocate chunk if it is now completely unused. 
*/ + if (size == arena_maxclass) { + assert(run_ind == map_bias); + assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); arena_chunk_dealloc(arena, chunk); + } /* * It is okay to do dirty page processing here even if the chunk was @@ -969,7 +964,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -978,29 +973,21 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * leading run as separately allocated. Set the last element of each * run first, in case of single-page runs. */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = (oldsize - newsize) - | flag_dirty | (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_DIRTY) == flag_dirty); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); } - chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty + | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); arena_run_dalloc(arena, run, false); } @@ -1011,9 +998,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = newsize >> LG_PAGE; - size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -1022,28 +1007,22 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trailing run as separately allocated. Set the last element of each * run first, in case of single-page runs. 
*/ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = - flag_dirty | - (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | - flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + if (config_debug) { + UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, + flag_dirty | arena_mapbits_unzeroed_get(chunk, + pageind+head_npages)); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1056,12 +1035,13 @@ arena_bin_runs_first(arena_bin_t *bin) if (mapelm != NULL) { arena_chunk_t *chunk; size_t pageind; + arena_run_t *run; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); return (run); } @@ -1074,7 +1054,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1086,7 +1066,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1125,12 +1105,14 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + 
run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); if (run != NULL) { bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. */ + VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - + bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1381,7 +1363,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. */ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, zero); + ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1425,7 +1407,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, zero); + run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1482,8 +1464,7 @@ arena_prof_promoted(const void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); - chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & - ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); + arena_mapbits_large_binind_set(chunk, pageind, binind); assert(isalloc(ptr, false) == PAGE); assert(isalloc(ptr, true) == size); @@ -1521,8 +1502,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t npages, run_ind, past; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); + assert(arena_run_tree_search(&bin->runs, + arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) + == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1542,18 +1524,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past - - run_ind < npages) { + if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < + npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. 
*/ - chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind+npages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | - CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, + arena_mapbits_unzeroed_get(chunk, run_ind)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ @@ -1588,20 +1568,21 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) { size_t pageind; arena_run_t *run; arena_bin_t *bin; - size_t size; + arena_bin_info_t *bin_info; + size_t size, binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + binind = arena_ptr_small_binind_get(ptr, mapelm->bits); + bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1621,6 +1602,35 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm) +{ + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +} + +void +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) +{ + arena_chunk_map_t *mapelm; + + if (config_debug) { + /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ + assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) != BININD_INVALID); + } + mapelm = arena_mapp_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); +} void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1669,12 +1679,12 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); @@ -1689,6 +1699,15 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, (arena_run_t *)ptr, true); } +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +} + static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) @@ -1727,16 +1746,15 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t npages = oldsize >> LG_PAGE; size_t followsize; - assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && - (chunk->map[pageind+npages-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0 && (followsize = - chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - - oldsize) { + arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && + (followsize = arena_mapbits_unallocated_size_get(chunk, + pageind+npages)) >= size - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing @@ -1746,7 +1764,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, + BININD_INVALID, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -1759,29 +1778,22 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * arena_run_dalloc() with the dirty argument set to false * (which is when dirty flag consistency would really matter). 
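/*
 * Illustrative sketch (not from the upstream sources): several arena.c
 * routines are split here into a *_locked() worker plus a thin wrapper that
 * acquires the lock, so that call sites which already hold it (for example
 * the tcache flush paths) can invoke the worker directly.  A generic form of
 * the pattern, using a pthread mutex and hypothetical demo_* names:
 */
#include <pthread.h>
#include <stdint.h>

typedef struct {
    pthread_mutex_t lock;
    uint64_t        ndalloc_large;
} demo_arena_t;

/* Caller must already hold arena->lock. */
static void
demo_dalloc_large_locked(demo_arena_t *arena, void *ptr)
{

    arena->ndalloc_large++;
    /* ... the actual deallocation work would go here ... */
    (void)ptr;
}

/* Convenience wrapper for call sites that do not hold the lock. */
static void
demo_dalloc_large(demo_arena_t *arena, void *ptr)
{

    pthread_mutex_lock(&arena->lock);
    demo_dalloc_large_locked(arena, ptr);
    pthread_mutex_unlock(&arena->lock);
}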
*/ - flag_dirty = (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY) | - (chunk->map[pageind+npages-1-map_bias].bits & - CHUNK_MAP_DIRTY); - chunk->map[pageind-map_bias].bits = size | flag_dirty - | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | + arena_mapbits_dirty_get(chunk, pageind+npages-1); + arena_mapbits_large_set(chunk, pageind, size, flag_dirty); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); @@ -1924,6 +1936,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? size : oldsize; + VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); iqalloc(ptr); return (ret); diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c index 5426b0275c79..6bc245447977 100644 --- a/contrib/jemalloc/src/chunk.c +++ b/contrib/jemalloc/src/chunk.c @@ -30,19 +30,30 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void *chunk_recycle(size_t size, size_t alignment, bool base, + bool *zero); static void chunk_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool *zero) +chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + if (base) { + /* + * This function may need to call base_node_{,de}alloc(), but + * the current chunk allocation request is on behalf of the + * base allocator. Avoid deadlock (and if that weren't an + * issue, potential for infinite recursion) by returning NULL. + */ + return (NULL); + } + alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) @@ -57,8 +68,8 @@ chunk_recycle(size_t size, size_t alignment, bool *zero) } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; + assert(node->size >= leadsize + size); + trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. 
*/ extent_tree_szad_remove(&chunks_szad, node); @@ -123,9 +134,10 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); + assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, zero); + ret = chunk_recycle(size, alignment, base, zero); if (ret != NULL) goto label_return; @@ -168,6 +180,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) size_t i; size_t *p = (size_t *)(uintptr_t)ret; + VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } @@ -182,50 +195,48 @@ chunk_record(void *chunk, size_t size) pages_purge(chunk, size); - xnode = NULL; + /* + * Allocate a node before acquiring chunks_mtx even though it might not + * be needed, because base_node_alloc() may cause a new base chunk to + * be allocated, which could cause deadlock if chunks_mtx were already + * held. + */ + xnode = base_node_alloc(); + malloc_mutex_lock(&chunks_mtx); - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. This does + * not change the position within chunks_ad, so only + * remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + if (xnode != NULL) + base_node_dealloc(xnode); + } else { + /* Coalescing forward failed, so insert a new node. */ + if (xnode == NULL) { /* - * Coalesce chunk with the following address range. - * This does not change the position within chunks_ad, - * so only remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(&chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on chunks_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. + * base_node_alloc() failed, which is an exceedingly + * unlikely failure. Leak chunk; its pages have + * already been purged, so this is only a virtual + * memory leak. */ malloc_mutex_unlock(&chunks_mtx); - xnode = base_node_alloc(); - if (xnode == NULL) - return; - malloc_mutex_lock(&chunks_mtx); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); - break; + return; } + node = xnode; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); } - /* Discard xnode if it ended up unused due to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); /* Try to coalesce backward. 
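/*
 * Illustrative sketch (not from the upstream sources): chunk_record() above
 * now allocates its extent node before taking chunks_mtx, because
 * base_node_alloc() may itself need to allocate a chunk and acquire the same
 * lock.  The general "preallocate outside the critical section" pattern,
 * reduced to a toy singly linked list with hypothetical demo_* names:
 */
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct demo_node_s {
    struct demo_node_s  *next;
    void                *addr;
    size_t              size;
} demo_node_t;

static pthread_mutex_t  demo_mtx = PTHREAD_MUTEX_INITIALIZER;
static demo_node_t      *demo_head;

static void
demo_record(void *addr, size_t size)
{
    /* Allocate speculatively, outside the critical section. */
    demo_node_t *xnode = malloc(sizeof(*xnode));

    pthread_mutex_lock(&demo_mtx);
    if (demo_head != NULL && demo_head->addr == (void *)((uintptr_t)addr +
        size)) {
        /* Coalesce forward; the preallocated node turned out unneeded. */
        demo_head->addr = addr;
        demo_head->size += size;
        pthread_mutex_unlock(&demo_mtx);
        free(xnode);
        return;
    }
    if (xnode == NULL) {
        /* Exceedingly unlikely; leak the range rather than retry. */
        pthread_mutex_unlock(&demo_mtx);
        return;
    }
    xnode->addr = addr;
    xnode->size = size;
    xnode->next = demo_head;
    demo_head = xnode;
    pthread_mutex_unlock(&demo_mtx);
}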
*/ prev = extent_tree_ad_prev(&chunks_ad, node); diff --git a/contrib/jemalloc/src/chunk_mmap.c b/contrib/jemalloc/src/chunk_mmap.c index 9ff7480a06f6..c8da6556b098 100644 --- a/contrib/jemalloc/src/chunk_mmap.c +++ b/contrib/jemalloc/src/chunk_mmap.c @@ -7,7 +7,7 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned, bool *zero); + bool *zero); /******************************************************************************/ @@ -16,6 +16,16 @@ pages_map(void *addr, size_t size) { void *ret; + assert(size != 0); + +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. + */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. @@ -33,7 +43,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in munmap(): %s\n", buf); + buferror(buf, sizeof(buf)); + malloc_printf(": Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); if (opt_abort) abort(); } } +static void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + void pages_purge(void *addr, size_t length) { -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE +#ifdef _WIN32 + VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); #else -# error "No method defined for purging unused dirty pages." -#endif +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# else +# error "No method defined for purging unused dirty pages." +# endif madvise(addr, length, JEMALLOC_MADV_PURGE); +#endif } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) { void *ret, *pages; - size_t alloc_size, leadsize, trailsize; + size_t alloc_size, leadsize; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - pages = pages_map(NULL, alloc_size); - if (pages == NULL) - return (NULL); - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)pages + leadsize); - if (leadsize != 0) { - /* Note that mmap() returned an unaligned mapping. 
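/*
 * Illustrative sketch (not from the upstream sources): pages_trim() above,
 * together with the chunk_alloc_mmap() rewrite in the following hunk, amounts
 * to "map exactly the requested size first, and only over-allocate and trim
 * when the kernel hands back a misaligned region".  A POSIX-only sketch with
 * a hypothetical fixed alignment; the demo_* names are illustrative:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#define DEMO_PAGE   ((size_t)4096)
#define DEMO_ALIGN  ((size_t)(4U << 20))    /* e.g. 4 MiB, chunk-sized */

static void *
demo_map_aligned(size_t size)
{
    void *pages, *ret;
    size_t alloc_size, lead, trail;

    assert(size != 0 && (size & (DEMO_PAGE - 1)) == 0);

    /* Optimistic attempt: an exact-size mapping may already be aligned. */
    ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
        -1, 0);
    if (ret == MAP_FAILED)
        return (NULL);
    if (((uintptr_t)ret & (DEMO_ALIGN - 1)) == 0)
        return (ret);
    munmap(ret, size);

    /* Slow path: over-allocate, then unmap the leading/trailing excess. */
    alloc_size = size + DEMO_ALIGN - DEMO_PAGE;
    pages = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANON, -1, 0);
    if (pages == MAP_FAILED)
        return (NULL);
    lead = (DEMO_ALIGN - ((uintptr_t)pages & (DEMO_ALIGN - 1))) &
        (DEMO_ALIGN - 1);
    ret = (void *)((uintptr_t)pages + lead);
    trail = alloc_size - lead - size;
    if (lead != 0)
        munmap(pages, lead);
    if (trail != 0)
        munmap((void *)((uintptr_t)ret + size), trail);
    return (ret);
}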
*/ - unaligned = true; - pages_unmap(pages, leadsize); - } - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); + do { + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (ret == NULL); assert(ret != NULL); *zero = true; @@ -117,48 +166,24 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * NetBSD has), but in the absence of such a feature, we have to work * hard to efficiently create aligned mappings. The reliable, but * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to + * excess. However, that always results in one or two calls to * pages_unmap(). * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. As such, repeatedly trying to extend unaligned - * mappings could result in an infinite loop, so if extension fails, - * immediately fall back to the reliable method of over-allocation - * followed by trimming. + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. */ + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), chunksize - - offset) == NULL) { - /* - * Extension failed. Clean up, then fall back to the - * reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, true, - zero)); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - offset)); - } + pages_unmap(ret, size); + return (chunk_alloc_mmap_slow(size, alignment, zero)); } assert(ret != NULL); diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index 98ea3d1c5b19..55e766777fd7 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -14,6 +14,32 @@ static bool ctl_initialized; static uint64_t ctl_epoch; static ctl_stats_t ctl_stats; +/******************************************************************************/ +/* Helpers for named and indexed nodes. */ + +static inline const ctl_named_node_t * +ctl_named_node(const ctl_node_t *node) +{ + + return ((node->named) ? (const ctl_named_node_t *)node : NULL); +} + +static inline const ctl_named_node_t * +ctl_named_children(const ctl_named_node_t *node, int index) +{ + const ctl_named_node_t *children = ctl_named_node(node->children); + + return (children ? 
&children[index] : NULL); +} + +static inline const ctl_indexed_node_t * +ctl_indexed_node(const ctl_node_t *node) +{ + + return ((node->named == false) ? (const ctl_indexed_node_t *)node : + NULL); +} + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -22,7 +48,7 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ +const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); @@ -50,6 +76,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -149,35 +176,39 @@ CTL_PROTO(stats_mapped) /* Maximum tree depth. */ #define CTL_MAX_DEPTH 6 -#define NAME(n) true, {.named = {n -#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL -#define CTL(c) 0, NULL}}, c##_ctl +#define NAME(n) {true}, n +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ + (ctl_node_t *)c##_node, \ + NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. */ -#define INDEX(i) false, {.indexed = {i##_index}}, NULL +#define INDEX(i) {false}, i##_index -static const ctl_node_t tcache_node[] = { +static const ctl_named_node_t tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; -static const ctl_node_t thread_node[] = { +static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(tcache)} + {NAME("tcache"), CHILD(named, tcache)} }; -static const ctl_node_t config_node[] = { +static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("mremap"), CTL(config_mremap)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -190,7 +221,7 @@ static const ctl_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; -static const ctl_node_t opt_node[] = { +static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -216,31 +247,31 @@ static const ctl_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; -static const ctl_node_t arenas_bin_i_node[] = { +static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; -static const ctl_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(arenas_bin_i)} +static const ctl_named_node_t super_arenas_bin_i_node[] = { + {NAME(""), CHILD(named, arenas_bin_i)} }; -static const ctl_node_t arenas_bin_node[] = { +static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_node_t arenas_lrun_i_node[] = { +static const ctl_named_node_t arenas_lrun_i_node[] = 
{ {NAME("size"), CTL(arenas_lrun_i_size)} }; -static const ctl_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(arenas_lrun_i)} +static const ctl_named_node_t super_arenas_lrun_i_node[] = { + {NAME(""), CHILD(named, arenas_lrun_i)} }; -static const ctl_node_t arenas_lrun_node[] = { +static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; -static const ctl_node_t arenas_node[] = { +static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, @@ -248,45 +279,45 @@ static const ctl_node_t arenas_node[] = { {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(arenas_bin)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; -static const ctl_node_t prof_node[] = { +static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -static const ctl_node_t stats_chunks_node[] = { +static const ctl_named_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, {NAME("high"), CTL(stats_chunks_high)} }; -static const ctl_node_t stats_huge_node[] = { +static const ctl_named_node_t stats_huge_node[] = { {NAME("allocated"), CTL(stats_huge_allocated)}, {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, {NAME("ndalloc"), CTL(stats_huge_ndalloc)} }; -static const ctl_node_t stats_arenas_i_small_node[] = { +static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_node_t stats_arenas_i_large_node[] = { +static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -static const ctl_node_t stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -297,29 +328,29 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_bins_j)} +static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_bins_node[] = { +static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, 
{NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_lruns_j)} +static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_lruns_node[] = { +static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_node[] = { +static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, @@ -327,41 +358,41 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(stats_arenas_i_small)}, - {NAME("large"), CHILD(stats_arenas_i_large)}, - {NAME("bins"), CHILD(stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(stats_arenas_i_lruns)} + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} }; -static const ctl_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(stats_arenas_i)} +static const ctl_named_node_t super_stats_arenas_i_node[] = { + {NAME(""), CHILD(named, stats_arenas_i)} }; -static const ctl_node_t stats_arenas_node[] = { +static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -static const ctl_node_t stats_node[] = { +static const ctl_named_node_t stats_node[] = { {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(stats_chunks)}, - {NAME("huge"), CHILD(stats_huge)}, - {NAME("arenas"), CHILD(stats_arenas)} + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("huge"), CHILD(named, stats_huge)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; -static const ctl_node_t root_node[] = { +static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("thread"), CHILD(thread)}, - {NAME("config"), CHILD(config)}, - {NAME("opt"), CHILD(opt)}, - {NAME("arenas"), CHILD(arenas)}, - {NAME("prof"), CHILD(prof)}, - {NAME("stats"), CHILD(stats)} + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, + {NAME("opt"), CHILD(named, opt)}, + {NAME("arenas"), CHILD(named, arenas)}, + {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)} }; -static const ctl_node_t super_root_node[] = { - {NAME(""), CHILD(root)} +static const ctl_named_node_t super_root_node[] = { + {NAME(""), CHILD(named, root)} }; #undef NAME @@ -491,7 +522,7 @@ static void ctl_refresh(void) { unsigned i; - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -597,7 +628,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, int ret; const char *elm, *tdot, *dot; size_t elen, i, j; - const ctl_node_t *node; + const ctl_named_node_t *node; elm = name; /* Equivalent to strchrnul(). 
*/ @@ -609,21 +640,21 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } node = super_root_node; for (i = 0; i < *depthp; i++) { - assert(node->named); - assert(node->u.named.nchildren > 0); - if (node->u.named.children[0].named) { - const ctl_node_t *pnode = node; + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + const ctl_named_node_t *pnode = node; /* Children are named. */ - for (j = 0; j < node->u.named.nchildren; j++) { - const ctl_node_t *child = - &node->u.named.children[j]; - if (strlen(child->u.named.name) == elen - && strncmp(elm, child->u.named.name, - elen) == 0) { + for (j = 0; j < node->nchildren; j++) { + const ctl_named_node_t *child = + ctl_named_children(node, j); + if (strlen(child->name) == elen && + strncmp(elm, child->name, elen) == 0) { node = child; if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = + (const ctl_node_t *)node; mibp[i] = j; break; } @@ -634,7 +665,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } } else { uintmax_t index; - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Children are indexed. */ index = malloc_strtoumax(elm, NULL, 10); @@ -643,16 +674,15 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto label_return; } - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mibp, *depthp, - (size_t)index); + inode = ctl_indexed_node(node->children); + node = inode->index(mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; } if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = (const ctl_node_t *)node; mibp[i] = (size_t)index; } @@ -696,6 +726,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t depth; ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; + const ctl_named_node_t *node; if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; @@ -707,13 +738,14 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ret != 0) goto label_return; - if (nodes[depth-1]->ctl == NULL) { + node = ctl_named_node(nodes[depth-1]); + if (node != NULL && node->ctl) + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; - goto label_return; } - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); label_return: return(ret); } @@ -738,7 +770,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - const ctl_node_t *node; + const ctl_named_node_t *node; size_t i; if (ctl_initialized == false && ctl_init()) { @@ -749,19 +781,21 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Iterate down the tree. */ node = super_root_node; for (i = 0; i < miblen; i++) { - if (node->u.named.children[0].named) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->u.named.nchildren <= mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } - node = &node->u.named.children[mib[i]]; + node = ctl_named_children(node, mib[i]); } else { - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Indexed element. 
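The ctl.c rework above splits the old tagged-union ctl_node_t into ctl_named_node_t and ctl_indexed_node_t, which share a one-field ctl_node_t header; ctl_lookup() and ctl_bymib() now test that header and cast, instead of reaching into a union. The following stand-alone sketch shows the shape of that design with simplified, illustrative types (these are not the patch's own definitions):

/* Sketch of a named/indexed node split in the style of the ctl changes. */
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct {
        bool named;             /* Common header; discriminates node kinds. */
} node_t;

typedef struct named_node_s named_node_t;
typedef struct indexed_node_s indexed_node_t;

struct named_node_s {
        node_t node;            /* node.named == true */
        const char *name;
        size_t nchildren;
        const node_t *children; /* All-named or all-indexed child array. */
};

struct indexed_node_s {
        node_t node;            /* node.named == false */
        const named_node_t *(*index)(size_t i);
};

/* Checked "downcasts": NULL when the node is not of the requested kind. */
static const named_node_t *
as_named(const node_t *n)
{

        return (n->named ? (const named_node_t *)n : NULL);
}

static const indexed_node_t *
as_indexed(const node_t *n)
{

        return (n->named == false ? (const indexed_node_t *)n : NULL);
}

/* One lookup step: resolve a child either by name or by numeric index. */
static const named_node_t *
lookup_step(const named_node_t *parent, const char *elm, size_t idx)
{

        if (as_named(parent->children) != NULL) {
                const named_node_t *children =
                    (const named_node_t *)parent->children;
                size_t j;

                for (j = 0; j < parent->nchildren; j++) {
                        if (strcmp(children[j].name, elm) == 0)
                                return (&children[j]);
                }
                return (NULL);
        }
        return (as_indexed(parent->children)->index(idx));
}

The diff's ctl_named_children() helper (its tail is visible at the top of this hunk) does the equivalent cast-and-index in one place so that the differing element sizes of the two child-array types stay contained; the sketch sidesteps that by casting before indexing.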
*/ - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mib, miblen, mib[i]); + inode = ctl_indexed_node(node->children); + node = inode->index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -770,12 +804,12 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Call the ctl function. */ - if (node->ctl == NULL) { + if (node && node->ctl) + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + else { /* Partial MIB. */ ret = ENOENT; - goto label_return; } - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); label_return: return(ret); @@ -799,22 +833,17 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) -#define VOID() do { \ - READONLY(); \ - WRITEONLY(); \ -} while (0) - #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -822,7 +851,7 @@ ctl_boot(void) ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -832,7 +861,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -859,7 +888,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -881,7 +910,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -900,7 +929,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -924,7 +953,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -941,7 +970,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -958,7 +987,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -972,9 +1001,8 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, uint64_t newval; malloc_mutex_lock(&ctl_mtx); - newval = 0; WRITE(newval, uint64_t); - if (newval != 0) + if (newp != NULL) ctl_refresh(); READ(ctl_epoch, uint64_t); @@ -1018,7 +1046,8 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, if (config_tcache == false) return (ENOENT); - VOID(); + READONLY(); + WRITEONLY(); tcache_flush(); @@ -1091,6 +1120,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) @@ -1133,7 +1163,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_GEN(arenas_bin_i_size, 
arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1143,7 +1173,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1201,7 +1231,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EFAULT; goto label_return; } else { - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); @@ -1326,7 +1356,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1344,7 +1374,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1365,10 +1395,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { - const ctl_node_t * ret; + const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index 23eb074ac53b..8a4ec942410e 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -140,11 +140,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; +#ifdef JEMALLOC_MREMAP /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the * source nor the destination are in dss. */ -#ifdef JEMALLOC_MREMAP_FIXED if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); @@ -168,7 +168,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 9292d26f6fb0..cdf622237ba2 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -13,7 +13,7 @@ const char *__malloc_options_1_0 = NULL; __sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -56,7 +56,26 @@ static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. 
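ctl_refresh() and arenas_purge_ctl() above (and stats_print() later in this patch) replace C99 variable-length arrays with a VARIABLE_ARRAY() macro, so the files can also be built by compilers without VLA support such as MSVC. The macro's definition is not part of this section; one plausible shape, shown purely as an illustration:

#include <stddef.h>
#ifdef _MSC_VER
#  include <malloc.h>   /* _alloca() */
#  define VARIABLE_ARRAY(type, name, count) \
        type *name = (type *)_alloca(sizeof(type) * (count))
#else
#  define VARIABLE_ARRAY(type, name, count) type name[(count)]
#endif

/* Example use: a run-time-sized scratch buffer on the stack. */
static size_t
sum_copy(const size_t *vals, size_t n)
{
        size_t total = 0;
        size_t i;
        VARIABLE_ARRAY(size_t, tmp, n);

        for (i = 0; i < n; i++)
                total += (tmp[i] = vals[i]);
        return (total);
}

Either expansion keeps the array on the stack, which is why the callers above can keep using tarenas and initialized exactly as before.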
*/ +#ifdef _WIN32 +static malloc_mutex_t init_lock; + +JEMALLOC_ATTR(constructor) +static void WINAPI +_init_init_lock(void) +{ + + malloc_mutex_init(&init_lock); +} + +#ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) +static const void (WINAPI *init_init_lock)(void) = _init_init_lock; +#endif + +#else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ @@ -233,11 +252,17 @@ malloc_ncpus(void) unsigned ret; long result; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwNumberOfProcessors; +#else result = sysconf(_SC_NPROCESSORS_ONLN); if (result == -1) { /* Error. */ ret = 1; } +#endif ret = (unsigned)result; return (ret); @@ -373,13 +398,14 @@ malloc_conf_init(void) } break; case 1: { +#ifndef _WIN32 int linklen; const char *linkname = -#ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -#else +# else "/etc/malloc.conf" -#endif +# endif ; if ((linklen = readlink(linkname, buf, @@ -390,7 +416,9 @@ malloc_conf_init(void) */ buf[linklen] = '\0'; opts = buf; - } else { + } else +#endif + { /* No configuration specified. */ buf[0] = '\0'; opts = buf; @@ -456,9 +484,9 @@ malloc_conf_init(void) uintmax_t um; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ um = malloc_strtoumax(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -477,9 +505,9 @@ malloc_conf_init(void) long l; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ l = strtol(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -615,7 +643,8 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { @@ -770,13 +799,11 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. 
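Starting with malloc_conf_init() above, direct reads and writes of errno are replaced by get_errno()/set_errno() calls; like the _WIN32 init_lock constructor, this keeps the MinGW/MSVC ports in a single code path, presumably by routing the value through GetLastError()/SetLastError() on Windows. The wrappers live in the internal util.h, outside this section; a sketch of the assumed shape:

#ifdef _WIN32
#  include <windows.h>
#else
#  include <errno.h>
#endif

static inline void
set_errno(int errnum)
{

#ifdef _WIN32
        SetLastError(errnum);
#else
        errno = errnum;
#endif
}

static inline int
get_errno(void)
{

#ifdef _WIN32
        return ((int)GetLastError());
#else
        return (errno);
#endif
}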
*/ -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_malloc(size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -814,7 +841,7 @@ je_malloc(size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); @@ -921,8 +948,6 @@ imemalign(void **memptr, size_t alignment, size_t size, return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -932,8 +957,6 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_aligned_alloc(size_t alignment, size_t size) { @@ -942,21 +965,19 @@ je_aligned_alloc(size_t alignment, size_t size) if ((err = imemalign(&ret, alignment, size, 1)) != 0) { ret = NULL; - errno = err; + set_errno(err); } JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), false); return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_calloc(size_t num, size_t size) { void *ret; size_t num_size; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -1012,7 +1033,7 @@ je_calloc(size_t num, size_t size) "memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) @@ -1026,12 +1047,11 @@ je_calloc(size_t num, size_t size) return (ret); } -JEMALLOC_ATTR(visibility("default")) void * je_realloc(void *ptr, size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_size = 0; size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); @@ -1113,7 +1133,7 @@ je_realloc(void *ptr, size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1155,7 +1175,7 @@ je_realloc(void *ptr, size_t size) "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } @@ -1174,7 +1194,6 @@ je_realloc(void *ptr, size_t size) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { @@ -1209,8 +1228,6 @@ je_free(void *ptr) */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t size) { @@ -1222,8 +1239,6 @@ je_memalign(size_t alignment, size_t size) #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { @@ -1252,17 +1267,12 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. 
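The JEMALLOC_ATTR(malloc), JEMALLOC_ATTR(nonnull(1)) and JEMALLOC_ATTR(visibility("default")) annotations dropped from the definitions above are not simply lost: they presumably move to the prototypes in the headers, and symbol visibility in particular is now expressed with a single JEMALLOC_EXPORT macro, visible on the hook and prefork definitions further down. Its definition sits in jemalloc_defs.h rather than in this section; a plausible sketch, assuming the usual GCC/MSVC split:

#include <stddef.h>

#ifdef _WIN32
#  define JEMALLOC_EXPORT __declspec(dllexport)
#else
#  define JEMALLOC_EXPORT __attribute__((visibility("default")))
#endif

/* Example: export a symbol without repeating per-definition attributes. */
JEMALLOC_EXPORT void *example_malloc(size_t size);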
*/ -JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = je_free; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = je_malloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; +JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = + je_realloc; +JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = + je_memalign; #endif /* @@ -1273,7 +1283,6 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; * Begin non-standard functions. */ -JEMALLOC_ATTR(visibility("default")) size_t je_malloc_usable_size(const void *ptr) { @@ -1289,7 +1298,6 @@ je_malloc_usable_size(const void *ptr) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) @@ -1298,7 +1306,6 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_print(write_cb, cbopaque, opts); } -JEMALLOC_ATTR(visibility("default")) int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1310,7 +1317,6 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { @@ -1321,7 +1327,6 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (ctl_nametomib(name, mibp, miblenp)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1357,8 +1362,6 @@ iallocm(size_t usize, size_t alignment, bool zero) return (imalloc(usize)); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { @@ -1367,7 +1370,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(size != 0); @@ -1380,6 +1382,8 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) goto label_oom; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto label_oom; @@ -1426,8 +1430,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { @@ -1439,7 +1441,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(*ptr != NULL); @@ -1449,6 +1450,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) p = *ptr; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* * usize isn't knowable before iralloc() returns when extra is * non-zero. 
Therefore, compute its maximum possible value and @@ -1536,8 +1539,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_sallocm(const void *ptr, size_t *rsize, int flags) { @@ -1557,8 +1558,6 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_dallocm(void *ptr, int flags) { @@ -1586,7 +1585,6 @@ je_dallocm(void *ptr, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(visibility("default")) int je_nallocm(size_t *rsize, size_t size, int flags) { @@ -1622,8 +1620,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_prefork(void) #endif { @@ -1644,8 +1641,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_postfork(void) #endif { diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c index 7be5fc9ef10e..4a90a05e984e 100644 --- a/contrib/jemalloc/src/mutex.c +++ b/contrib/jemalloc/src/mutex.c @@ -1,10 +1,14 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include #endif +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + /******************************************************************************/ /* Data. */ @@ -16,7 +20,7 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static void pthread_create_once(void); #endif @@ -26,7 +30,7 @@ static void pthread_create_once(void); * process goes multi-threaded. */ -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); @@ -44,8 +48,7 @@ pthread_create_once(void) isthreaded = true; } -JEMALLOC_ATTR(visibility("default")) -int +JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) @@ -79,7 +82,12 @@ _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN + +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, + _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { @@ -101,7 +109,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (true); } pthread_mutexattr_destroy(&attr); - #endif return (false); } diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index 227560b8fce5..de1d392993e7 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -64,11 +64,6 @@ static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -static malloc_mutex_t enq_mtx; -static bool enq; -static bool enq_idump; -static bool enq_gdump; - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
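The mutex.c hunk above gives malloc_mutex_init() a Windows branch built on InitializeCriticalSectionAndSpinCount(), with _CRT_SPINCOUNT (4000) as a fallback spin count, while the pthread branches keep their existing behavior. A trimmed-down sketch of that two-way split, with the OSSpin and FreeBSD callback branches and the mutex-attribute setup omitted:

#include <stdbool.h>

#ifdef _WIN32
#  include <windows.h>
#  ifndef _CRT_SPINCOUNT
#    define _CRT_SPINCOUNT 4000
#  endif
typedef struct { CRITICAL_SECTION lock; } my_mutex_t;

static bool
my_mutex_init(my_mutex_t *mutex)
{

        /* Returns true on failure, mirroring malloc_mutex_init(). */
        return (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
            _CRT_SPINCOUNT));
}
#else
#  include <pthread.h>
typedef struct { pthread_mutex_t lock; } my_mutex_t;

static bool
my_mutex_init(my_mutex_t *mutex)
{

        return (pthread_mutex_init(&mutex->lock, NULL) != 0);
}
#endif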
*/ @@ -148,20 +143,19 @@ bt_dup(prof_bt_t *bt) } static inline void -prof_enter(void) +prof_enter(prof_tdata_t *prof_tdata) { cassert(config_prof); - malloc_mutex_lock(&enq_mtx); - enq = true; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq == false); + prof_tdata->enq = true; malloc_mutex_lock(&bt2ctx_mtx); } static inline void -prof_leave(void) +prof_leave(prof_tdata_t *prof_tdata) { bool idump, gdump; @@ -169,13 +163,12 @@ prof_leave(void) malloc_mutex_unlock(&bt2ctx_mtx); - malloc_mutex_lock(&enq_mtx); - enq = false; - idump = enq_idump; - enq_idump = false; - gdump = enq_gdump; - enq_gdump = false; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq); + prof_tdata->enq = false; + idump = prof_tdata->enq_idump; + prof_tdata->enq_idump = false; + gdump = prof_tdata->enq_gdump; + prof_tdata->enq_gdump = false; if (idump) prof_idump(); @@ -446,12 +439,9 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } + prof_tdata = prof_tdata_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { union { @@ -468,52 +458,48 @@ prof_lookup(prof_bt_t *bt) * This thread's cache lacks bt. Look for it in the global * cache. */ - prof_enter(); + prof_enter(prof_tdata); if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ ctx.v = imalloc(sizeof(prof_ctx_t)); if (ctx.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); return (NULL); } btkey.p = bt_dup(bt); if (btkey.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); idalloc(ctx.v); return (NULL); } ctx.p->bt = btkey.p; ctx.p->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->nlimbo = 1; memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ - prof_leave(); + prof_leave(prof_tdata); idalloc(btkey.v); idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; new_ctx = true; } else { /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). + * Increment nlimbo, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). */ malloc_mutex_lock(ctx.p->lock); - ctx.p->cnt_merged.curobjs++; + ctx.p->nlimbo++; malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } - prof_leave(); + prof_leave(prof_tdata); /* Link a prof_thd_cnt_t into ctx for this thread. */ if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { @@ -555,7 +541,7 @@ prof_lookup(prof_bt_t *bt) ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; + ctx.p->nlimbo--; malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. 
*/ @@ -688,26 +674,30 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) static void prof_ctx_destroy(prof_ctx_t *ctx) { + prof_tdata_t *prof_tdata; cassert(config_prof); /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. */ - prof_enter(); + prof_tdata = *prof_tdata_tsd_get(); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) assert(false); - prof_leave(); + prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); @@ -717,9 +707,9 @@ prof_ctx_destroy(prof_ctx_t *ctx) * Compensate for increment in prof_ctx_merge() or * prof_lookup(). */ - ctx->cnt_merged.curobjs--; + ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); - prof_leave(); + prof_leave(prof_tdata); } } @@ -738,12 +728,12 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ql_remove(&ctx->cnts_ql, cnt, cnts_link); if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0) { + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { /* - * Artificially raise ctx->cnt_merged.curobjs in order to keep - * another thread from winning the race to destroy ctx while - * this one has ctx->lock dropped. Without this, it would be - * possible for another thread to: + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: * * 1) Sample an allocation associated with ctx. * 2) Deallocate the sampled object. @@ -752,7 +742,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) * The result would be that ctx no longer exists by the time * this thread accesses it in prof_ctx_destroy(). */ - ctx->cnt_merged.curobjs++; + ctx->nlimbo++; destroy = true; } else destroy = false; @@ -768,7 +758,16 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) cassert(config_prof); - if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + /* + * Current statistics can sum to 0 as a result of unmerged per thread + * statistics. Additionally, interval- and growth-triggered dumps can + * occur between the time a ctx is created and when its statistics are + * filled in. Avoid dumping any ctx that is an artifact of either + * implementation detail. 
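The prof.c changes above stop abusing cnt_merged.curobjs as a destruction guard and add a dedicated nlimbo counter: each thread that holds a ctx outside bt2ctx_mtx bumps nlimbo, and prof_ctx_destroy() only proceeds when the merged statistics are empty and its own limbo reference is the last one. That is what lets the dump path treat curobjs as a real object count. A reduced sketch of the invariant, with illustrative names that are not jemalloc's:

#include <pthread.h>
#include <stdbool.h>

typedef struct {
        pthread_mutex_t lock;
        unsigned curobjs;       /* Live sampled objects (real statistics). */
        unsigned nlimbo;        /* Threads holding the ctx outside the lock. */
} ctx_t;

/* Taken before dropping the global table lock. */
static void
ctx_hold(ctx_t *ctx)
{

        pthread_mutex_lock(&ctx->lock);
        ctx->nlimbo++;          /* Forbids destruction until released. */
        pthread_mutex_unlock(&ctx->lock);
}

static void
ctx_release(ctx_t *ctx)
{

        pthread_mutex_lock(&ctx->lock);
        ctx->nlimbo--;
        pthread_mutex_unlock(&ctx->lock);
}

/* Caller holds ctx->lock and one limbo reference of its own. */
static bool
ctx_can_destroy(const ctx_t *ctx)
{

        return (ctx->curobjs == 0 && ctx->nlimbo == 1);
}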
+ */ + if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { + assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); @@ -831,6 +830,7 @@ prof_dump_maps(bool propagate_err) static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { + prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; union { @@ -845,7 +845,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_enter(); + prof_tdata = prof_tdata_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); + prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { @@ -896,7 +899,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_flush(propagate_err)) goto label_error; close(prof_dump_fd); - prof_leave(); + prof_leave(prof_tdata); if (leakcheck && cnt_all.curbytes != 0) { malloc_printf(": Leak summary: %"PRId64" byte%s, %" @@ -911,7 +914,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) return (false); label_error: - prof_leave(); + prof_leave(prof_tdata); return (true); } @@ -933,6 +936,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) "%s.%d.%"PRIu64".%c.heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } + prof_dump_seq++; } static void @@ -956,19 +960,24 @@ prof_fdump(void) void prof_idump(void) { + prof_tdata_t *prof_tdata; char filename[PATH_MAX + 1]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_idump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_idump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1005,19 +1014,24 @@ prof_mdump(const char *filename) void prof_gdump(void) { + prof_tdata_t *prof_tdata; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_gdump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return; + if (prof_tdata->enq) { + prof_tdata->enq_gdump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1110,6 +1124,10 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; + prof_tdata->enq = false; + prof_tdata->enq_idump = false; + prof_tdata->enq_gdump = false; + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); @@ -1123,24 +1141,41 @@ prof_tdata_cleanup(void *arg) cassert(config_prof); - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* Iteratively merge cnt's into the global stats and delete them. 
*/ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); + if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY + * in order to receive another callback. + */ + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to PROF_TDATA_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the prof_tdata. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (prof_tdata != NULL) { + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + idalloc(prof_tdata->vec); + idalloc(prof_tdata); + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); } - - idalloc(prof_tdata->vec); - - idalloc(prof_tdata); - prof_tdata = NULL; - prof_tdata_tsd_set(&prof_tdata); } void @@ -1206,12 +1241,6 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); - if (malloc_mutex_init(&enq_mtx)) - return (true); - enq = false; - enq_idump = false; - enq_gdump = false; - if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) diff --git a/contrib/jemalloc/src/quarantine.c b/contrib/jemalloc/src/quarantine.c index 5fb6c390c41d..9005ab3ba067 100644 --- a/contrib/jemalloc/src/quarantine.c +++ b/contrib/jemalloc/src/quarantine.c @@ -1,17 +1,31 @@ #include "jemalloc/internal/jemalloc_internal.h" +/* + * quarantine pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) +#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) +#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY + /******************************************************************************/ /* Data. */ +typedef struct quarantine_obj_s quarantine_obj_t; typedef struct quarantine_s quarantine_t; +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; + size_t curbytes; + size_t curobjs; + size_t first; #define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - void *objs[1]; /* Dynamically sized ring buffer. */ + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ }; static void quarantine_cleanup(void *arg); @@ -35,7 +49,7 @@ quarantine_init(size_t lg_maxobjs) quarantine_t *quarantine; quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(void *))); + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -58,23 +72,22 @@ quarantine_grow(quarantine_t *quarantine) return (quarantine); ret->curbytes = quarantine->curbytes; - if (quarantine->first + quarantine->curobjs < (ZU(1) << + ret->curobjs = quarantine->curobjs; + if (quarantine->first + quarantine->curobjs <= (ZU(1) << quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(void *)); - ret->curobjs = quarantine->curobjs; + quarantine->curobjs * sizeof(quarantine_obj_t)); } else { /* objs ring buffer data wrap around. */ - size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * - sizeof(void *)); - ret->curobjs = ncopy; - if (quarantine->curobjs != 0) { - memcpy(&ret->objs[ret->curobjs], quarantine->objs, - quarantine->curobjs - ncopy); - } + size_t ncopy_b = quarantine->curobjs - ncopy_a; + + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a + * sizeof(quarantine_obj_t)); + memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * + sizeof(quarantine_obj_t)); } return (ret); @@ -85,10 +98,10 @@ quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - void *ptr = quarantine->objs[quarantine->first]; - size_t usize = isalloc(ptr, config_prof); - idalloc(ptr); - quarantine->curbytes -= usize; + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << quarantine->lg_maxobjs) - 1); @@ -105,10 +118,25 @@ quarantine(void *ptr) assert(opt_quarantine); quarantine = *quarantine_tsd_get(); - if (quarantine == NULL && (quarantine = - quarantine_init(LG_MAXOBJS_INIT)) == NULL) { - idalloc(ptr); - return; + if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { + if (quarantine == NULL) { + if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == + NULL) { + idalloc(ptr); + return; + } + } else { + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called + * after quarantine_cleanup() was called. 
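quarantine_grow() above now copies the live region [first, first + curobjs) of the old ring buffer either with one memcpy (no wrap-around) or with two (wrap-around), and both copies are scaled by the element size, which the old second memcpy was not. The same copy logic in plain C, detached from the jemalloc types:

#include <stddef.h>
#include <string.h>

/*
 * Copy `count` elements starting at index `first` out of a ring buffer with
 * capacity `cap` into a fresh linear buffer `dst`.
 */
static void
ring_copy(void *dst, const void *src, size_t elem_size, size_t cap,
    size_t first, size_t count)
{
        char *d = (char *)dst;
        const char *s = (const char *)src;

        if (first + count <= cap) {
                /* Live region is contiguous: a single copy suffices. */
                memcpy(d, s + first * elem_size, count * elem_size);
        } else {
                /* Live region wraps: tail of the old buffer, then its head. */
                size_t ncopy_a = cap - first;
                size_t ncopy_b = count - ncopy_a;

                memcpy(d, s + first * elem_size, ncopy_a * elem_size);
                memcpy(d + ncopy_a * elem_size, s, ncopy_b * elem_size);
        }
}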
+ */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); + } + idalloc(ptr); + return; + } } /* * Drain one or more objects if the quarantine size limit would be @@ -128,7 +156,9 @@ quarantine(void *ptr) if (quarantine->curbytes + usize <= opt_quarantine) { size_t offset = (quarantine->first + quarantine->curobjs) & ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine->objs[offset] = ptr; + quarantine_obj_t *obj = &quarantine->objs[offset]; + obj->ptr = ptr; + obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; if (opt_junk) @@ -144,9 +174,26 @@ quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; - if (quarantine != NULL) { + if (quarantine == QUARANTINE_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY + * in order to receive another callback. + */ + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } else if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to QUARANTINE_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the quarantine. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (quarantine != NULL) { quarantine_drain(quarantine, 0); idalloc(quarantine); + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); } } diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 08f7098caad3..433b80d128d9 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -295,16 +295,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, abort(); } - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. 
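prof_tdata_cleanup() above and quarantine_cleanup() just before it use the same thread-shutdown pattern: pointer values just above NULL (PURGATORY, REINCARNATED) record whether the destructor has already run and whether some later destructor touched the slot again, so cleanup can be re-armed without re-creating the object. A compact stand-alone sketch of that state machine, with illustrative names and __thread standing in for the real TSD machinery:

#include <stdint.h>
#include <stdlib.h>

typedef struct thing_s { int payload; } thing_t;

#define THING_STATE_REINCARNATED        ((thing_t *)(uintptr_t)1)
#define THING_STATE_PURGATORY           ((thing_t *)(uintptr_t)2)
#define THING_STATE_MAX                 THING_STATE_PURGATORY

static __thread thing_t *thing_tsd;     /* Stand-in for the TSD slot. */

/* Destructor; the TSD layer may invoke it again if the slot is re-set. */
static void
thing_cleanup(void)
{

        if (thing_tsd == THING_STATE_REINCARNATED) {
                /* A later destructor used the slot; request another pass. */
                thing_tsd = THING_STATE_PURGATORY;
        } else if (thing_tsd == THING_STATE_PURGATORY) {
                /* Second pass with nothing new to do: stay quiet. */
        } else if (thing_tsd != NULL) {
                free(thing_tsd);
                thing_tsd = THING_STATE_PURGATORY;
        }
}

/* Accessor usable even from other destructors running after cleanup. */
static thing_t *
thing_get(void)
{

        if ((uintptr_t)thing_tsd > (uintptr_t)THING_STATE_MAX)
                return (thing_tsd);             /* Fully initialized. */
        if (thing_tsd == NULL) {
                thing_tsd = (thing_t *)malloc(sizeof(thing_t));
                return (thing_tsd);             /* Lazy first-use creation. */
        }
        if (thing_tsd == THING_STATE_PURGATORY) {
                /* Used after cleanup: note it so cleanup gets re-armed. */
                thing_tsd = THING_STATE_REINCARNATED;
        }
        return (NULL);
}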
- */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - if (opts != NULL) { unsigned i; @@ -330,7 +320,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); if (general) { int err; const char *cpv; @@ -375,7 +366,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": \"%s\"\n", cpv); \ } - write_cb(cbopaque, "Run-time option settings:\n"); + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) @@ -425,7 +417,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Min active:dirty page ratio per arena: %u:1\n", (1U << ssv)); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) @@ -447,7 +439,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " (2^%zd)\n", (((uint64_t)1U) << ssv), ssv); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Average profile dump interval: N/A\n"); } } @@ -498,11 +490,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i, ninitialized; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); for (i = ninitialized = 0; i < narenas; i++) { @@ -527,11 +519,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); @@ -547,5 +539,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } - write_cb(cbopaque, "--- End jemalloc statistics ---\n"); + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 9c4970c52389..60244c45f8ba 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -24,6 +24,46 @@ size_t tcache_salloc(const void *ptr) return (arena_salloc(ptr, false)); } +void +tcache_event_hard(tcache_t *tcache) +{ + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low water mark. + */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that the + * fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div stays + * greater than 0. 
+ */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { @@ -80,12 +120,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = - &chunk->map[pageind-map_bias]; + arena_mapp_get(chunk, pageind); if (config_fill && opt_junk) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } - arena_dalloc_bin(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked(arena, chunk, ptr, + mapelm); } else { /* * This object was allocated via a different @@ -158,7 +199,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large_locked(arena, chunk, ptr); else { /* * This object was allocated via a different @@ -314,22 +355,14 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - arena_bin_t *bin = run->bin; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, tcache, mapelm); - malloc_mutex_unlock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); } else if (tcache_size <= tcache_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c index 281a2e9be70e..961a546329c1 100644 --- a/contrib/jemalloc/src/tsd.c +++ b/contrib/jemalloc/src/tsd.c @@ -14,7 +14,7 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. 
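tcache_event_hard() above is the incremental GC that the tcache.c changes factor out: on each pass it flushes roughly 3/4 of the objects sitting below the bin's low-water mark, then adjusts lg_fill_div so the next fill is half as large (if the fill count stays at least 1) or twice as large (keeping lg_fill_div above 0). The fill-count adjustment in isolation, as a runnable toy with made-up example values:

#include <stdio.h>

int
main(void)
{
        unsigned ncached_max = 200;     /* Bin capacity (example value). */
        unsigned lg_fill_div = 1;       /* Fill count = ncached_max >> lg_fill_div. */
        int low_water = 50;             /* Lowest fill level since the last pass. */

        if (low_water > 0) {
                /* Over-provisioned: halve the fill count if it stays >= 1. */
                if ((ncached_max >> (lg_fill_div + 1)) >= 1)
                        lg_fill_div++;
        } else if (low_water < 0) {
                /* The bin ran dry: double the fill count, keep lg_fill_div > 0. */
                if (lg_fill_div > 1)
                        lg_fill_div--;
        }
        printf("next fill count: %u\n", ncached_max >> lg_fill_div);
        return (0);
}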
*/ - return arena_malloc(arenas[0], size, false, false); + return (arena_malloc(arenas[0], size, false, false)); } void @@ -31,12 +31,14 @@ malloc_tsd_no_cleanup(void *arg) not_reached(); } -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -JEMALLOC_ATTR(visibility("default")) +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void _malloc_thread_cleanup(void) { - bool pending[ncleanups], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) @@ -70,3 +72,36 @@ malloc_tsd_boot(void) ncleanups = 0; } + +#ifdef _WIN32 +static BOOL WINAPI +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + + switch (fdwReason) { +#ifdef JEMALLOC_LAZY_LOCK + case DLL_THREAD_ATTACH: + isthreaded = true; + break; +#endif + case DLL_THREAD_DETACH: + _malloc_thread_cleanup(); + break; + default: + break; + } + return (true); +} + +#ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# endif +# pragma section(".CRT$XLY",long,read) +#endif +JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) +static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, + DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; +#endif diff --git a/contrib/jemalloc/src/util.c b/contrib/jemalloc/src/util.c index b80676c3c40a..f94799f380bd 100644 --- a/contrib/jemalloc/src/util.c +++ b/contrib/jemalloc/src/util.c @@ -40,8 +40,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. */ -JEMALLOC_CATTR(visibility("hidden"), static) -void +static void wrtmessage(void *cbopaque, const char *s) { @@ -57,10 +56,9 @@ wrtmessage(void *cbopaque, const char *s) #endif } -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); -JEMALLOC_CATTR(visibility("hidden"), static) +JEMALLOC_ATTR(visibility("hidden")) void wrtmessage_1_0(const char *s1, const char *s2, const char *s3, const char *s4) @@ -76,14 +74,33 @@ void (*__malloc_message_1_0)(const char *s1, const char *s2, const char *s3, const char *s4) = wrtmessage_1_0; __sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +void +malloc_write(const char *s) +{ + + if (je_malloc_message != NULL) + je_malloc_message(NULL, s); + else + wrtmessage(NULL, s); +} + /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so * provide a wrapper. */ int -buferror(int errnum, char *buf, size_t buflen) +buferror(char *buf, size_t buflen) { -#ifdef _GNU_SOURCE + +#ifdef _WIN32 + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + (LPSTR)buf, buflen, NULL); + return (0); +#elif defined(_GNU_SOURCE) char *b = strerror_r(errno, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); @@ -104,7 +121,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) const char *p, *ns; if (base < 0 || base == 1 || base > 36) { - errno = EINVAL; + set_errno(EINVAL); return (UINTMAX_MAX); } b = base; @@ -179,7 +196,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) ret += digit; if (ret < pret) { /* Overflow. 
*/ - errno = ERANGE; + set_errno(ERANGE); return (UINTMAX_MAX); } p++; @@ -299,7 +316,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int ret; size_t i; const char *f; - va_list tap; #define APPEND_C(c) do { \ if (i < size) \ @@ -370,9 +386,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } \ } while (0) - if (config_debug) - va_copy(tap, ap); - i = 0; f = format; while (true) { @@ -431,9 +444,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; - errno = 0; + set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || errno != + assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; if (*f == '.') { @@ -457,9 +470,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uprec; - errno = 0; + set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || errno != ERANGE); + assert(uprec != UINTMAX_MAX || get_errno() != + ERANGE); prec = (int)uprec; break; } @@ -610,7 +624,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = je_malloc_message; + write_cb = (je_malloc_message != NULL) ? je_malloc_message : + wrtmessage; cbopaque = NULL; }