From 8244f2aa7cb3ce15f2d5ce7f860913d2e7cc784d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Mar 2017 01:14:48 +0000 Subject: [PATCH] Update jemalloc to 4.5.0. --- contrib/jemalloc/ChangeLog | 35 ++++ contrib/jemalloc/FREEBSD-Xlist | 1 + contrib/jemalloc/FREEBSD-diffs | 26 +-- contrib/jemalloc/VERSION | 2 +- contrib/jemalloc/doc/jemalloc.3 | 19 ++- .../include/jemalloc/internal/arena.h | 1 + .../include/jemalloc/internal/chunk.h | 4 +- .../include/jemalloc/internal/extent.h | 5 + .../jemalloc/internal/jemalloc_internal.h | 8 +- .../internal/jemalloc_internal_defs.h | 12 +- .../jemalloc/include/jemalloc/internal/mb.h | 2 +- .../include/jemalloc/internal/mutex.h | 12 +- .../jemalloc/internal/private_namespace.h | 10 +- .../include/jemalloc/internal/tcache.h | 3 + .../jemalloc/include/jemalloc/internal/tsd.h | 7 +- .../include/jemalloc/internal/witness.h | 72 ++++++-- contrib/jemalloc/include/jemalloc/jemalloc.h | 6 +- contrib/jemalloc/src/arena.c | 157 ++++++++++++++---- contrib/jemalloc/src/chunk.c | 14 +- contrib/jemalloc/src/chunk_dss.c | 45 +++-- contrib/jemalloc/src/ctl.c | 10 +- contrib/jemalloc/src/extent.c | 37 ++++- contrib/jemalloc/src/huge.c | 45 +++-- contrib/jemalloc/src/jemalloc.c | 43 +++-- contrib/jemalloc/src/pages.c | 4 +- contrib/jemalloc/src/stats.c | 97 ++++++----- contrib/jemalloc/src/tcache.c | 120 +++++++++---- contrib/jemalloc/src/witness.c | 18 +- 28 files changed, 580 insertions(+), 235 deletions(-) diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index f75edd933ad3..a9406853e1bf 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -4,6 +4,41 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.5.0 (February 28, 2017) + + This is the first release to benefit from much broader continuous integration + testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure + in place for prior releases, it would have caught all of the most serious + regressions fixed by this release. + + New features: + - Add --disable-thp and the opt.thp to provide opt-out mechanisms for + transparent huge page integration. (@jasone) + - Update zone allocator integration to work with macOS 10.12. (@glandium) + - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and + EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not + during configuration. (@jasone, @ronawho) + + Bug fixes: + - Fix DSS (sbrk(2)-based) allocation. This regression was first released in + 4.3.0. (@jasone) + - Handle race in per size class utilization computation. This functionality + was first released in 4.0.0. (@interwq) + - Fix lock order reversal during gdump. (@jasone) + - Fix-refactor tcache synchronization. This regression was first released in + 4.0.0. (@jasone) + - Fix various JSON-formatted malloc_stats_print() bugs. This functionality + was first released in 4.3.0. (@jasone) + - Fix huge-aligned allocation. This regression was first released in 4.4.0. + (@jasone) + - When transparent huge page integration is enabled, detect what state pages + start in according to the kernel's current operating mode, and only convert + arena chunks to non-huge during purging if that is not their initial state. + This functionality was first released in 4.4.0. (@jasone) + - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. + This regression was first released in 4.0.0. (@jasone, @428desmo) + - Properly detect sparc64 when building for Linux. (@glaubitz) + * 4.4.0 (December 3, 2016) New features: diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index 5d1faad1a956..a1339c9d66bf 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -49,6 +49,7 @@ include/msvc_compat/ install-sh jemalloc.pc* msvc/ +scripts/ src/valgrind.c src/zone.c test/ diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index fce089d83d65..730a6f5f63bf 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,5 +1,5 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index d9c8345..9898c3c 100644 +index c97ab0f..be8dda5 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -53,11 +53,23 @@ @@ -27,7 +27,7 @@ index d9c8345..9898c3c 100644 Standard API -@@ -2963,4 +2975,18 @@ malloc_conf = "lg_chunk:24";]]> +@@ -2989,4 +3001,18 @@ malloc_conf = "lg_chunk:24";]]> The posix_memalign() function conforms to IEEE Std 1003.1-2001 (POSIX.1). @@ -47,10 +47,10 @@ index d9c8345..9898c3c 100644 + diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h -index ce4e602..35360b6 100644 +index 119e3a5..277989f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h -@@ -730,8 +730,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) +@@ -731,8 +731,13 @@ arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) { @@ -64,7 +64,7 @@ index ce4e602..35360b6 100644 } JEMALLOC_ALWAYS_INLINE size_t -@@ -790,8 +795,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) +@@ -791,8 +796,13 @@ arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) JEMALLOC_ALWAYS_INLINE const size_t * arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) { @@ -79,7 +79,7 @@ index ce4e602..35360b6 100644 JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in -index e7ace7d..d86c61d 100644 +index e3b499a..827fdbf 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -8,6 +8,9 @@ @@ -122,7 +122,7 @@ index c907d91..4626632 100644 #ifdef _WIN32 # include diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h -index b442d2d..76518db 100644 +index 2b4b1c3..e03a6d0 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -57,9 +57,6 @@ struct malloc_mutex_s { @@ -144,10 +144,10 @@ index b442d2d..76518db 100644 #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt -index c1c6c40..c6395fd 100644 +index 60b57e5..056a8fe 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt -@@ -310,7 +310,6 @@ iralloct_realign +@@ -312,7 +312,6 @@ iralloct_realign isalloc isdalloct isqalloc @@ -335,7 +335,7 @@ index f943891..47d032c 100755 +#include "jemalloc_FreeBSD.h" EOF diff --git a/src/jemalloc.c b/src/jemalloc.c -index baead66..8a49f26 100644 +index f73a26c..fcfe204 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,10 @@ @@ -349,7 +349,7 @@ index baead66..8a49f26 100644 /* Runtime configuration options. */ const char *je_malloc_conf #ifndef _WIN32 -@@ -2775,6 +2779,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) +@@ -2781,6 +2785,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) */ /******************************************************************************/ /* @@ -457,7 +457,7 @@ index baead66..8a49f26 100644 * The following functions are used by threading libraries for protection of * malloc during fork(). */ -@@ -2913,4 +3018,11 @@ jemalloc_postfork_child(void) +@@ -2922,4 +3027,11 @@ jemalloc_postfork_child(void) ctl_postfork_child(tsd_tsdn(tsd)); } @@ -516,7 +516,7 @@ index 6333e73..13f8d79 100644 +#endif +} diff --git a/src/util.c b/src/util.c -index dd8c236..a4ff287 100755 +index dd8c236..a4ff287 100644 --- a/src/util.c +++ b/src/util.c @@ -67,6 +67,22 @@ wrtmessage(void *cbopaque, const char *s) diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index 810bd6d4c9a7..59deb3f8aa54 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc +4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index cd3f197172ef..b8fb09e878c1 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL Stylesheets v1.76.1 -.\" Date: 12/04/2016 +.\" Date: 02/28/2017 .\" Manual: User Manual -.\" Source: jemalloc 4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc +.\" Source: jemalloc 4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5 .\" Language: English .\" -.TH "JEMALLOC" "3" "12/04/2016" "jemalloc 4.4.0-0-gf1f76357313e" "User Manual" +.TH "JEMALLOC" "3" "02/28/2017" "jemalloc 4.5.0-0-g04380e79f1e2" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 4\&.4\&.0\-0\-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc\&. More information can be found at the +This manual describes jemalloc 4\&.5\&.0\-0\-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -779,6 +779,12 @@ config\&.tcache (\fBbool\fR) r\- was not specified during build configuration\&. .RE .PP +config\&.thp (\fBbool\fR) r\- +.RS 4 +\fB\-\-disable\-thp\fR +was not specified during build configuration, and the system supports transparent huge page manipulation\&. +.RE +.PP config\&.tls (\fBbool\fR) r\- .RS 4 \fB\-\-disable\-tls\fR @@ -965,6 +971,11 @@ option for related tuning information\&. This option is enabled by default unles \m[blue]\fBValgrind\fR\m[]\&\s-2\u[3]\d\s+2, in which case it is forcefully disabled\&. .RE .PP +opt\&.thp (\fBbool\fR) r\- [\fB\-\-enable\-thp\fR] +.RS 4 +Transparent huge page (THP) integration enabled/disabled\&. When enabled, THPs are explicitly disabled as a side effect of unused dirty page purging for chunks that back small and/or large allocations, because such chunks typically comprise active, unused dirty, and untouched clean pages\&. This option is enabled by default\&. +.RE +.PP opt\&.lg_tcache_max (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] .RS 4 Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h index 35360b64ad11..277989f4b795 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -506,6 +506,7 @@ static const size_t large_pad = #endif ; +extern bool opt_thp; extern purge_mode_t opt_purge; extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk.h b/contrib/jemalloc/include/jemalloc/internal/chunk.h index 50b9904b04ec..55df9acc7bb4 100644 --- a/contrib/jemalloc/include/jemalloc/internal/chunk.h +++ b/contrib/jemalloc/include/jemalloc/internal/chunk.h @@ -52,8 +52,8 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const void *chunk, - const extent_node_t *node); +bool chunk_register(const void *chunk, const extent_node_t *node, + bool *gdump); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, diff --git a/contrib/jemalloc/include/jemalloc/internal/extent.h b/contrib/jemalloc/include/jemalloc/internal/extent.h index 168ffe64389d..fc77f9f55f3c 100644 --- a/contrib/jemalloc/include/jemalloc/internal/extent.h +++ b/contrib/jemalloc/include/jemalloc/internal/extent.h @@ -75,6 +75,11 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +#ifdef JEMALLOC_JET +size_t extent_size_quantize_floor(size_t size); +#endif +size_t extent_size_quantize_ceil(size_t size); + rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h index bf489970ecdf..7e72c0714658 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -96,6 +96,13 @@ static const bool config_tcache = false #endif ; +static const bool config_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; static const bool config_tls = #ifdef JEMALLOC_TLS true @@ -155,7 +162,6 @@ static const bool config_cache_oblivious = #include #include #include -#include #endif #include "jemalloc/internal/ph.h" diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h index 79ce117f0d9a..c6dc20aa8522 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h @@ -240,7 +240,6 @@ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ /* #undef JEMALLOC_ZONE */ -/* #undef JEMALLOC_ZONE_VERSION */ /* * Methods for determining whether the OS overcommits. @@ -254,6 +253,12 @@ /* Defined if madvise(2) is available. */ #define JEMALLOC_HAVE_MADVISE +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + /* * Methods for purging unused pages differ between operating systems. * @@ -266,10 +271,7 @@ #define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED -/* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). - */ +/* Defined if transparent huge page support is enabled. */ /* #undef JEMALLOC_THP */ /* Define if operating system has alloca.h header. */ diff --git a/contrib/jemalloc/include/jemalloc/internal/mb.h b/contrib/jemalloc/include/jemalloc/internal/mb.h index 5384728fd50e..e58da5c32477 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mb.h +++ b/contrib/jemalloc/include/jemalloc/internal/mb.h @@ -76,7 +76,7 @@ mb_write(void) : "memory" /* Clobbers. */ ); } -#elif defined(__sparc64__) +#elif defined(__sparc__) && defined(__arch64__) JEMALLOC_INLINE void mb_write(void) { diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex.h b/contrib/jemalloc/include/jemalloc/internal/mutex.h index 76518db7575c..e03a6d071ea4 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mutex.h +++ b/contrib/jemalloc/include/jemalloc/internal/mutex.h @@ -83,8 +83,8 @@ JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -98,16 +98,16 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif - witness_lock(tsdn, &mutex->witness); } + witness_lock(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn, &mutex->witness); if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -128,16 +128,14 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) - witness_assert_owner(tsdn, &mutex->witness); + witness_assert_owner(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); } #endif diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index d68d99d8df39..80917e8434fb 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -223,6 +223,8 @@ #define extent_node_sn_set JEMALLOC_N(extent_node_sn_set) #define extent_node_zeroed_get JEMALLOC_N(extent_node_zeroed_get) #define extent_node_zeroed_set JEMALLOC_N(extent_node_zeroed_set) +#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) +#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) #define extent_tree_ad_destroy JEMALLOC_N(extent_tree_ad_destroy) #define extent_tree_ad_destroy_recurse JEMALLOC_N(extent_tree_ad_destroy_recurse) #define extent_tree_ad_empty JEMALLOC_N(extent_tree_ad_empty) @@ -389,6 +391,7 @@ #define opt_redzone JEMALLOC_N(opt_redzone) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_thp JEMALLOC_N(opt_thp) #define opt_utrace JEMALLOC_N(opt_utrace) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) @@ -528,6 +531,9 @@ #define tcache_get JEMALLOC_N(tcache_get) #define tcache_get_hard JEMALLOC_N(tcache_get_hard) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) +#define tcache_postfork_child JEMALLOC_N(tcache_postfork_child) +#define tcache_postfork_parent JEMALLOC_N(tcache_postfork_parent) +#define tcache_prefork JEMALLOC_N(tcache_prefork) #define tcache_salloc JEMALLOC_N(tcache_salloc) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcaches JEMALLOC_N(tcaches) @@ -611,14 +617,16 @@ #define valgrind_make_mem_defined JEMALLOC_N(valgrind_make_mem_defined) #define valgrind_make_mem_noaccess JEMALLOC_N(valgrind_make_mem_noaccess) #define valgrind_make_mem_undefined JEMALLOC_N(valgrind_make_mem_undefined) +#define witness_assert_depth JEMALLOC_N(witness_assert_depth) +#define witness_assert_depth_to_rank JEMALLOC_N(witness_assert_depth_to_rank) #define witness_assert_lockless JEMALLOC_N(witness_assert_lockless) #define witness_assert_not_owner JEMALLOC_N(witness_assert_not_owner) #define witness_assert_owner JEMALLOC_N(witness_assert_owner) +#define witness_depth_error JEMALLOC_N(witness_depth_error) #define witness_fork_cleanup JEMALLOC_N(witness_fork_cleanup) #define witness_init JEMALLOC_N(witness_init) #define witness_lock JEMALLOC_N(witness_lock) #define witness_lock_error JEMALLOC_N(witness_lock_error) -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) #define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) #define witness_owner JEMALLOC_N(witness_owner) #define witness_owner_error JEMALLOC_N(witness_owner_error) diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h index 01ba062dea6f..5fe5ebfa3794 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -149,6 +149,9 @@ bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index 9055acafd2e7..9f3743357b1b 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -479,13 +479,14 @@ a_name##tsd_wrapper_get(bool init) \ \ if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ - wrapper = tsd_init_check_recursion( \ - &a_name##tsd_init_head, &block); \ + wrapper = (a_name##tsd_wrapper_t *) \ + tsd_init_check_recursion(&a_name##tsd_init_head, \ + &block); \ if (wrapper) \ return (wrapper); \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = wrapper; \ + block.data = (void *)wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ diff --git a/contrib/jemalloc/include/jemalloc/internal/witness.h b/contrib/jemalloc/include/jemalloc/internal/witness.h index cdf15d797d0c..30d8c7e902d0 100644 --- a/contrib/jemalloc/include/jemalloc/internal/witness.h +++ b/contrib/jemalloc/include/jemalloc/internal/witness.h @@ -12,21 +12,32 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); */ #define WITNESS_RANK_OMIT 0U +#define WITNESS_RANK_MIN 1U + #define WITNESS_RANK_INIT 1U #define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U +#define WITNESS_RANK_TCACHES 2U +#define WITNESS_RANK_ARENAS 3U -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U +#define WITNESS_RANK_PROF_DUMP 4U +#define WITNESS_RANK_PROF_BT2GCTX 5U +#define WITNESS_RANK_PROF_TDATAS 6U +#define WITNESS_RANK_PROF_TDATA 7U +#define WITNESS_RANK_PROF_GCTX 8U -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_CHUNKS 9U -#define WITNESS_RANK_ARENA_NODE_CACHE 10 +/* + * Used as an argument to witness_assert_depth_to_rank() in order to validate + * depth excluding non-core locks with lower ranks. Since the rank argument to + * witness_assert_depth_to_rank() is inclusive rather than exclusive, this + * definition can have the same value as the minimally ranked core lock. + */ +#define WITNESS_RANK_CORE 9U -#define WITNESS_RANK_BASE 11U +#define WITNESS_RANK_ARENA 9U +#define WITNESS_RANK_ARENA_CHUNKS 10U +#define WITNESS_RANK_ARENA_NODE_CACHE 11U + +#define WITNESS_RANK_BASE 12U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF @@ -91,10 +102,12 @@ extern witness_not_owner_error_t *witness_not_owner_error; void witness_not_owner_error(const witness_t *witness); #endif #ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; +typedef void (witness_depth_error_t)(const witness_list_t *, + witness_rank_t rank_inclusive, unsigned depth); +extern witness_depth_error_t *witness_depth_error; #else -void witness_lockless_error(const witness_list_t *witnesses); +void witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth); #endif void witnesses_cleanup(tsd_t *tsd); @@ -111,6 +124,9 @@ void witness_postfork_child(tsd_t *tsd); bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth); +void witness_assert_depth(tsdn_t *tsdn, unsigned depth); void witness_assert_lockless(tsdn_t *tsdn); void witness_lock(tsdn_t *tsdn, witness_t *witness); void witness_unlock(tsdn_t *tsdn, witness_t *witness); @@ -123,6 +139,8 @@ witness_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; + cassert(config_debug); + witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, link) { if (w == witness) @@ -175,9 +193,10 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) } JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) -{ +witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth) { tsd_t *tsd; + unsigned d; witness_list_t *witnesses; witness_t *w; @@ -188,10 +207,29 @@ witness_assert_lockless(tsdn_t *tsdn) return; tsd = tsdn_tsd(tsdn); + d = 0; witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); - if (w != NULL) - witness_lockless_error(witnesses); + if (w != NULL) { + ql_reverse_foreach(w, witnesses, link) { + if (w->rank < rank_inclusive) { + break; + } + d++; + } + } + if (d != depth) + witness_depth_error(witnesses, rank_inclusive, depth); +} + +JEMALLOC_INLINE void +witness_assert_depth(tsdn_t *tsdn, unsigned depth) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) { + witness_assert_depth(tsdn, 0); } JEMALLOC_INLINE void diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index d3b175dd0a24..6b386623f3e0 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -87,12 +87,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "4.4.0-0-gf1f76357313e7dcad7262f17a48ff0a2e005fcdc" +#define JEMALLOC_VERSION "4.5.0-0-g04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5" #define JEMALLOC_VERSION_MAJOR 4 -#define JEMALLOC_VERSION_MINOR 4 +#define JEMALLOC_VERSION_MINOR 5 #define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "f1f76357313e7dcad7262f17a48ff0a2e005fcdc" +#define JEMALLOC_VERSION_GID "04380e79f1e2428bd0ad000bbc6e3d2dfc6b66a5" # define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index 648a8da3ab4b..a9dff0b0a266 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -4,6 +4,8 @@ /******************************************************************************/ /* Data. */ +bool opt_thp = true; +static bool thp_initially_huge; purge_mode_t opt_purge = PURGE_DEFAULT; const char *purge_mode_names[] = { "ratio", @@ -568,8 +570,8 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - size_t sn, bool zero) +arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, size_t sn, bool zero, + bool *gdump) { /* @@ -580,7 +582,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, */ extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsdn, chunk, &chunk->node)); + return (chunk_register(chunk, &chunk->node, gdump)); } static arena_chunk_t * @@ -591,6 +593,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, size_t sn; malloc_mutex_unlock(tsdn, &arena->lock); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit); @@ -603,16 +607,20 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn, - *zero)) { - if (!*commit) { - /* Undo commit of header. */ - chunk_hooks->decommit(chunk, chunksize, 0, map_bias << - LG_PAGE, arena->ind); + if (chunk != NULL) { + bool gdump; + if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) { + if (!*commit) { + /* Undo commit of header. */ + chunk_hooks->decommit(chunk, chunksize, 0, + map_bias << LG_PAGE, arena->ind); + } + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, + (void *)chunk, chunksize, sn, *zero, *commit); + chunk = NULL; } - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, - chunksize, sn, *zero, *commit); - chunk = NULL; + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); } malloc_mutex_lock(tsdn, &arena->lock); @@ -627,14 +635,24 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t sn; + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); + malloc_mutex_assert_owner(tsdn, &arena->lock); + chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit, true); if (chunk != NULL) { - if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) { + bool gdump; + if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, sn, true); return (NULL); } + if (config_prof && opt_prof && gdump) { + malloc_mutex_unlock(tsdn, &arena->lock); + prof_gdump(tsdn); + malloc_mutex_lock(tsdn, &arena->lock); + } } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, @@ -664,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (chunk == NULL) return (NULL); - chunk->hugepage = true; + if (config_thp && opt_thp) { + chunk->hugepage = thp_initially_huge; + } /* * Initialize the map to contain one maximal free untouched run. Mark @@ -729,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { - size_t sn, hugepage; + size_t sn; + UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false); bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); sn = extent_node_sn_get(&chunk->node); - hugepage = chunk->hugepage; + if (config_thp && opt_thp) { + hugepage = chunk->hugepage; + } committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -749,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - if (!hugepage) { + if (config_thp && opt_thp && hugepage != thp_initially_huge) { /* - * Convert chunk back to the default state, so that all - * subsequent chunk allocations start out with chunks that can - * be backed by transparent huge pages. + * Convert chunk back to initial THP state, so that all + * subsequent chunk allocations start out in a consistent state. */ - pages_huge(chunk, chunksize); + if (thp_initially_huge) { + pages_huge(chunk, chunksize); + } else { + pages_nohuge(chunk, chunksize); + } } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, @@ -1695,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * If this is the first run purged within chunk, mark - * the chunk as non-huge. This will prevent all use of - * transparent huge pages for this chunk until the chunk - * as a whole is deallocated. + * the chunk as non-THP-capable. This will prevent all + * use of THPs for this chunk until the chunk as a whole + * is deallocated. */ - if (chunk->hugepage) { - pages_nohuge(chunk, chunksize); - chunk->hugepage = false; + if (config_thp && opt_thp && chunk->hugepage) { + chunk->hugepage = pages_nohuge(chunk, + chunksize); } assert(pageind + npages <= chunk_npages); @@ -2694,6 +2720,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return (arena_malloc_small(tsdn, arena, ind, zero)); if (likely(size <= large_maxclass)) return (arena_malloc_large(tsdn, arena, ind, zero)); + assert(index2size(ind) >= chunksize); return (huge_malloc(tsdn, arena, index2size(ind), zero)); } @@ -3755,11 +3782,78 @@ bin_info_init(void) #undef SC } +static void +init_thp_initially_huge(void) { + int fd; + char buf[sizeof("[always] madvise never\n")]; + ssize_t nread; + static const char *enabled_states[] = { + "[always] madvise never\n", + "always [madvise] never\n", + "always madvise [never]\n" + }; + static const bool thp_initially_huge_states[] = { + true, + false, + false + }; + unsigned i; + + if (config_debug) { + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + assert(sizeof(buf) > strlen(enabled_states[i])); + } + } + assert(sizeof(enabled_states)/sizeof(const char *) == + sizeof(thp_initially_huge_states)/sizeof(bool)); + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + if (strncmp(buf, enabled_states[i], (size_t)nread) == 0) { + thp_initially_huge = thp_initially_huge_states[i]; + return; + } + } + +label_error: + thp_initially_huge = false; +} + void arena_boot(void) { unsigned i; + if (config_thp && opt_thp) { + init_thp_initially_huge(); + } + arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_decay_time_default_set(opt_decay_time); @@ -3790,15 +3884,8 @@ arena_boot(void) arena_maxrun = chunksize - (map_bias << LG_PAGE); assert(arena_maxrun > 0); large_maxclass = index2size(size2index(chunksize)-1); - if (large_maxclass > arena_maxrun) { - /* - * For small chunk sizes it's possible for there to be fewer - * non-header pages available than are necessary to serve the - * size classes just below chunksize. - */ - large_maxclass = arena_maxrun; - } assert(large_maxclass > 0); + assert(large_maxclass + large_pad <= arena_maxrun); nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); nhclasses = NSIZES - nlclasses - NBINS; diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c index c1c514a860fa..94f28f2df4b1 100644 --- a/contrib/jemalloc/src/chunk.c +++ b/contrib/jemalloc/src/chunk.c @@ -141,7 +141,7 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, } bool -chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) +chunk_register(const void *chunk, const extent_node_t *node, bool *gdump) { assert(extent_node_addr_get(node) == chunk); @@ -160,8 +160,7 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) */ high = atomic_read_z(&highchunks); } - if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsdn); + *gdump = (cur > high && prof_gdump_get_unlocked()); } return (false); @@ -189,12 +188,17 @@ chunk_deregister(const void *chunk, const extent_node_t *node) static extent_node_t * chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size) { + extent_node_t *node; + size_t qsize; extent_node_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, 0, false, false); - return (extent_tree_szsnad_nsearch(chunks_szsnad, &key)); + qsize = extent_size_quantize_ceil(size); + extent_node_init(&key, arena, NULL, qsize, 0, false, false); + node = extent_tree_szsnad_nsearch(chunks_szsnad, &key); + assert(node == NULL || extent_node_size_get(node) >= size); + return node; } static void * diff --git a/contrib/jemalloc/src/chunk_dss.c b/contrib/jemalloc/src/chunk_dss.c index ee3f83888e0f..8c6793957d95 100644 --- a/contrib/jemalloc/src/chunk_dss.c +++ b/contrib/jemalloc/src/chunk_dss.c @@ -115,8 +115,9 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * malloc. */ while (true) { - void *ret, *cpad, *max_cur, *dss_next, *dss_prev; - size_t gap_size, cpad_size; + void *ret, *max_cur, *dss_next, *dss_prev; + void *gap_addr_chunk, *gap_addr_subchunk; + size_t gap_size_chunk, gap_size_subchunk; intptr_t incr; max_cur = chunk_dss_max_update(new_addr); @@ -124,25 +125,32 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. - */ - gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & - chunksize_mask; - /* - * Compute how much chunk-aligned pad space (if any) is + * Compute how much chunk-aligned gap space (if any) is * necessary to satisfy alignment. This space can be * recycled for later use. */ - cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, - alignment); - cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + gap_addr_chunk = (void *)(CHUNK_CEILING( + (uintptr_t)max_cur)); + ret = (void *)ALIGNMENT_CEILING( + (uintptr_t)gap_addr_chunk, alignment); + gap_size_chunk = (uintptr_t)ret - + (uintptr_t)gap_addr_chunk; + /* + * Compute the address just past the end of the desired + * allocation space. + */ dss_next = (void *)((uintptr_t)ret + size); - if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) + if ((uintptr_t)ret < (uintptr_t)max_cur || + (uintptr_t)dss_next < (uintptr_t)max_cur) goto label_oom; /* Wrap-around. */ - incr = gap_size + cpad_size + size; + /* Compute the increment, including subchunk bytes. */ + gap_addr_subchunk = max_cur; + gap_size_subchunk = (uintptr_t)ret - + (uintptr_t)gap_addr_subchunk; + incr = gap_size_subchunk + size; + + assert((uintptr_t)max_cur + incr == (uintptr_t)ret + + size); /* * Optimistically update dss_max, and roll back below if @@ -157,11 +165,12 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, dss_prev = chunk_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ - if (cpad_size != 0) { + if (gap_size_chunk != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, - &chunk_hooks, cpad, cpad_size, + &chunk_hooks, gap_addr_chunk, + gap_size_chunk, arena_extent_sn_next(arena), false, true); } diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index bc78b20558a5..56bc4f4cca91 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_tcache) +CTL_PROTO(config_thp) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) @@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_thp) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, {NAME("tcache"), CTL(config_tcache)}, + {NAME("thp"), CTL(config_thp)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("valgrind"), CTL(config_valgrind)}, @@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("thp"), CTL(opt_thp)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1268,6 +1272,7 @@ CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_valgrind, bool) @@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) @@ -1476,7 +1482,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (!config_tcache) return (ENOENT); - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1486,8 +1491,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static int diff --git a/contrib/jemalloc/src/extent.c b/contrib/jemalloc/src/extent.c index 218156c608c1..ff8de2fe916c 100644 --- a/contrib/jemalloc/src/extent.c +++ b/contrib/jemalloc/src/extent.c @@ -3,13 +3,11 @@ /******************************************************************************/ -/* - * Round down to the nearest chunk size that can actually be requested during - * normal huge allocation. - */ -JEMALLOC_INLINE_C size_t -extent_quantize(size_t size) -{ +#ifndef JEMALLOC_JET +static +#endif +size_t +extent_size_quantize_floor(size_t size) { size_t ret; szind_t ind; @@ -25,11 +23,32 @@ extent_quantize(size_t size) return (ret); } +size_t +extent_size_quantize_ceil(size_t size) { + size_t ret; + + assert(size > 0); + + ret = extent_size_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large extent, + * because under-sized extents may be mixed in. This only + * happens when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. + */ + ret = index2size(size2index(ret + 1)); + } + return ret; +} + JEMALLOC_INLINE_C int extent_sz_comp(const extent_node_t *a, const extent_node_t *b) { - size_t a_qsize = extent_quantize(extent_node_size_get(a)); - size_t b_qsize = extent_quantize(extent_node_size_get(b)); + size_t a_qsize = extent_size_quantize_floor(extent_node_size_get(a)); + size_t b_qsize = extent_size_quantize_floor(extent_node_size_get(b)); return ((a_qsize > b_qsize) - (a_qsize < b_qsize)); } diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c index 8abd8c00caa1..0fbaa41a1ecf 100644 --- a/contrib/jemalloc/src/huge.c +++ b/contrib/jemalloc/src/huge.c @@ -15,20 +15,20 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(tsdn, ptr, node)); + return (chunk_register(ptr, node, gdump)); } static void -huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump) { bool err; - err = huge_node_set(tsdn, ptr, node); + err = huge_node_set(tsdn, ptr, node, gdump); assert(!err); } @@ -57,11 +57,13 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_t *iarena; extent_node_t *node; size_t sn; - bool is_zeroed; + bool is_zeroed, gdump; /* Allocate one or more contiguous chunks for this request. */ assert(!tsdn_null(tsdn) || arena != NULL); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) @@ -91,11 +93,13 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_node_init(node, arena, ret, usize, sn, is_zeroed, true); - if (huge_node_set(tsdn, ret, node)) { + if (huge_node_set(tsdn, ret, node, &gdump)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn); idalloctm(tsdn, node, NULL, true, true); return (NULL); } + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); /* Insert node into huge. */ malloc_mutex_lock(tsdn, &arena->huge_mtx); @@ -144,7 +148,10 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, extent_node_t *node; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool pre_zeroed, post_zeroed; + bool pre_zeroed, post_zeroed, gdump; + + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Increase usize to incorporate extra. */ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) @@ -178,10 +185,13 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. */ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); @@ -207,7 +217,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, arena_t *arena; chunk_hooks_t chunk_hooks; size_t cdiff; - bool pre_zeroed, post_zeroed; + bool pre_zeroed, post_zeroed, gdump; node = huge_node_get(ptr); arena = extent_node_arena_get(node); @@ -215,6 +225,8 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -241,10 +253,13 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, /* Update the size of the huge allocation. */ huge_node_unset(ptr, node); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. */ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); /* Zap the excess chunks. */ arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize, @@ -258,7 +273,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize, bool zero) { extent_node_t *node; arena_t *arena; - bool is_zeroed_subchunk, is_zeroed_chunk; + bool is_zeroed_subchunk, is_zeroed_chunk, gdump; node = huge_node_get(ptr); arena = extent_node_arena_get(node); @@ -266,6 +281,9 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, is_zeroed_subchunk = extent_node_zeroed_get(node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + /* * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, * update extent's zeroed field, and zero as necessary. @@ -280,8 +298,11 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, extent_node_size_set(node, usize); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && is_zeroed_chunk); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. */ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 8a49f267183e..fcfe204a9bef 100644 --- a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -794,18 +794,19 @@ stats_print_atexit(void) * Begin initialization functions. */ -#ifndef JEMALLOC_HAVE_SECURE_GETENV static char * -secure_getenv(const char *name) +jemalloc_secure_getenv(const char *name) { - +#ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +#else # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) return (NULL); # endif return (getenv(name)); -} #endif +} static unsigned malloc_ncpus(void) @@ -1022,7 +1023,7 @@ malloc_conf_init(void) #endif ; - if ((opts = secure_getenv(envname)) != NULL) { + if ((opts = jemalloc_secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment @@ -1078,18 +1079,18 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if (CONF_MIN_##check_min(um, \ - (min))) \ + (t)(min))) \ o = (t)(min); \ else if (CONF_MAX_##check_max( \ - um, (max))) \ + um, (t)(max))) \ o = (t)(max); \ else \ o = (t)um; \ } else { \ if (CONF_MIN_##check_min(um, \ - (min)) || \ + (t)(min)) || \ CONF_MAX_##check_max(um, \ - (max))) { \ + (t)(max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -1139,16 +1140,18 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, - * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and - * possibly an additional page in the presence of - * redzones. In order to simplify options processing, - * use a conservative bound that accommodates all these - * constraints. + * Chunks always require at least one header page, as + * many as 2^(LG_SIZE_CLASS_GROUP+1) data pages (plus an + * additional page in the presence of cache-oblivious + * large), and possibly an additional page in the + * presence of redzones. In order to simplify options + * processing, use a conservative bound that + * accommodates all these constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1), - (sizeof(size_t) << 3) - 1, yes, yes, true) + LG_SIZE_CLASS_GROUP + 1 + ((config_cache_oblivious + || config_fill) ? 1 : 0), (sizeof(size_t) << 3) - 1, + yes, yes, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1273,6 +1276,9 @@ malloc_conf_init(void) "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } + if (config_thp) { + CONF_HANDLE_BOOL(opt_thp, "thp", true) + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -2932,6 +2938,7 @@ _malloc_prefork(void) witness_prefork(tsd); /* Acquire all mutexes in a safe order. */ ctl_prefork(tsd_tsdn(tsd)); + tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); prof_prefork0(tsd_tsdn(tsd)); for (i = 0; i < 3; i++) { @@ -2991,6 +2998,7 @@ _malloc_postfork(void) } prof_postfork_parent(tsd_tsdn(tsd)); malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); } @@ -3015,6 +3023,7 @@ jemalloc_postfork_child(void) } prof_postfork_child(tsd_tsdn(tsd)); malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); } diff --git a/contrib/jemalloc/src/pages.c b/contrib/jemalloc/src/pages.c index 5f0c9669d2b8..7698e49bff84 100644 --- a/contrib/jemalloc/src/pages.c +++ b/contrib/jemalloc/src/pages.c @@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return (false); @@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return (false); diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 1360f3bd0012..b76afc5a756d 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -39,7 +39,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap, in_gap_prev; + bool in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); @@ -49,7 +49,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"bins\": [\n"); } else { - CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" @@ -137,8 +136,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs : 1000; - assert(milli <= 1000); - if (milli < 10) { + + if (milli > 1000) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. + */ + malloc_snprintf(util, sizeof(util), " race"); + } else if (milli < 10) { malloc_snprintf(util, sizeof(util), "0.00%zu", milli); } else if (milli < 100) { @@ -147,8 +154,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } else if (milli < 1000) { malloc_snprintf(util, sizeof(util), "0.%zu", milli); - } else + } else { + assert(milli == 1000); malloc_snprintf(util, sizeof(util), "1"); + } if (config_tcache) { malloc_cprintf(write_cb, cbopaque, @@ -536,7 +545,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); + "\t\t\t\t}%s\n", (bins || large || huge) ? "," : ""); } else { malloc_cprintf(write_cb, cbopaque, "metadata: mapped: %zu, allocated: %zu\n", @@ -555,7 +564,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged) + bool json, bool more) { const char *cpv; bool bv; @@ -741,6 +750,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(thp, ",") OPT_WRITE_BOOL(prof, ",") OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") @@ -838,9 +848,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nbins\": %u,\n", nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhbins\": %u,\n", uv); + if (config_tcache) { + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + } malloc_cprintf(write_cb, cbopaque, "\t\t\t\"bin\": [\n"); @@ -907,11 +919,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t]\n"); malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); + "\t\t}%s\n", (config_prof || more) ? "," : ""); } /* prof. */ - if (json) { + if (config_prof && json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"prof\": {\n"); @@ -937,8 +949,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\"lg_sample\": %zd\n", ssv); malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : - ""); + "\t\t}%s\n", more ? "," : ""); } } @@ -1023,31 +1034,37 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, narenas, bins, large, huge); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", (ninitialized > 1) ? - "," : ""); + "\t\t\t}%s\n", unmerged ? "," : + ""); } } /* Unmerged stats. */ - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", i); - } else { - malloc_cprintf(write_cb, - cbopaque, "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, cbopaque, - json, i, bins, large, huge); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," : ""); + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", + i); + } else { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, + cbopaque, json, i, bins, + large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? "," + : ""); + } } } } @@ -1069,8 +1086,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t u64sz; bool json = false; bool general = true; - bool merged = true; - bool unmerged = true; + bool merged = config_stats; + bool unmerged = config_stats; bool bins = true; bool large = true; bool huge = true; @@ -1137,8 +1154,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "___ Begin jemalloc statistics ___\n"); } - if (general) - stats_general_print(write_cb, cbopaque, json, merged, unmerged); + if (general) { + bool more = (merged || unmerged); + stats_general_print(write_cb, cbopaque, json, more); + } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, unmerged, bins, large, huge); diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 21540ff46e75..e3b04be64070 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -21,6 +21,9 @@ static unsigned tcaches_past; /* Head of singly linked list tracking available tcaches elements. */ static tcaches_t *tcaches_avail; +/* Protects tcaches{,_past,_avail}. */ +static malloc_mutex_t tcaches_mtx; + /******************************************************************************/ size_t @@ -444,29 +447,56 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } } -bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) -{ - arena_t *arena; - tcache_t *tcache; - tcaches_t *elm; +static bool +tcaches_create_prep(tsd_t *tsd) { + bool err; + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); - if (tcaches == NULL) - return (true); + if (tcaches == NULL) { + err = true; + goto label_return; + } } - if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) - return (true); - arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) - return (true); - tcache = tcache_create(tsd_tsdn(tsd), arena); - if (tcache == NULL) - return (true); + if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) { + err = true; + goto label_return; + } + err = false; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); + return err; +} + +bool +tcaches_create(tsd_t *tsd, unsigned *r_ind) { + bool err; + arena_t *arena; + tcache_t *tcache; + tcaches_t *elm; + + if (tcaches_create_prep(tsd)) { + err = true; + goto label_return; + } + + arena = arena_ichoose(tsd, NULL); + if (unlikely(arena == NULL)) { + err = true; + goto label_return; + } + tcache = tcache_create(tsd_tsdn(tsd), arena); + if (tcache == NULL) { + err = true; + goto label_return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches_avail != NULL) { elm = tcaches_avail; tcaches_avail = tcaches_avail->next; @@ -478,41 +508,50 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) *r_ind = tcaches_past; tcaches_past++; } + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); - return (false); + err = false; +label_return: + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &tcaches_mtx); + return err; } static void -tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) -{ +tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); - if (elm->tcache == NULL) + if (elm->tcache == NULL) { return; + } tcache_destroy(tsd, elm->tcache); elm->tcache = NULL; } void -tcaches_flush(tsd_t *tsd, unsigned ind) -{ - +tcaches_flush(tsd_t *tsd, unsigned ind) { + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_elm_flush(tsd, &tcaches[ind]); + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } void -tcaches_destroy(tsd_t *tsd, unsigned ind) -{ - tcaches_t *elm = &tcaches[ind]; +tcaches_destroy(tsd_t *tsd, unsigned ind) { + tcaches_t *elm; + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + elm = &tcaches[ind]; tcaches_elm_flush(tsd, elm); elm->next = tcaches_avail; tcaches_avail = elm; + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } bool -tcache_boot(tsdn_t *tsdn) -{ +tcache_boot(tsdn_t *tsdn) { unsigned i; + cassert(config_tcache); + /* * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. @@ -524,6 +563,10 @@ tcache_boot(tsdn_t *tsdn) else tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) { + return true; + } + nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ @@ -553,3 +596,24 @@ tcache_boot(tsdn_t *tsdn) return (false); } + +void +tcache_prefork(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_prefork(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_parent(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_child(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_child(tsdn, &tcaches_mtx); + } +} diff --git a/contrib/jemalloc/src/witness.c b/contrib/jemalloc/src/witness.c index 23753f246eb0..c3a65f7cdcd1 100644 --- a/contrib/jemalloc/src/witness.c +++ b/contrib/jemalloc/src/witness.c @@ -71,15 +71,16 @@ witness_not_owner_error_t *witness_not_owner_error = #endif #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(n_witness_depth_error) #endif void -witness_lockless_error(const witness_list_t *witnesses) -{ +witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth) { witness_t *w; - malloc_printf(": Should not own any locks:"); + malloc_printf(": Should own %u lock%s of rank >= %u:", depth, + (depth != 1) ? "s" : "", rank_inclusive); ql_foreach(w, witnesses, link) { malloc_printf(" %s(%u)", w->name, w->rank); } @@ -87,10 +88,9 @@ witness_lockless_error(const witness_list_t *witnesses) abort(); } #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) -witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(n_witness_lockless_error); +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(witness_depth_error) +witness_depth_error_t *witness_depth_error = JEMALLOC_N(n_witness_depth_error); #endif void